// Copyright 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/socket/tcp_socket.h"

#include <errno.h>
#include <fcntl.h>
#include <netdb.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#include "base/callback_helpers.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/posix/eintr_wrapper.h"
#include "build/build_config.h"
#include "net/base/address_list.h"
#include "net/base/connection_type_histograms.h"
#include "net/base/io_buffer.h"
#include "net/base/ip_endpoint.h"
#include "net/base/net_errors.h"
#include "net/base/net_util.h"
#include "net/base/network_change_notifier.h"
#include "net/socket/socket_net_log_params.h"

// If we don't have a definition for TCPI_OPT_SYN_DATA, create one.
#ifndef TCPI_OPT_SYN_DATA
#define TCPI_OPT_SYN_DATA 32
#endif

namespace net {

namespace {

const int kTCPKeepAliveSeconds = 45;

// SetTCPNoDelay turns on/off buffering in the kernel. By default, TCP sockets
// will wait up to 200ms for more data to complete a packet before transmitting.
// After calling this function, the kernel will not wait. See TCP_NODELAY in
// `man 7 tcp`.
bool SetTCPNoDelay(int fd, bool no_delay) {
  int on = no_delay ? 1 : 0;
  int error = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
  return error == 0;
}

// SetTCPKeepAlive sets SO_KEEPALIVE.
bool SetTCPKeepAlive(int fd, bool enable, int delay) {
  int on = enable ? 1 : 0;
  if (setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on))) {
    PLOG(ERROR) << "Failed to set SO_KEEPALIVE on fd: " << fd;
    return false;
  }
#if defined(OS_LINUX) || defined(OS_ANDROID)
  // Set seconds until first TCP keep alive.
  if (setsockopt(fd, SOL_TCP, TCP_KEEPIDLE, &delay, sizeof(delay))) {
    PLOG(ERROR) << "Failed to set TCP_KEEPIDLE on fd: " << fd;
    return false;
  }
  // Set seconds between TCP keep alives.
  if (setsockopt(fd, SOL_TCP, TCP_KEEPINTVL, &delay, sizeof(delay))) {
    PLOG(ERROR) << "Failed to set TCP_KEEPINTVL on fd: " << fd;
    return false;
  }
#endif
  return true;
}

int MapAcceptError(int os_error) {
  switch (os_error) {
    // If the client aborts the connection before the server calls accept,
    // POSIX specifies accept should fail with ECONNABORTED. The server can
    // ignore the error and just call accept again, so we map the error to
    // ERR_IO_PENDING. See UNIX Network Programming, Vol. 1, 3rd Ed., Sec.
    // 5.11, "Connection Abort before accept Returns".
    case ECONNABORTED:
      return ERR_IO_PENDING;
    default:
      return MapSystemError(os_error);
  }
}

int MapConnectError(int os_error) {
  switch (os_error) {
    case EACCES:
      return ERR_NETWORK_ACCESS_DENIED;
    case ETIMEDOUT:
      return ERR_CONNECTION_TIMED_OUT;
    default: {
      int net_error = MapSystemError(os_error);
      if (net_error == ERR_FAILED)
        return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.

      // Give a more specific error when the user is offline.
      if (net_error == ERR_ADDRESS_UNREACHABLE &&
          NetworkChangeNotifier::IsOffline()) {
        return ERR_INTERNET_DISCONNECTED;
      }
      return net_error;
    }
  }
}

}  // namespace

//-----------------------------------------------------------------------------

TCPSocketLibevent::Watcher::Watcher(
    const base::Closure& read_ready_callback,
    const base::Closure& write_ready_callback)
    : read_ready_callback_(read_ready_callback),
      write_ready_callback_(write_ready_callback) {
}

TCPSocketLibevent::Watcher::~Watcher() {
}

void TCPSocketLibevent::Watcher::OnFileCanReadWithoutBlocking(int /* fd */) {
  if (!read_ready_callback_.is_null())
    read_ready_callback_.Run();
  else
    NOTREACHED();
}

void TCPSocketLibevent::Watcher::OnFileCanWriteWithoutBlocking(int /* fd */) {
  if (!write_ready_callback_.is_null())
    write_ready_callback_.Run();
  else
    NOTREACHED();
}

TCPSocketLibevent::TCPSocketLibevent(NetLog* net_log,
                                     const NetLog::Source& source)
    : socket_(kInvalidSocket),
      accept_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteAccept,
                                 base::Unretained(this)),
                      base::Closure()),
      accept_socket_(NULL),
      accept_address_(NULL),
      read_watcher_(base::Bind(&TCPSocketLibevent::DidCompleteRead,
                               base::Unretained(this)),
                    base::Closure()),
      write_watcher_(base::Closure(),
                     base::Bind(&TCPSocketLibevent::DidCompleteConnectOrWrite,
                                base::Unretained(this))),
      read_buf_len_(0),
      write_buf_len_(0),
      use_tcp_fastopen_(IsTCPFastOpenEnabled()),
      tcp_fastopen_connected_(false),
      fast_open_status_(FAST_OPEN_STATUS_UNKNOWN),
      waiting_connect_(false),
      connect_os_error_(0),
      logging_multiple_connect_attempts_(false),
      net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
  net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
                      source.ToEventParametersCallback());
}

TCPSocketLibevent::~TCPSocketLibevent() {
  net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
  if (tcp_fastopen_connected_) {
    UMA_HISTOGRAM_ENUMERATION("Net.TcpFastOpenSocketConnection",
                              fast_open_status_, FAST_OPEN_MAX_VALUE);
  }
  Close();
}

int TCPSocketLibevent::Open(AddressFamily family) {
  DCHECK(CalledOnValidThread());
  DCHECK_EQ(socket_, kInvalidSocket);

  socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
                                 IPPROTO_TCP);
  if (socket_ < 0) {
    PLOG(ERROR) << "CreatePlatformSocket() returned an error";
    return MapSystemError(errno);
  }

  if (SetNonBlocking(socket_)) {
    int result = MapSystemError(errno);
    Close();
    return result;
  }

  return OK;
}

int TCPSocketLibevent::AdoptConnectedSocket(int socket,
                                            const IPEndPoint& peer_address) {
  DCHECK(CalledOnValidThread());
  DCHECK_EQ(socket_, kInvalidSocket);

  socket_ = socket;

  if (SetNonBlocking(socket_)) {
    int result = MapSystemError(errno);
    Close();
    return result;
  }

  peer_address_.reset(new IPEndPoint(peer_address));

  return OK;
}

int TCPSocketLibevent::Bind(const IPEndPoint& address) {
  DCHECK(CalledOnValidThread());
  DCHECK_NE(socket_, kInvalidSocket);

  SockaddrStorage storage;
  if (!address.ToSockAddr(storage.addr, &storage.addr_len))
    return ERR_ADDRESS_INVALID;

  int result = bind(socket_, storage.addr, storage.addr_len);
  if (result < 0) {
    PLOG(ERROR) << "bind() returned an error";
    return MapSystemError(errno);
  }

  return OK;
}

int TCPSocketLibevent::Listen(int backlog) {
  DCHECK(CalledOnValidThread());
  DCHECK_GT(backlog, 0);
  DCHECK_NE(socket_, kInvalidSocket);

  int result = listen(socket_, backlog);
  if (result < 0) {
    PLOG(ERROR) << "listen() returned an error";
    return MapSystemError(errno);
  }

  return OK;
}

int TCPSocketLibevent::Accept(scoped_ptr<TCPSocketLibevent>* socket,
                              IPEndPoint* address,
                              const CompletionCallback& callback) {
  DCHECK(CalledOnValidThread());
  DCHECK(socket);
  DCHECK(address);
  DCHECK(!callback.is_null());
  DCHECK(accept_callback_.is_null());

  net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);

  int result = AcceptInternal(socket, address);

  if (result == ERR_IO_PENDING) {
    if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
            socket_, true, base::MessageLoopForIO::WATCH_READ,
            &accept_socket_watcher_, &accept_watcher_)) {
      PLOG(ERROR) << "WatchFileDescriptor failed on read";
      return MapSystemError(errno);
    }

    accept_socket_ = socket;
    accept_address_ = address;
    accept_callback_ = callback;
  }

  return result;
}

int TCPSocketLibevent::Connect(const IPEndPoint& address,
                               const CompletionCallback& callback) {
  DCHECK(CalledOnValidThread());
  DCHECK_NE(socket_, kInvalidSocket);
  DCHECK(!waiting_connect_);

  // |peer_address_| will be non-NULL if Connect() has been called. Unless
  // Close() is called to reset the internal state, a second call to Connect()
  // is not allowed.
  // Please note that we don't allow a second Connect() even if the previous
  // Connect() has failed. Connecting the same |socket_| again after a
  // connection attempt failed results in unspecified behavior according to
  // POSIX.
  DCHECK(!peer_address_);

  if (!logging_multiple_connect_attempts_)
    LogConnectBegin(AddressList(address));

  peer_address_.reset(new IPEndPoint(address));

  int rv = DoConnect();
  if (rv == ERR_IO_PENDING) {
    // Synchronous operation not supported.
    DCHECK(!callback.is_null());
    write_callback_ = callback;
    waiting_connect_ = true;
  } else {
    DoConnectComplete(rv);
  }

  return rv;
}

bool TCPSocketLibevent::IsConnected() const {
  DCHECK(CalledOnValidThread());

  if (socket_ == kInvalidSocket || waiting_connect_)
    return false;

  if (use_tcp_fastopen_ && !tcp_fastopen_connected_ && peer_address_) {
    // With TCP FastOpen, we pretend that the socket is connected.
    // This allows GetPeerAddress() to return peer_address_.
    return true;
  }

  // Check if connection is alive.
  char c;
  int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK));
  if (rv == 0)
    return false;
  if (rv == -1 && errno != EAGAIN && errno != EWOULDBLOCK)
    return false;

  return true;
}

bool TCPSocketLibevent::IsConnectedAndIdle() const {
  DCHECK(CalledOnValidThread());

  if (socket_ == kInvalidSocket || waiting_connect_)
    return false;

  // TODO(wtc): should we also handle the TCP FastOpen case here,
  // as we do in IsConnected()?

  // Check if connection is alive and we haven't received any data
  // unexpectedly.
  char c;
  int rv = HANDLE_EINTR(recv(socket_, &c, 1, MSG_PEEK));
  if (rv >= 0)
    return false;
  if (errno != EAGAIN && errno != EWOULDBLOCK)
    return false;

  return true;
}

int TCPSocketLibevent::Read(IOBuffer* buf,
                            int buf_len,
                            const CompletionCallback& callback) {
  DCHECK(CalledOnValidThread());
  DCHECK_NE(kInvalidSocket, socket_);
  DCHECK(!waiting_connect_);
  DCHECK(read_callback_.is_null());
  // Synchronous operation not supported
  DCHECK(!callback.is_null());
  DCHECK_GT(buf_len, 0);

  int nread = HANDLE_EINTR(read(socket_, buf->data(), buf_len));
  if (nread >= 0) {
    base::StatsCounter read_bytes("tcp.read_bytes");
    read_bytes.Add(nread);
    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, nread,
                                  buf->data());
    RecordFastOpenStatus();
    return nread;
  }
  if (errno != EAGAIN && errno != EWOULDBLOCK) {
    int net_error = MapSystemError(errno);
    net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
                      CreateNetLogSocketErrorCallback(net_error, errno));
    return net_error;
  }

  if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
          socket_, true, base::MessageLoopForIO::WATCH_READ,
          &read_socket_watcher_, &read_watcher_)) {
    DVLOG(1) << "WatchFileDescriptor failed on read, errno " << errno;
    return MapSystemError(errno);
  }

  read_buf_ = buf;
  read_buf_len_ = buf_len;
  read_callback_ = callback;
  return ERR_IO_PENDING;
}

int TCPSocketLibevent::Write(IOBuffer* buf,
                             int buf_len,
                             const CompletionCallback& callback) {
  DCHECK(CalledOnValidThread());
  DCHECK_NE(kInvalidSocket, socket_);
  DCHECK(!waiting_connect_);
  DCHECK(write_callback_.is_null());
  // Synchronous operation not supported
  DCHECK(!callback.is_null());
  DCHECK_GT(buf_len, 0);

  int nwrite = InternalWrite(buf, buf_len);
  if (nwrite >= 0) {
    base::StatsCounter write_bytes("tcp.write_bytes");
    write_bytes.Add(nwrite);
    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, nwrite,
                                  buf->data());
    return nwrite;
  }
  if (errno != EAGAIN && errno != EWOULDBLOCK) {
    int net_error = MapSystemError(errno);
    net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
                      CreateNetLogSocketErrorCallback(net_error, errno));
    return net_error;
  }

  if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
          socket_, true, base::MessageLoopForIO::WATCH_WRITE,
          &write_socket_watcher_, &write_watcher_)) {
    DVLOG(1) << "WatchFileDescriptor failed on write, errno " << errno;
    return MapSystemError(errno);
  }

  write_buf_ = buf;
  write_buf_len_ = buf_len;
  write_callback_ = callback;
  return ERR_IO_PENDING;
}

int TCPSocketLibevent::GetLocalAddress(IPEndPoint* address) const {
  DCHECK(CalledOnValidThread());
  DCHECK(address);

  SockaddrStorage storage;
  if (getsockname(socket_, storage.addr, &storage.addr_len) < 0)
    return MapSystemError(errno);
  if (!address->FromSockAddr(storage.addr, storage.addr_len))
    return ERR_ADDRESS_INVALID;

  return OK;
}

int TCPSocketLibevent::GetPeerAddress(IPEndPoint* address) const {
  DCHECK(CalledOnValidThread());
  DCHECK(address);
  if (!IsConnected())
    return ERR_SOCKET_NOT_CONNECTED;
  *address = *peer_address_;
  return OK;
}

int TCPSocketLibevent::SetDefaultOptionsForServer() {
  DCHECK(CalledOnValidThread());
  return SetAddressReuse(true);
}

void TCPSocketLibevent::SetDefaultOptionsForClient() {
  DCHECK(CalledOnValidThread());

  // This mirrors the behaviour on Windows. See the comment in
  // tcp_socket_win.cc after searching for "NODELAY".
  SetTCPNoDelay(socket_, true);  // If SetTCPNoDelay fails, we don't care.
  SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
}

int TCPSocketLibevent::SetAddressReuse(bool allow) {
  DCHECK(CalledOnValidThread());

  // SO_REUSEADDR is useful for server sockets to bind to a recently unbound
  // port. When a socket is closed, the end point changes its state to TIME_WAIT
  // and wait for 2 MSL (maximum segment lifetime) to ensure the remote peer
  // acknowledges its closure. For server sockets, it is usually safe to
  // bind to a TIME_WAIT end point immediately, which is a widely adopted
  // behavior.
  //
  // Note that on *nix, SO_REUSEADDR does not enable the TCP socket to bind to
  // an end point that is already bound by another socket. To do that one must
  // set SO_REUSEPORT instead. This option is not provided on Linux prior
  // to 3.9.
  //
  // SO_REUSEPORT is provided in MacOS X and iOS.
  int boolean_value = allow ? 1 : 0;
  int rv = setsockopt(socket_, SOL_SOCKET, SO_REUSEADDR, &boolean_value,
                      sizeof(boolean_value));
  if (rv < 0)
    return MapSystemError(errno);
  return OK;
}

bool TCPSocketLibevent::SetReceiveBufferSize(int32 size) {
  DCHECK(CalledOnValidThread());
  int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF,
      reinterpret_cast<const char*>(&size),
      sizeof(size));
  DCHECK(!rv) << "Could not set socket receive buffer size: " << errno;
  return rv == 0;
}

bool TCPSocketLibevent::SetSendBufferSize(int32 size) {
  DCHECK(CalledOnValidThread());
  int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF,
      reinterpret_cast<const char*>(&size),
      sizeof(size));
  DCHECK(!rv) << "Could not set socket send buffer size: " << errno;
  return rv == 0;
}

bool TCPSocketLibevent::SetKeepAlive(bool enable, int delay) {
  DCHECK(CalledOnValidThread());
  return SetTCPKeepAlive(socket_, enable, delay);
}

bool TCPSocketLibevent::SetNoDelay(bool no_delay) {
  DCHECK(CalledOnValidThread());
  return SetTCPNoDelay(socket_, no_delay);
}

void TCPSocketLibevent::Close() {
  DCHECK(CalledOnValidThread());

  bool ok = accept_socket_watcher_.StopWatchingFileDescriptor();
  DCHECK(ok);
  ok = read_socket_watcher_.StopWatchingFileDescriptor();
  DCHECK(ok);
  ok = write_socket_watcher_.StopWatchingFileDescriptor();
  DCHECK(ok);

  if (socket_ != kInvalidSocket) {
    if (IGNORE_EINTR(close(socket_)) < 0)
      PLOG(ERROR) << "close";
    socket_ = kInvalidSocket;
  }

  if (!accept_callback_.is_null()) {
    accept_socket_ = NULL;
    accept_address_ = NULL;
    accept_callback_.Reset();
  }

  if (!read_callback_.is_null()) {
    read_buf_ = NULL;
    read_buf_len_ = 0;
    read_callback_.Reset();
  }

  if (!write_callback_.is_null()) {
    write_buf_ = NULL;
    write_buf_len_ = 0;
    write_callback_.Reset();
  }

  tcp_fastopen_connected_ = false;
  fast_open_status_ = FAST_OPEN_STATUS_UNKNOWN;
  waiting_connect_ = false;
  peer_address_.reset();
  connect_os_error_ = 0;
}

bool TCPSocketLibevent::UsingTCPFastOpen() const {
  return use_tcp_fastopen_;
}

void TCPSocketLibevent::StartLoggingMultipleConnectAttempts(
    const AddressList& addresses) {
  if (!logging_multiple_connect_attempts_) {
    logging_multiple_connect_attempts_ = true;
    LogConnectBegin(addresses);
  } else {
    NOTREACHED();
  }
}

void TCPSocketLibevent::EndLoggingMultipleConnectAttempts(int net_error) {
  if (logging_multiple_connect_attempts_) {
    LogConnectEnd(net_error);
    logging_multiple_connect_attempts_ = false;
  } else {
    NOTREACHED();
  }
}

int TCPSocketLibevent::AcceptInternal(scoped_ptr<TCPSocketLibevent>* socket,
                                      IPEndPoint* address) {
  SockaddrStorage storage;
  int new_socket = HANDLE_EINTR(accept(socket_,
                                       storage.addr,
                                       &storage.addr_len));
  if (new_socket < 0) {
    int net_error = MapAcceptError(errno);
    if (net_error != ERR_IO_PENDING)
      net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
    return net_error;
  }

  IPEndPoint ip_end_point;
  if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
    NOTREACHED();
    if (IGNORE_EINTR(close(new_socket)) < 0)
      PLOG(ERROR) << "close";
    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT,
                                      ERR_ADDRESS_INVALID);
    return ERR_ADDRESS_INVALID;
  }
  scoped_ptr<TCPSocketLibevent> tcp_socket(new TCPSocketLibevent(
      net_log_.net_log(), net_log_.source()));
  int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
  if (adopt_result != OK) {
    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
    return adopt_result;
  }
  *socket = tcp_socket.Pass();
  *address = ip_end_point;
  net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
                    CreateNetLogIPEndPointCallback(&ip_end_point));
  return OK;
}

int TCPSocketLibevent::DoConnect() {
  DCHECK_EQ(0, connect_os_error_);

  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                      CreateNetLogIPEndPointCallback(peer_address_.get()));

  // Connect the socket.
  if (!use_tcp_fastopen_) {
    SockaddrStorage storage;
    if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
      return ERR_INVALID_ARGUMENT;

    if (!HANDLE_EINTR(connect(socket_, storage.addr, storage.addr_len))) {
      // Connected without waiting!
      return OK;
    }
  } else {
    // With TCP FastOpen, we pretend that the socket is connected.
    DCHECK(!tcp_fastopen_connected_);
    return OK;
  }

  // Check if the connect() failed synchronously.
  connect_os_error_ = errno;
  if (connect_os_error_ != EINPROGRESS)
    return MapConnectError(connect_os_error_);

  // Otherwise the connect() is going to complete asynchronously, so watch
  // for its completion.
  if (!base::MessageLoopForIO::current()->WatchFileDescriptor(
          socket_, true, base::MessageLoopForIO::WATCH_WRITE,
          &write_socket_watcher_, &write_watcher_)) {
    connect_os_error_ = errno;
    DVLOG(1) << "WatchFileDescriptor failed: " << connect_os_error_;
    return MapSystemError(connect_os_error_);
  }

  return ERR_IO_PENDING;
}

void TCPSocketLibevent::DoConnectComplete(int result) {
  // Log the end of this attempt (and any OS error it threw).
  int os_error = connect_os_error_;
  connect_os_error_ = 0;
  if (result != OK) {
    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
                      NetLog::IntegerCallback("os_error", os_error));
  } else {
    net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
  }

  if (!logging_multiple_connect_attempts_)
    LogConnectEnd(result);
}

void TCPSocketLibevent::LogConnectBegin(const AddressList& addresses) {
  base::StatsCounter connects("tcp.connect");
  connects.Increment();

  net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
                      addresses.CreateNetLogCallback());
}

void TCPSocketLibevent::LogConnectEnd(int net_error) {
  if (net_error == OK)
    UpdateConnectionTypeHistograms(CONNECTION_ANY);

  if (net_error != OK) {
    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
    return;
  }

  SockaddrStorage storage;
  int rv = getsockname(socket_, storage.addr, &storage.addr_len);
  if (rv != 0) {
    PLOG(ERROR) << "getsockname() [rv: " << rv << "] error: ";
    NOTREACHED();
    net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
    return;
  }

  net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT,
                    CreateNetLogSourceAddressCallback(storage.addr,
                                                      storage.addr_len));
}

void TCPSocketLibevent::DidCompleteRead() {
  RecordFastOpenStatus();
  if (read_callback_.is_null())
    return;

  int bytes_transferred;
  bytes_transferred = HANDLE_EINTR(read(socket_, read_buf_->data(),
                                        read_buf_len_));

  int result;
  if (bytes_transferred >= 0) {
    result = bytes_transferred;
    base::StatsCounter read_bytes("tcp.read_bytes");
    read_bytes.Add(bytes_transferred);
    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, result,
                                  read_buf_->data());
  } else {
    result = MapSystemError(errno);
    if (result != ERR_IO_PENDING) {
      net_log_.AddEvent(NetLog::TYPE_SOCKET_READ_ERROR,
                        CreateNetLogSocketErrorCallback(result, errno));
    }
  }

  if (result != ERR_IO_PENDING) {
    read_buf_ = NULL;
    read_buf_len_ = 0;
    bool ok = read_socket_watcher_.StopWatchingFileDescriptor();
    DCHECK(ok);
    base::ResetAndReturn(&read_callback_).Run(result);
  }
}

void TCPSocketLibevent::DidCompleteWrite() {
  if (write_callback_.is_null())
    return;

  int bytes_transferred;
  bytes_transferred = HANDLE_EINTR(write(socket_, write_buf_->data(),
                                         write_buf_len_));

  int result;
  if (bytes_transferred >= 0) {
    result = bytes_transferred;
    base::StatsCounter write_bytes("tcp.write_bytes");
    write_bytes.Add(bytes_transferred);
    net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, result,
                                  write_buf_->data());
  } else {
    result = MapSystemError(errno);
    if (result != ERR_IO_PENDING) {
      net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
                        CreateNetLogSocketErrorCallback(result, errno));
    }
  }

  if (result != ERR_IO_PENDING) {
    write_buf_ = NULL;
    write_buf_len_ = 0;
    write_socket_watcher_.StopWatchingFileDescriptor();
    base::ResetAndReturn(&write_callback_).Run(result);
  }
}

void TCPSocketLibevent::DidCompleteConnect() {
  DCHECK(waiting_connect_);

  // Get the error that connect() completed with.
  int os_error = 0;
  socklen_t len = sizeof(os_error);
  if (getsockopt(socket_, SOL_SOCKET, SO_ERROR, &os_error, &len) < 0)
    os_error = errno;

  int result = MapConnectError(os_error);
  connect_os_error_ = os_error;
  if (result != ERR_IO_PENDING) {
    DoConnectComplete(result);
    waiting_connect_ = false;
    write_socket_watcher_.StopWatchingFileDescriptor();
    base::ResetAndReturn(&write_callback_).Run(result);
  }
}

void TCPSocketLibevent::DidCompleteConnectOrWrite() {
  if (waiting_connect_)
    DidCompleteConnect();
  else
    DidCompleteWrite();
}

void TCPSocketLibevent::DidCompleteAccept() {
  DCHECK(CalledOnValidThread());

  int result = AcceptInternal(accept_socket_, accept_address_);
  if (result != ERR_IO_PENDING) {
    accept_socket_ = NULL;
    accept_address_ = NULL;
    bool ok = accept_socket_watcher_.StopWatchingFileDescriptor();
    DCHECK(ok);
    CompletionCallback callback = accept_callback_;
    accept_callback_.Reset();
    callback.Run(result);
  }
}

int TCPSocketLibevent::InternalWrite(IOBuffer* buf, int buf_len) {
  int nwrite;
  if (use_tcp_fastopen_ && !tcp_fastopen_connected_) {
    SockaddrStorage storage;
    if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) {
      errno = EINVAL;
      return -1;
    }

    int flags = 0x20000000; // Magic flag to enable TCP_FASTOPEN.
#if defined(OS_LINUX)
    // sendto() will fail with EPIPE when the system doesn't support TCP Fast
    // Open. Theoretically that shouldn't happen since the caller should check
    // for system support on startup, but users may dynamically disable TCP Fast
    // Open via sysctl.
    flags |= MSG_NOSIGNAL;
#endif // defined(OS_LINUX)
    nwrite = HANDLE_EINTR(sendto(socket_,
                                 buf->data(),
                                 buf_len,
                                 flags,
                                 storage.addr,
                                 storage.addr_len));
    tcp_fastopen_connected_ = true;

    if (nwrite < 0) {
      DCHECK_NE(EPIPE, errno);

      // If errno == EINPROGRESS, that means the kernel didn't have a cookie
      // and would block. The kernel is internally doing a connect() though.
      // Remap EINPROGRESS to EAGAIN so we treat this the same as our other
      // asynchronous cases. Note that the user buffer has not been copied to
      // kernel space.
      if (errno == EINPROGRESS) {
        errno = EAGAIN;
        fast_open_status_ = FAST_OPEN_SLOW_CONNECT_RETURN;
      } else {
        fast_open_status_ = FAST_OPEN_ERROR;
      }
    } else {
      fast_open_status_ = FAST_OPEN_FAST_CONNECT_RETURN;
    }
  } else {
    nwrite = HANDLE_EINTR(write(socket_, buf->data(), buf_len));
  }
  return nwrite;
}

void TCPSocketLibevent::RecordFastOpenStatus() {
  if (use_tcp_fastopen_ &&
      (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ||
       fast_open_status_ == FAST_OPEN_SLOW_CONNECT_RETURN)) {
    DCHECK_NE(FAST_OPEN_STATUS_UNKNOWN, fast_open_status_);
    bool getsockopt_success(false);
    bool server_acked_data(false);
#if defined(TCP_INFO)
    // Probe to see the if the socket used TCP Fast Open.
    tcp_info info;
    socklen_t info_len = sizeof(tcp_info);
    getsockopt_success =
        getsockopt(socket_, IPPROTO_TCP, TCP_INFO, &info, &info_len) == 0 &&
        info_len == sizeof(tcp_info);
    server_acked_data = getsockopt_success &&
        (info.tcpi_options & TCPI_OPT_SYN_DATA);
#endif
    if (getsockopt_success) {
      if (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN) {
        fast_open_status_ = (server_acked_data ? FAST_OPEN_SYN_DATA_ACK :
                             FAST_OPEN_SYN_DATA_NACK);
      } else {
        fast_open_status_ = (server_acked_data ? FAST_OPEN_NO_SYN_DATA_ACK :
                             FAST_OPEN_NO_SYN_DATA_NACK);
      }
    } else {
      fast_open_status_ = (fast_open_status_ == FAST_OPEN_FAST_CONNECT_RETURN ?
                           FAST_OPEN_SYN_DATA_FAILED :
                           FAST_OPEN_NO_SYN_DATA_FAILED);
    }
  }
}

}  // namespace net