// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Parse the data returned from the SafeBrowsing v2.1 protocol response.

#include <stdlib.h>

#include "chrome/browser/safe_browsing/protocol_parser.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"

#include "build/build_config.h"

#if defined(OS_WIN)
#include <Winsock2.h>
#elif defined(OS_POSIX)
#include <arpa/inet.h>
#endif

#include "base/format_macros.h"
#include "base/logging.h"
#include "base/string_split.h"
#include "base/string_util.h"

namespace {

// Looks for the first '\n' within the first |input_len| bytes of |input|.
// On success, the bytes preceding the newline (the newline itself excluded)
// are copied into |line| and true is returned. If no newline is found in
// range, or |input| is NULL, |line| is left untouched and false is returned.
//
// |input| is binary data: it is not NULL terminated and may legitimately
// contain NULL bytes. The copy therefore uses
//   std::string::assign(const charT* s, size_type n)
// which takes an explicit length instead of calling strlen, so embedded NULL
// bytes do not truncate the line.
bool GetLine(const char* input, int input_len, std::string* line) {
  if (!input)
    return false;

  for (int i = 0; i < input_len; ++i) {
    if (input[i] != '\n')
      continue;
    line->assign(input, static_cast<size_t>(i));
    return true;
  }
  return false;
}

}  // namespace

//------------------------------------------------------------------------------
// SafeBrowsingProtocolParser implementation

// Nothing to set up: the constructor body is empty.
SafeBrowsingProtocolParser::SafeBrowsingProtocolParser() {}

// Parses a GetHash response stored in the |chunk_len| bytes at |chunk_data|
// (binary data, not NULL terminated) and appends one SBFullHashResult per
// returned hash to |full_hashes|, which is cleared first.
//
// When |key| is non-empty the first line of the response is handled
// specially: if it is "e:pleaserekey", |*re_key| is set to true and the
// function returns true without parsing anything else; otherwise the line is
// passed to VerifyMAC as the MAC of the remaining data.
//
// Each following record is a "<list name>:<add chunk id>:<byte count>" line
// followed by <byte count> bytes of full hashes. Records for lists that
// GetListId does not recognize are skipped.
//
// Returns true only if the whole buffer was consumed by well-formed records.
bool SafeBrowsingProtocolParser::ParseGetHash(
    const char* chunk_data,
    int chunk_len,
    const std::string& key,
    bool* re_key,
    std::vector<SBFullHashResult>* full_hashes) {
  full_hashes->clear();
  int length = chunk_len;
  const char* data = chunk_data;
  const int kFullHashSize = static_cast<int>(sizeof(SBFullHash));

  int offset;
  std::string line;
  if (!key.empty()) {
    if (!GetLine(data, length, &line))
      return false;  // Error! Bad GetHash result.

    if (line == "e:pleaserekey") {
      *re_key = true;
      return true;
    }

    // Skip over the MAC line and its trailing newline.
    offset = static_cast<int>(line.size()) + 1;
    data += offset;
    length -= offset;

    if (!safe_browsing_util::VerifyMAC(key, line, data, length))
      return false;
  }

  while (length > 0) {
    if (!GetLine(data, length, &line))
      return false;

    // Skip over the header line and its trailing newline.
    offset = static_cast<int>(line.size()) + 1;
    data += offset;
    length -= offset;

    std::vector<std::string> cmd_parts;
    base::SplitString(line, ':', &cmd_parts);
    if (cmd_parts.size() != 3)
      return false;

    SBFullHashResult full_hash;
    full_hash.list_name = cmd_parts[0];
    full_hash.add_chunk_id = atoi(cmd_parts[1].c_str());
    int full_hash_len = atoi(cmd_parts[2].c_str());

    // The byte count is supplied by the server. Reject values that are
    // negative or larger than what is left in the buffer before using them
    // to move |data|, otherwise later reads would run outside the buffer.
    if (full_hash_len < 0 || full_hash_len > length)
      return false;

    // Ignore hash results from lists we don't recognize.
    if (safe_browsing_util::GetListId(full_hash.list_name) < 0) {
      data += full_hash_len;
      length -= full_hash_len;
      continue;
    }

    // The payload must be a whole number of full hashes; a partial trailing
    // hash would make the copy below read past the advertised payload.
    if (full_hash_len % kFullHashSize != 0)
      return false;

    for (; full_hash_len > 0; full_hash_len -= kFullHashSize) {
      memcpy(&full_hash.hash, data, sizeof(SBFullHash));
      full_hashes->push_back(full_hash);
      data += kFullHashSize;
      length -= kFullHashSize;
    }
  }

  return length == 0;
}

void SafeBrowsingProtocolParser::FormatGetHash(
   const std::vector<SBPrefix>& prefixes, std::string* request) {
  DCHECK(request);

  // Format the request for GetHash.
  request->append(StringPrintf("%" PRIuS ":%" PRIuS "\n",
                               sizeof(SBPrefix),
                               sizeof(SBPrefix) * prefixes.size()));
  for (size_t i = 0; i < prefixes.size(); ++i) {
    request->append(reinterpret_cast<const char*>(&prefixes[i]),
                    sizeof(SBPrefix));
  }
}

// Parses an update response stored in the |chunk_len| bytes at |chunk_data|.
// The response is a sequence of newline-terminated "<command>:<argument>"
// lines, dispatched on the first character of the command:
//   ad / sd  add-del / sub-del ranges for the current list; appended to
//            |deletes|. Requires a preceding 'i' line.
//   e        must be "e:pleaserekey"; sets |*re_key|.
//   i        names the list that subsequent lines refer to.
//   m        MAC of the data following this line; checked with VerifyMAC
//            when |key| is non-empty.
//   n        seconds until the next update; stored in |*next_update_sec|.
//   r        must be "r:pleasereset"; sets |*reset|.
//   u        redirect URL (plus ",<mac>" when |key| is non-empty); appended
//            to |chunk_urls|.
// Commands starting with any other character are ignored.
//
// Returns false on any malformed line or failed MAC check, true once the
// whole buffer has been consumed.
bool SafeBrowsingProtocolParser::ParseUpdate(
    const char* chunk_data,
    int chunk_len,
    const std::string& key,
    int* next_update_sec,
    bool* re_key,
    bool* reset,
    std::vector<SBChunkDelete>* deletes,
    std::vector<ChunkUrl>* chunk_urls) {
  DCHECK(next_update_sec);
  DCHECK(deletes);
  DCHECK(chunk_urls);

  int length = chunk_len;
  const char* data = chunk_data;

  // Set by the most recent 'i' command; empty until one has been seen.
  std::string list_name;

  while (length > 0) {
    std::string cmd_line;
    if (!GetLine(data, length, &cmd_line))
      return false;  // Error: bad list format!

    std::vector<std::string> cmd_parts;
    base::SplitString(cmd_line, ':', &cmd_parts);
    if (cmd_parts.empty())
      return false;
    const std::string& command = cmd_parts[0];
    // An empty command has no first character; treat it as '\0' so it falls
    // through to the "unknown command" handling below.
    const char command_type = command.empty() ? '\0' : command[0];
    // Every command except the redirect ('u') has exactly one argument. The
    // redirect is exempt because its URL may itself contain colons.
    if (cmd_parts.size() != 2 && command_type != 'u')
      return false;

    const int consumed = static_cast<int>(cmd_line.size()) + 1;
    data += consumed;
    length -= consumed;
    if (length < 0)
      return false;  // Parsing error.

    // Differentiate on the first character of the command (which is usually
    // only one character, with the exception of the 'ad' and 'sd' commands).
    switch (command_type) {
      case 'a':
      case 's': {
        // Must be either an 'ad' (add-del) or 'sd' (sub-del) chunk. We must
        // have also parsed the list name before getting here, or the add-del
        // or sub-del will have no context.
        if (command.size() != 2 || command[1] != 'd' || list_name.empty())
          return false;
        SBChunkDelete chunk_delete;
        chunk_delete.is_sub_del = command_type == 's';
        StringToRanges(cmd_parts[1], &chunk_delete.chunk_del);
        chunk_delete.list_name = list_name;
        deletes->push_back(chunk_delete);
        break;
      }

      case 'e':
        if (cmd_parts[1] != "pleaserekey")
          return false;
        *re_key = true;
        break;

      case 'i':
        // The line providing the name of the list (i.e. 'goog-phish-shavar').
        list_name = cmd_parts[1];
        break;

      case 'm':
        // Verify that the MAC of the remainder of this chunk is what we
        // expect.
        if (!key.empty() &&
            !safe_browsing_util::VerifyMAC(key, cmd_parts[1], data, length))
          return false;
        break;

      case 'n':
        // The line providing the next earliest time (in seconds) to re-query.
        *next_update_sec = atoi(cmd_parts[1].c_str());
        break;

      case 'u': {
        // The redirect command is of the form: u:<url>,<mac> where <url> can
        // contain multiple colons, commas or any valid URL characters. We scan
        // backwards in the string looking for the first ',' we encounter and
        // assume that everything before that is the URL and everything after
        // is the MAC (if the MAC was requested).

        // A line consisting of just "u" is too short to hold the "u:" that is
        // skipped below; constructing the substring would fail with
        // std::out_of_range, so reject it as malformed instead.
        if (cmd_line.size() < 2)
          return false;

        std::string mac;
        std::string redirect_url(cmd_line, 2);  // Skip the initial "u:".
        if (!key.empty()) {
          std::string::size_type mac_pos = redirect_url.rfind(',');
          if (mac_pos == std::string::npos)
            return false;
          mac = redirect_url.substr(mac_pos + 1);
          redirect_url = redirect_url.substr(0, mac_pos);
        }

        ChunkUrl chunk_url;
        chunk_url.url = redirect_url;
        chunk_url.list_name = list_name;
        if (!key.empty())
          chunk_url.mac = mac;
        chunk_urls->push_back(chunk_url);
        break;
      }

      case 'r':
        if (cmd_parts[1] != "pleasereset")
          return false;
        *reset = true;
        break;

      default:
        // According to the spec, we ignore commands we don't understand.
        break;
    }
  }

  return true;
}

// Parses the chunk data stored in the |length| bytes at |data| for the list
// |list_name| and appends one SBChunk per parsed chunk to |chunks|.
//
// When |key| is non-empty, the whole buffer is first checked against |mac|
// with VerifyMAC. The buffer is then read as a sequence of records, each a
// header line followed by binary chunk data:
//   a:<chunk number>:<hash length>:<chunk length>\n<chunk data>   (add)
//   s:<chunk number>:<hash length>:<chunk length>\n<chunk data>   (sub)
// A line "e:pleaserekey" sets |*re_key| to true and is otherwise skipped.
//
// Returns false on a failed MAC check or any malformed record.
bool SafeBrowsingProtocolParser::ParseChunk(const std::string& list_name,
                                            const char* data,
                                            int length,
                                            const std::string& key,
                                            const std::string& mac,
                                            bool* re_key,
                                            SBChunkList* chunks) {
  int remaining = length;
  const char* chunk_data = data;

  if (!key.empty() &&
      !safe_browsing_util::VerifyMAC(key, mac, data, length)) {
    return false;
  }

  while (remaining > 0) {
    std::string cmd_line;
    // Search only the unread tail of the buffer. |chunk_data| has already
    // been advanced, so scanning |length| bytes from it would run past the
    // end of the input.
    if (!GetLine(chunk_data, remaining, &cmd_line))
      return false;  // Error: bad chunk format!

    const int line_len = static_cast<int>(cmd_line.length()) + 1;
    chunk_data += line_len;
    remaining -= line_len;
    std::vector<std::string> cmd_parts;
    base::SplitString(cmd_line, ':', &cmd_parts);

    // Handle a possible re-key command.
    if (cmd_parts.size() != 4) {
      if (cmd_parts.size() == 2 &&
          cmd_parts[0] == "e" &&
          cmd_parts[1] == "pleaserekey") {
        *re_key = true;
        continue;
      }
      return false;
    }

    // Process the chunk data.
    const int chunk_number = atoi(cmd_parts[1].c_str());
    const int hash_len = atoi(cmd_parts[2].c_str());
    if (hash_len != sizeof(SBPrefix) && hash_len != sizeof(SBFullHash)) {
      VLOG(1) << "ParseChunk got unknown hashlen " << hash_len;
      return false;
    }

    const int chunk_len = atoi(cmd_parts[3].c_str());

    // The chunk length is supplied by the server: it must be non-negative and
    // fit in what is left of the buffer. A negative value would otherwise
    // pass the size check and move |chunk_data| backwards.
    if (chunk_len < 0 || remaining < chunk_len)
      return false;  // parse error.

    chunks->push_back(SBChunk());
    chunks->back().chunk_number = chunk_number;

    if (cmd_parts[0] == "a") {
      chunks->back().is_add = true;
      if (!ParseAddChunk(list_name, chunk_data, chunk_len, hash_len,
                         &chunks->back().hosts))
        return false;  // Parse error.
    } else if (cmd_parts[0] == "s") {
      chunks->back().is_add = false;
      if (!ParseSubChunk(list_name, chunk_data, chunk_len, hash_len,
                         &chunks->back().hosts))
        return false;  // Parse error.
    } else {
      NOTREACHED();
      return false;
    }

    chunk_data += chunk_len;
    remaining -= chunk_len;
    DCHECK_LE(0, remaining);
  }

  DCHECK(remaining == 0);

  return true;
}

bool SafeBrowsingProtocolParser::ParseAddChunk(const std::string& list_name,
                                               const char* data,
                                               int data_len,
                                               int hash_len,
                                               std::deque<SBChunkHost>* hosts) {
  const char* chunk_data = data;
  int remaining = data_len;
  int prefix_count;
  SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
      SBEntry::ADD_PREFIX : SBEntry::ADD_FULL_HASH;

  if (list_name == safe_browsing_util::kBinHashList) {
    // kBinHashList only contains prefixes, no HOSTKEY and COUNT.
    DCHECK_EQ(0, remaining % hash_len);
    prefix_count = remaining / hash_len;
    SBChunkHost chunk_host;
    chunk_host.host = 0;
    chunk_host.entry = SBEntry::Create(type, prefix_count);
    hosts->push_back(chunk_host);
    if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
      return false;
  } else {
    SBPrefix host;
    const int min_size = sizeof(SBPrefix) + 1;
    while (remaining >= min_size) {
      ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
      SBChunkHost chunk_host;
      chunk_host.host = host;
      chunk_host.entry = SBEntry::Create(type, prefix_count);
      hosts->push_back(chunk_host);
      if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
                        prefix_count))
        return false;
    }
  }
  return remaining == 0;
}

bool SafeBrowsingProtocolParser::ParseSubChunk(const std::string& list_name,
                                               const char* data,
                                               int data_len,
                                               int hash_len,
                                               std::deque<SBChunkHost>* hosts) {
  int remaining = data_len;
  const char* chunk_data = data;
  int prefix_count;
  SBEntry::Type type = hash_len == sizeof(SBPrefix) ?
      SBEntry::SUB_PREFIX : SBEntry::SUB_FULL_HASH;

  if (list_name == safe_browsing_util::kBinHashList) {
    SBChunkHost chunk_host;
    // Set host to 0 and it won't be used for kBinHashList.
    chunk_host.host = 0;
    // kBinHashList only contains (add_chunk_number, prefix) pairs, no HOSTKEY
    // and COUNT. |add_chunk_number| is int32.
    prefix_count = remaining / (sizeof(int32) + hash_len);
    chunk_host.entry = SBEntry::Create(type, prefix_count);
    if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry, prefix_count))
      return false;
    hosts->push_back(chunk_host);
  } else {
    SBPrefix host;
    const int min_size = 2 * sizeof(SBPrefix) + 1;
    while (remaining >= min_size) {
      ReadHostAndPrefixCount(&chunk_data, &remaining, &host, &prefix_count);
      SBChunkHost chunk_host;
      chunk_host.host = host;
      chunk_host.entry = SBEntry::Create(type, prefix_count);
      hosts->push_back(chunk_host);
      if (prefix_count == 0) {
        // There is only an add chunk number (no prefixes).
        chunk_host.entry->set_chunk_id(ReadChunkId(&chunk_data, &remaining));
        continue;
      }
      if (!ReadPrefixes(&chunk_data, &remaining, chunk_host.entry,
                        prefix_count))
        return false;
    }
  }
  return remaining == 0;
}

// Reads a record header from |*data|: sizeof(SBPrefix) bytes copied verbatim
// into |*host|, followed by one byte interpreted as an unsigned count and
// stored in |*count|. |*data| is advanced past the header and |*remaining|
// is reduced by the header size. No bounds checking is done here; the caller
// must make sure the header fits in the buffer.
void SafeBrowsingProtocolParser::ReadHostAndPrefixCount(
    const char** data, int* remaining, SBPrefix* host, int* count) {
  const char* cursor = *data;

  // Host key.
  memcpy(host, cursor, sizeof(SBPrefix));
  cursor += sizeof(SBPrefix);

  // One-byte count; read as unsigned so it is never negative.
  *count = static_cast<unsigned char>(*cursor);
  ++cursor;

  *data = cursor;
  *remaining -= sizeof(SBPrefix) + 1;
}

// Reads sizeof(int) bytes from |*data| as a chunk number stored in network
// byte order and returns it in host byte order. |*data| is advanced past the
// bytes read and |*remaining| is reduced by the same amount. No bounds
// checking is done here; the caller must make sure the bytes are available.
int SafeBrowsingProtocolParser::ReadChunkId(
    const char** data, int* remaining) {
  int network_order_id;
  const size_t id_size = sizeof(network_order_id);

  memcpy(&network_order_id, *data, id_size);
  *data += id_size;
  *remaining -= id_size;

  // ntohl performs the same byte swap as htonl; it is used here because the
  // conversion is from network to host order.
  return ntohl(network_order_id);
}

bool SafeBrowsingProtocolParser::ReadPrefixes(
    const char** data, int* remaining, SBEntry* entry, int count) {
  int hash_len = entry->HashLen();
  for (int i = 0; i < count; ++i) {
    if (entry->IsSub()) {
      entry->SetChunkIdAtPrefix(i, ReadChunkId(data, remaining));
      if (*remaining <= 0)
        return false;
    }

    if (entry->IsPrefix()) {
      entry->SetPrefixAt(i, *reinterpret_cast<const SBPrefix*>(*data));
    } else {
      entry->SetFullHashAt(i, *reinterpret_cast<const SBFullHash*>(*data));
    }
    *data += hash_len;
    *remaining -= hash_len;
    if (*remaining < 0)
      return false;
  }

  return true;
}

// Parses a new-key response stored in the |chunk_length| bytes at
// |chunk_data|. Every line must have the form "<name>:<length>:<value>",
// where <name> is "clientkey" or "wrappedkey" and <length> equals the size of
// <value>. The values are stored in |client_key| and |wrapped_key|, both of
// which are cleared first.
//
// Returns true only if every line is well formed and both keys end up
// non-empty.
bool SafeBrowsingProtocolParser::ParseNewKey(const char* chunk_data,
                                             int chunk_length,
                                             std::string* client_key,
                                             std::string* wrapped_key) {
  DCHECK(client_key && wrapped_key);
  client_key->clear();
  wrapped_key->clear();

  const char* cursor = chunk_data;
  int bytes_left = chunk_length;

  while (bytes_left > 0) {
    std::string entry;
    if (!GetLine(cursor, bytes_left, &entry))
      return false;

    std::vector<std::string> fields;
    base::SplitString(entry, ':', &fields);
    if (fields.size() != 3)
      return false;

    const std::string& name = fields[0];
    const std::string& value = fields[2];

    // The declared length must match the value that was actually received.
    const int declared_len = atoi(fields[1].c_str());
    if (declared_len != static_cast<int>(value.size()))
      return false;

    if (name == "clientkey")
      client_key->assign(value);
    else if (name == "wrappedkey")
      wrapped_key->assign(value);
    else
      return false;

    // Step over the line and its trailing newline.
    const int consumed = static_cast<int>(entry.size()) + 1;
    cursor += consumed;
    bytes_left -= consumed;
  }

  return !client_key->empty() && !wrapped_key->empty();
}