safe_browsing_database.cc - Android社区 - https://www.androidos.net.cn/

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/file_util.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/time.h"
#include "base/message_loop.h"
#include "base/process_util.h"
#include "crypto/sha2.h"
#include "chrome/browser/safe_browsing/bloom_filter.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/browser/browser_thread.h"
#include "googleurl/src/gurl.h"

namespace {

// Filename suffix for the bloom filter.
const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for download store.
const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for browse store.
// TODO(lzheng): change to a better name when we change the file format.
const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// The maximum staleness for a cached entry.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in the client-side phishing detection
// whitelist.  If the whitelist on disk contains more entries then
// ContainsCsdWhitelistedUrl will always return true.
const size_t kMaxCsdWhitelistSize = 5000;

// If the hash of this exact expression is on the csd whitelist then
// ContainsCsdWhitelistedUrl will always return true.
const char kCsdKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";

// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits.  These functions decode that information.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.
int GetListIdBit(const int encoded_chunk_id) {
  return encoded_chunk_id & 1;
}
int DecodeChunkId(int encoded_chunk_id) {
  return encoded_chunk_id >> 1;
}
int EncodeChunkId(const int chunk, const int list_id) {
  DCHECK_NE(list_id, safe_browsing_util::INVALID);
  return chunk << 1 | list_id % 2;
}

// Generate the set of full hashes to check for |url|.  If
// |include_whitelist_hashes| is true we will generate additional path-prefixes
// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
// whitelist it should also match /foo/bar which is not the case for all the
// other lists.
// TODO(shess): This function is almost the same as
// |CompareFullHashes()| in safe_browsing_util.cc, except that code
// does an early exit on match.  Since match should be the infrequent
// case (phishing or malware found), consider combining this function
// with that one.
void BrowseFullHashesToCheck(const GURL& url,
                             bool include_whitelist_hashes,
                             std::vector<SBFullHash>* full_hashes) {
  std::vector<std::string> hosts;
  if (url.HostIsIPAddress()) {
    hosts.push_back(url.host());
  } else {
    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
  }

std::vector<std::string> paths;
  safe_browsing_util::GeneratePathsToCheck(url, &paths);

for (size_t i = 0; i < hosts.size(); ++i) {
    for (size_t j = 0; j < paths.size(); ++j) {
      const std::string& path = paths[j];
      SBFullHash full_hash;
      crypto::SHA256HashString(hosts[i] + path, &full_hash,
                               sizeof(full_hash));
      full_hashes->push_back(full_hash);

// We may have /foo as path-prefix in the whitelist which should
      // also match with /foo/bar and /foo?bar.  Hence, for every path
      // that ends in '/' we also add the path without the slash.
      if (include_whitelist_hashes &&
          path.size() > 1 &&
          path[path.size() - 1] == '/') {
        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
                                 &full_hash, sizeof(full_hash));
        full_hashes->push_back(full_hash);
      }
    }
  }
}

// Get the prefixes matching the download |urls|.
void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
                            std::vector<SBPrefix>* prefixes) {
  std::vector<SBFullHash> full_hashes;
  for (size_t i = 0; i < urls.size(); ++i)
    BrowseFullHashesToCheck(urls[i], false, &full_hashes);

for (size_t i = 0; i < full_hashes.size(); ++i)
    prefixes->push_back(full_hashes[i].prefix);
}

// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
// add them to |full_hits| if not expired.  "Not expired" is when
// either |last_update| was recent enough, or the item has been
// received recently enough.  Expired items are not deleted because a
// future update may make them acceptable again.
//
// For efficiency reasons the code walks |prefix_hits| and
// |full_hashes| in parallel, so they must be sorted by prefix.
void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
                                  const std::vector<SBAddFullHash>& full_hashes,
                                  std::vector<SBFullHashResult>* full_hits,
                                  base::Time last_update) {
  const base::Time expire_time =
      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);

std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();

while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
    if (*piter < hiter->full_hash.prefix) {
      ++piter;
    } else if (hiter->full_hash.prefix < *piter) {
      ++hiter;
    } else {
      if (expire_time < last_update ||
          expire_time.ToTimeT() < hiter->received) {
        SBFullHashResult result;
        const int list_bit = GetListIdBit(hiter->chunk_id);
        DCHECK(list_bit == safe_browsing_util::MALWARE ||
               list_bit == safe_browsing_util::PHISH);
        if (!safe_browsing_util::GetListName(list_bit, &result.list_name))
          continue;
        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
        result.hash = hiter->full_hash;
        full_hits->push_back(result);
      }

// Only increment |hiter|, |piter| might have multiple hits.
      ++hiter;
    }
  }
}

// This function generates a chunk range string for |chunks|. It
// outputs one chunk range string per list and writes it to the
// |list_ranges| vector.  We expect |list_ranges| to already be of the
// right size.  E.g., if |chunks| contains chunks with two different
// list ids then |list_ranges| must contain two elements.
void GetChunkRanges(const std::vector<int>& chunks,
                    std::vector<std::string>* list_ranges) {
  DCHECK_GT(list_ranges->size(), 0U);
  DCHECK_LE(list_ranges->size(), 2U);
  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
  for (std::vector<int>::const_iterator iter = chunks.begin();
       iter != chunks.end(); ++iter) {
    int mod_list_id = GetListIdBit(*iter);
    DCHECK_GE(mod_list_id, 0);
    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
  }
  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
  }
}

// Helper function to create chunk range lists for Browse related
// lists.
void UpdateChunkRanges(SafeBrowsingStore* store,
                       const std::vector<std::string>& listnames,
                       std::vector<SBListChunkRanges>* lists) {
  DCHECK_GT(listnames.size(), 0U);
  DCHECK_LE(listnames.size(), 2U);
  std::vector<int> add_chunks;
  std::vector<int> sub_chunks;
  store->GetAddChunks(&add_chunks);
  store->GetSubChunks(&sub_chunks);

std::vector<std::string> adds(listnames.size());
  std::vector<std::string> subs(listnames.size());
  GetChunkRanges(add_chunks, &adds);
  GetChunkRanges(sub_chunks, &subs);

for (size_t i = 0; i < listnames.size(); ++i) {
    const std::string& listname = listnames[i];
    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
              static_cast<int>(i % 2));
    DCHECK_NE(safe_browsing_util::GetListId(listname),
              safe_browsing_util::INVALID);
    lists->push_back(SBListChunkRanges(listname));
    lists->back().adds.swap(adds[i]);
    lists->back().subs.swap(subs[i]);
  }
}

// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
// safe_browsing_store.h orders on both chunk-id and prefix.
bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
  return a.full_hash.prefix < b.full_hash.prefix;
}

// As compared to the bloom filter, PrefixSet should have these
// properties:
// - Any bloom filter miss should be a prefix set miss.
// - Any prefix set hit should be a bloom filter hit.
// - Bloom filter false positives are prefix set misses.
// The following is to log actual performance to verify this.
enum PrefixSetEvent {
  PREFIX_SET_EVENT_HIT,
  PREFIX_SET_EVENT_BLOOM_HIT,
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
  PREFIX_SET_GETPREFIXES_BROKEN,
  PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
  PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
  PREFIX_SET_SBPREFIX_WAS_BROKEN,
  PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
  PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
  PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
  PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,

// Memory space for histograms is determined by the max.  ALWAYS ADD
  // NEW VALUES BEFORE THIS ONE.
  PREFIX_SET_EVENT_MAX
};

void RecordPrefixSetInfo(PrefixSetEvent event_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
                            PREFIX_SET_EVENT_MAX);
}

// Generate a |PrefixSet| instance from the contents of
// |add_prefixes|.  Additionally performs various checks to make sure
// that the resulting prefix set is valid, so that the
// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
// ContainsBrowseUrl() can be trustworthy.
safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
    const std::vector<SBAddPrefix>& add_prefixes) {
  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
  // could be passed directly to |PrefixSet()|, removing the need for
  // |prefixes|.  For now, |prefixes| is useful while debugging
  // things.
  std::vector<SBPrefix> prefixes;
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    prefixes.push_back(add_prefixes[i].prefix);
  }

std::sort(prefixes.begin(), prefixes.end());
  prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
                 prefixes.end());

scoped_ptr<safe_browsing::PrefixSet>
      prefix_set(new safe_browsing::PrefixSet(prefixes));

std::vector<SBPrefix> restored;
  prefix_set->GetPrefixes(&restored);

// Expect them to be equal.
  if (restored.size() == prefixes.size() &&
      std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
    return prefix_set.release();

// Log BROKEN for continuity with previous release, and SIZE to
  // distinguish which test failed.
  NOTREACHED();
  RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
  if (restored.size() != prefixes.size())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);

// Try to distinguish between updates from one broken user and a
  // distributed problem.
  static bool logged_broken = false;
  if (!logged_broken) {
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
    logged_broken = true;
  }

// This seems so very very unlikely.  But if it ever were true, then
  // it could explain why GetPrefixes() seemed broken.
  if (sizeof(int) != sizeof(int32))
    RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);

// Check if memory was corrupted during construction.
  if (!prefix_set->CheckChecksum())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);

// Check whether |restored| is unsorted, or has duplication.
  if (restored.size()) {
    size_t unsorted_count = 0;
    bool duplicates = false;
    SBPrefix prev = restored[0];
    for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
      if (prev > restored[i]) {
        unsorted_count++;
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
                             prev - restored[i]);

// When unsorted, how big is the set, and how far are we into
        // it.  If the set is very small or large, that might inform
        // pursuit of a degenerate case.  If the percentage is close
        // to 0%, 100%, or 50%, then there might be an interesting
        // degenerate case to explore.
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
        UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
                                 i * 100 / restored.size());

if (prefix_set->IsDeltaAt(i)) {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);

// Histograms require memory on the order of the number of
          // buckets, making high-precision logging expensive.  For
          // now aim for a sense of the range of the problem.
          UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
                                      prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
        } else {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
        }
      }
      if (prev == restored[i])
        duplicates = true;
    }

// Record findings.
    if (unsorted_count) {
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
      UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
    }
    if (duplicates)
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);

// Fix the problems noted.  If |restored| was unsorted, then
    // |duplicates| may give a false negative.
    if (unsorted_count)
      std::sort(restored.begin(), restored.end());
    if (unsorted_count || duplicates)
      restored.erase(std::unique(restored.begin(), restored.end()),
                     restored.end());
  }

// NOTE(shess): The following could be done using a single
  // uber-loop, but it's complicated by needing multiple parallel
  // iterators.  Didn't seem worthwhile for something that will only
  // live for a short period and only fires for one in a million
  // updates.

// Find elements in |restored| which are not in |prefixes|.
  std::vector<SBPrefix> difference;
  std::set_difference(restored.begin(), restored.end(),
                      prefixes.begin(), prefixes.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());

// Find elements in |prefixes| which are not in |restored|.
  difference.clear();
  std::set_difference(prefixes.begin(), prefixes.end(),
                      restored.begin(), restored.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
                             difference.size());

return prefix_set.release();
}

}  // namespace

// The default SafeBrowsingDatabaseFactory.
class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
 public:
  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
      bool enable_download_protection,
      bool enable_client_side_whitelist) {
    return new SafeBrowsingDatabaseNew(
        new SafeBrowsingStoreFile,
        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL);
  }

SafeBrowsingDatabaseFactoryImpl() { }

private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
};

// static
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;

// Factory method, non-thread safe. Caller has to make sure this s called
// on SafeBrowsing Thread.
// TODO(shess): There's no need for a factory any longer.  Convert
// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
// callers just construct things directly.
SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
    bool enable_download_protection,
    bool enable_client_side_whitelist) {
  if (!factory_)
    factory_ = new SafeBrowsingDatabaseFactoryImpl();
  return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
                                              enable_client_side_whitelist);
}

SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}

// static
FilePath SafeBrowsingDatabase::BrowseDBFilename(
         const FilePath& db_base_filename) {
  return FilePath(db_base_filename.value() + kBrowseDBFile);
}

// static
FilePath SafeBrowsingDatabase::DownloadDBFilename(
    const FilePath& db_base_filename) {
  return FilePath(db_base_filename.value() + kDownloadDBFile);
}

// static
FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    const FilePath& db_filename) {
  return FilePath(db_filename.value() + kBloomFilterFile);
}

// static
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    const FilePath& db_filename) {
  return FilePath(db_filename.value() + kCsdWhitelistDBFile);
}

SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
  DVLOG(3) << "Get store for list: " << list_id;
  if (list_id == safe_browsing_util::PHISH ||
      list_id == safe_browsing_util::MALWARE) {
    return browse_store_.get();
  } else if (list_id == safe_browsing_util::BINURL ||
             list_id == safe_browsing_util::BINHASH) {
    return download_store_.get();
  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
    return csd_whitelist_store_.get();
  }
  return NULL;
}

// static
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}

SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      download_store_(NULL),
      csd_whitelist_store_(NULL),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) {
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
}

SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
    SafeBrowsingStore* browse_store,
    SafeBrowsingStore* download_store,
    SafeBrowsingStore* csd_whitelist_store)
    : creation_loop_(MessageLoop::current()),
      browse_store_(browse_store),
      download_store_(download_store),
      csd_whitelist_store_(csd_whitelist_store),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
      corruption_detected_(false) {
  DCHECK(browse_store_.get());
}

SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
}

void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  // Ensure we haven't been run before.
  DCHECK(browse_filename_.empty());
  DCHECK(download_filename_.empty());
  DCHECK(csd_whitelist_filename_.empty());

browse_filename_ = BrowseDBFilename(filename_base);
  bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);

browse_store_->Init(
      browse_filename_,
      NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
  DVLOG(1) << "Init browse store: " << browse_filename_.value();

{
    // NOTE: There is no need to grab the lock in this function, since
    // until it returns, there are no pointers to this class on other
    // threads.  Then again, that means there is no possibility of
    // contention on the lock...
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.clear();
    pending_browse_hashes_.clear();
    LoadBloomFilter();
  }

if (download_store_.get()) {
    download_filename_ = DownloadDBFilename(filename_base);
    download_store_->Init(
        download_filename_,
        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    DVLOG(1) << "Init download store: " << download_filename_.value();
  }

if (csd_whitelist_store_.get()) {
    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
    csd_whitelist_store_->Init(
        csd_whitelist_filename_,
        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
    std::vector<SBAddFullHash> full_hashes;
    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
      LoadCsdWhitelist(full_hashes);
    } else {
      CsdWhitelistAllUrls();
    }
  } else {
    CsdWhitelistAllUrls();  // Just to be safe.
  }
}

bool SafeBrowsingDatabaseNew::ResetDatabase() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

// Delete files on disk.
  // TODO(shess): Hard to see where one might want to delete without a
  // reset.  Perhaps inline |Delete()|?
  if (!Delete())
    return false;

// Reset objects in memory.
  {
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.clear();
    pending_browse_hashes_.clear();
    prefix_miss_cache_.clear();
    // TODO(shess): This could probably be |bloom_filter_.reset()|.
    browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
                                           BloomFilter::kBloomFilterSizeRatio);
    // TODO(shess): It is simpler for the code to assume that presence
    // of a bloom filter always implies presence of a prefix set.
    prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
  }
  // Wants to acquire the lock itself.
  CsdWhitelistAllUrls();

return true;
}

// TODO(lzheng): Remove matching_list, it is not used anywhere.
bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
    const GURL& url,
    std::string* matching_list,
    std::vector<SBPrefix>* prefix_hits,
    std::vector<SBFullHashResult>* full_hits,
    base::Time last_update) {
  // Clear the results first.
  matching_list->clear();
  prefix_hits->clear();
  full_hits->clear();

std::vector<SBFullHash> full_hashes;
  BrowseFullHashesToCheck(url, false, &full_hashes);
  if (full_hashes.empty())
    return false;

// This function is called on the I/O thread, prevent changes to
  // bloom filter and caches.
  base::AutoLock locked(lookup_lock_);

if (!browse_bloom_filter_.get())
    return false;
  DCHECK(prefix_set_.get());

// Used to double-check in case of a hit mis-match.
  std::vector<SBPrefix> restored;

size_t miss_count = 0;
  for (size_t i = 0; i < full_hashes.size(); ++i) {
    bool found = prefix_set_->Exists(full_hashes[i].prefix);

if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
      RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
      if (found)
        RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
      prefix_hits->push_back(full_hashes[i].prefix);
      if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
        ++miss_count;
    } else {
      // Bloom filter misses should never be in prefix set.  Re-create
      // the original prefixes and manually search for it, to check if
      // there's a bug with how |Exists()| is implemented.
      // |UpdateBrowseStore()| previously verified that
      // |GetPrefixes()| returns the same prefixes as were passed to
      // the constructor.
      DCHECK(!found);
      if (found) {
        if (restored.empty())
          prefix_set_->GetPrefixes(&restored);

// If the item is not in the re-created list, then there is an
        // error in |PrefixSet::Exists()|.  If the item is in the
        // re-created list, then the bloom filter was wrong.
        if (std::binary_search(restored.begin(), restored.end(),
                               full_hashes[i].prefix)) {
          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
        } else {
          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
        }
      }
    }
  }

// If all the prefixes are cached as 'misses', don't issue a GetHash.
  if (miss_count == prefix_hits->size())
    return false;

// Find the matching full-hash results.  |full_browse_hashes_| are from the
  // database, |pending_browse_hashes_| are from GetHash requests between
  // updates.
  std::sort(prefix_hits->begin(), prefix_hits->end());

GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
                               full_hits, last_update);
  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
                               full_hits, last_update);
  return true;
}

bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
    int list_bit,
    const std::vector<SBPrefix>& prefixes,
    std::vector<SBPrefix>* prefix_hits) {
  prefix_hits->clear();

std::vector<SBAddPrefix> add_prefixes;
  download_store_->GetAddPrefixes(&add_prefixes);
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    for (size_t j = 0; j < prefixes.size(); ++j) {
      const SBPrefix& prefix = prefixes[j];
      if (prefix == add_prefixes[i].prefix &&
          GetListIdBit(add_prefixes[i].chunk_id) == list_bit) {
        prefix_hits->push_back(prefix);
      }
    }
  }
  return !prefix_hits->empty();
}

bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
    const std::vector<GURL>& urls,
    std::vector<SBPrefix>* prefix_hits) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

// Ignore this check when download checking is not enabled.
  if (!download_store_.get())
    return false;

std::vector<SBPrefix> prefixes;
  GetDownloadUrlPrefixes(urls, &prefixes);
  return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2,
                                  prefixes,
                                  prefix_hits);
}

bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
    const SBPrefix& prefix) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

// Ignore this check when download store is not available.
  if (!download_store_.get())
    return false;

std::vector<SBPrefix> prefixes(1, prefix);
  std::vector<SBPrefix> prefix_hits;
  return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
                                  prefixes,
                                  &prefix_hits);
}

bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
  // This method is theoretically thread-safe but we expect all calls to
  // originate from the IO thread.
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  base::AutoLock l(lookup_lock_);
  if (csd_whitelist_all_urls_)
    return true;

std::vector<SBFullHash> full_hashes;
  BrowseFullHashesToCheck(url, true, &full_hashes);
  for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin();
       it != full_hashes.end(); ++it) {
    if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it))
      return true;
  }
  return false;
}

// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
                                        const SBEntry* entry, int list_id) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

STATS_COUNTER("SB.HostInsert", 1);
  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
  const int count = entry->prefix_count();

DCHECK(!entry->IsSub());
  if (!count) {
    // No prefixes, use host instead.
    STATS_COUNTER("SB.PrefixAdd", 1);
    store->WriteAddPrefix(encoded_chunk_id, host);
  } else if (entry->IsPrefix()) {
    // Prefixes only.
    for (int i = 0; i < count; i++) {
      const SBPrefix prefix = entry->PrefixAt(i);
      STATS_COUNTER("SB.PrefixAdd", 1);
      store->WriteAddPrefix(encoded_chunk_id, prefix);
    }
  } else {
    // Prefixes and hashes.
    const base::Time receive_time = base::Time::Now();
    for (int i = 0; i < count; ++i) {
      const SBFullHash full_hash = entry->FullHashAt(i);
      const SBPrefix prefix = full_hash.prefix;

STATS_COUNTER("SB.PrefixAdd", 1);
      store->WriteAddPrefix(encoded_chunk_id, prefix);

STATS_COUNTER("SB.PrefixAddFull", 1);
      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
    }
  }
}

// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
                                              const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

for (SBChunkList::const_iterator citer = chunks.begin();
       citer != chunks.end(); ++citer) {
    const int chunk_id = citer->chunk_number;

// The server can give us a chunk that we already have because
    // it's part of a range.  Don't add it again.
    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    if (store->CheckAddChunk(encoded_chunk_id))
      continue;

store->SetAddChunk(encoded_chunk_id);
    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
         hiter != citer->hosts.end(); ++hiter) {
      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
      // chunks would look different from inserting sub chunks.
      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
    }
  }
}

// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
                                        const SBEntry* entry, int list_id) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

STATS_COUNTER("SB.HostDelete", 1);
  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
  const int count = entry->prefix_count();

DCHECK(entry->IsSub());
  if (!count) {
    // No prefixes, use host instead.
    STATS_COUNTER("SB.PrefixSub", 1);
    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
  } else if (entry->IsPrefix()) {
    // Prefixes only.
    for (int i = 0; i < count; i++) {
      const SBPrefix prefix = entry->PrefixAt(i);
      const int add_chunk_id =
          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);

STATS_COUNTER("SB.PrefixSub", 1);
      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
    }
  } else {
    // Prefixes and hashes.
    for (int i = 0; i < count; ++i) {
      const SBFullHash full_hash = entry->FullHashAt(i);
      const int add_chunk_id =
          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);

STATS_COUNTER("SB.PrefixSub", 1);
      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);

STATS_COUNTER("SB.PrefixSubFull", 1);
      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
    }
  }
}

// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
                                              const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

for (SBChunkList::const_iterator citer = chunks.begin();
       citer != chunks.end(); ++citer) {
    const int chunk_id = citer->chunk_number;

store->SetSubChunk(encoded_chunk_id);
    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
         hiter != citer->hosts.end(); ++hiter) {
      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
    }
  }
}

void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
                                           const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

if (corruption_detected_ || chunks.empty())
    return;

const base::Time insert_start = base::Time::Now();

const int list_id = safe_browsing_util::GetListId(list_name);
  DVLOG(2) << list_name << ": " << list_id;

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

change_detected_ = true;

store->BeginChunk();
  if (chunks.front().is_add) {
    InsertAddChunks(list_id, chunks);
  } else {
    InsertSubChunks(list_id, chunks);
  }
  store->FinishChunk();

UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
}

void SafeBrowsingDatabaseNew::DeleteChunks(
    const std::vector<SBChunkDelete>& chunk_deletes) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

if (corruption_detected_ || chunk_deletes.empty())
    return;

const std::string& list_name = chunk_deletes.front().list_name;
  const int list_id = safe_browsing_util::GetListId(list_name);

SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

change_detected_ = true;

for (size_t i = 0; i < chunk_deletes.size(); ++i) {
    std::vector<int> chunk_numbers;
    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
      if (chunk_deletes[i].is_sub_del)
        store->DeleteSubChunk(encoded_chunk_id);
      else
        store->DeleteAddChunk(encoded_chunk_id);
    }
  }
}

void SafeBrowsingDatabaseNew::CacheHashResults(
    const std::vector<SBPrefix>& prefixes,
    const std::vector<SBFullHashResult>& full_hits) {
  // This is called on the I/O thread, lock against updates.
  base::AutoLock locked(lookup_lock_);

if (full_hits.empty()) {
    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
    return;
  }

// TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
  // Refactor to make them identical.
  const base::Time now = base::Time::Now();
  const size_t orig_size = pending_browse_hashes_.size();
  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
       iter != full_hits.end(); ++iter) {
    const int list_id = safe_browsing_util::GetListId(iter->list_name);
    if (list_id == safe_browsing_util::MALWARE ||
        list_id == safe_browsing_util::PHISH) {
      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
      pending_browse_hashes_.push_back(add_full_hash);
    }
  }

// Sort new entries then merge with the previously-sorted entries.
  std::vector<SBAddFullHash>::iterator
      orig_end = pending_browse_hashes_.begin() + orig_size;
  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
  std::inplace_merge(pending_browse_hashes_.begin(),
                     orig_end, pending_browse_hashes_.end(),
                     SBAddFullHashPrefixLess);
}

bool SafeBrowsingDatabaseNew::UpdateStarted(
    std::vector<SBListChunkRanges>* lists) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  DCHECK(lists);

// If |BeginUpdate()| fails, reset the database.
  if (!browse_store_->BeginUpdate()) {
    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

if (download_store_.get() && !download_store_->BeginUpdate()) {
    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

std::vector<std::string> browse_listnames;
  browse_listnames.push_back(safe_browsing_util::kMalwareList);
  browse_listnames.push_back(safe_browsing_util::kPhishingList);
  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);

if (download_store_.get()) {
    std::vector<std::string> download_listnames;
    download_listnames.push_back(safe_browsing_util::kBinUrlList);
    download_listnames.push_back(safe_browsing_util::kBinHashList);
    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
  }

if (csd_whitelist_store_.get()) {
    std::vector<std::string> csd_whitelist_listnames;
    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
    UpdateChunkRanges(csd_whitelist_store_.get(),
                      csd_whitelist_listnames, lists);
  }

corruption_detected_ = false;
  change_detected_ = false;
  return true;
}

void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  if (corruption_detected_)
    return;

// Unroll the transaction if there was a protocol error or if the
  // transaction was empty.  This will leave the bloom filter, the
  // pending hashes, and the prefix miss cache in place.
  if (!update_succeeded || !change_detected_) {
    // Track empty updates to answer questions at http://crbug.com/72216 .
    if (update_succeeded && !change_detected_)
      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
    browse_store_->CancelUpdate();
    if (download_store_.get())
      download_store_->CancelUpdate();
    if (csd_whitelist_store_.get())
      csd_whitelist_store_->CancelUpdate();
    return;
  }

// for download
  UpdateDownloadStore();
  // for browsing
  UpdateBrowseStore();
  // for csd whitelist
  UpdateCsdWhitelistStore();
}

void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() {
  if (!csd_whitelist_store_.get())
    return;

// For the csd whitelist, we don't cache and save full hashes since all
  // hashes are already full.
  std::vector<SBAddFullHash> empty_add_hashes;

// Not needed for the csd whitelist.
  std::set<SBPrefix> empty_miss_cache;

// Note: prefixes will not be empty.  The current data store implementation
  // stores all full-length hashes as both full and prefix hashes.
  std::vector<SBAddPrefix> prefixes;
  std::vector<SBAddFullHash> full_hashes;
  if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes,
                                          empty_miss_cache,
                                          &prefixes,
                                          &full_hashes)) {
    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH);
    CsdWhitelistAllUrls();
    return;
  }
  LoadCsdWhitelist(full_hashes);
}

void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
  if (!download_store_.get())
    return;

// For download, we don't cache and save full hashes.
  std::vector<SBAddFullHash> empty_add_hashes;

// For download, backend lookup happens only if a prefix is in add list.
  // No need to pass in miss cache when call FinishUpdate to caculate
  // bloomfilter false positives.
  std::set<SBPrefix> empty_miss_cache;

// These results are not used after this call. Simply ignore the
  // returned value after FinishUpdate(...).
  std::vector<SBAddPrefix> add_prefixes_result;
  std::vector<SBAddFullHash> add_full_hashes_result;

if (!download_store_->FinishUpdate(empty_add_hashes,
                                     empty_miss_cache,
                                     &add_prefixes_result,
                                     &add_full_hashes_result))
    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
  return;
}

void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
  // Copy out the pending add hashes.  Copy rather than swapping in
  // case |ContainsBrowseURL()| is called before the new filter is complete.
  std::vector<SBAddFullHash> pending_add_hashes;
  {
    base::AutoLock locked(lookup_lock_);
    pending_add_hashes.insert(pending_add_hashes.end(),
                              pending_browse_hashes_.begin(),
                              pending_browse_hashes_.end());
  }

// Measure the amount of IO during the bloom filter build.
  base::IoCounters io_before, io_after;
  base::ProcessHandle handle = base::Process::Current().handle();
  scoped_ptr<base::ProcessMetrics> metric(
#if !defined(OS_MACOSX)
      base::ProcessMetrics::CreateProcessMetrics(handle)
#else
      // Getting stats only for the current process is enough, so NULL is fine.
      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
#endif
  );

// IoCounters are currently not supported on Mac, and may not be
  // available for Linux, so we check the result and only show IO
  // stats if they are available.
  const bool got_counters = metric->GetIOCounters(&io_before);

const base::Time before = base::Time::Now();

std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
                                   &add_prefixes, &add_full_hashes)) {
    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
    return;
  }

// Create and populate |filter| from |add_prefixes|.
  // TODO(shess): The bloom filter doesn't need to be a
  // scoped_refptr<> for this code.  Refactor that away.
  const int filter_size =
      BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
  scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    filter->Insert(add_prefixes[i].prefix);
  }

scoped_ptr<safe_browsing::PrefixSet>
      prefix_set(PrefixSetFromAddPrefixes(add_prefixes));

// This needs to be in sorted order by prefix for efficient access.
  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
            SBAddFullHashPrefixLess);

// Swap in the newly built filter and cache.
  {
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.swap(add_full_hashes);

// TODO(shess): If |CacheHashResults()| is posted between the
    // earlier lock and this clear, those pending hashes will be lost.
    // It could be fixed by only removing hashes which were collected
    // at the earlier point.  I believe that is fail-safe as-is (the
    // hash will be fetched again).
    pending_browse_hashes_.clear();
    prefix_miss_cache_.clear();
    browse_bloom_filter_.swap(filter);
    prefix_set_.swap(prefix_set);
  }

const base::TimeDelta bloom_gen = base::Time::Now() - before;

// Persist the bloom filter to disk.  Since only this thread changes
  // |browse_bloom_filter_|, there is no need to lock.
  WriteBloomFilter();

// Gather statistics.
  if (got_counters && metric->GetIOCounters(&io_after)) {
    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
                         static_cast<int>(io_after.ReadTransferCount -
                                          io_before.ReadTransferCount) / 1024);
    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
                         static_cast<int>(io_after.WriteTransferCount -
                                          io_before.WriteTransferCount) / 1024);
    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
                         static_cast<int>(io_after.ReadOperationCount -
                                          io_before.ReadOperationCount));
    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
                         static_cast<int>(io_after.WriteOperationCount -
                                          io_before.WriteOperationCount));
  }
  DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
           << bloom_gen.InMilliseconds() << " ms total.  prefix count: "
           << add_prefixes.size();
  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
  UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
                       browse_bloom_filter_->size() / 1024);
  int64 size_64;
  if (file_util::GetFileSize(browse_filename_, &size_64))
    UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
                         static_cast<int>(size_64 / 1024));
  if (file_util::GetFileSize(download_filename_, &size_64))
    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
                         static_cast<int>(size_64 / 1024));
}

void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
  // Reset the database after the current task has unwound (but only
  // reset once within the scope of a given task).
  if (reset_factory_.empty()) {
    RecordFailure(FAILURE_DATABASE_CORRUPT);
    MessageLoop::current()->PostTask(FROM_HERE,
        reset_factory_.NewRunnableMethod(
            &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase));
  }
}

void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
  corruption_detected_ = true;  // Stop updating the database.
  ResetDatabase();
  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
}

// TODO(shess): I'm not clear why this code doesn't have any
// real error-handling.
void SafeBrowsingDatabaseNew::LoadBloomFilter() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  DCHECK(!bloom_filter_filename_.empty());

// If we're missing either of the database or filter files, we wait until the
  // next update to generate a new filter.
  // TODO(paulg): Investigate how often the filter file is missing and how
  // expensive it would be to regenerate it.
  int64 size_64;
  if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
    return;

if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
      size_64 == 0) {
    RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
    return;
  }

const base::TimeTicks before = base::TimeTicks::Now();
  browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
  DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";

if (!browse_bloom_filter_.get())
    RecordFailure(FAILURE_DATABASE_FILTER_READ);

// Manually re-generate the prefix set from the main database.
  // TODO(shess): Write/read for prefix set.
  std::vector<SBAddPrefix> add_prefixes;
  browse_store_->GetAddPrefixes(&add_prefixes);
  prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
}

bool SafeBrowsingDatabaseNew::Delete() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

const bool r1 = browse_store_->Delete();
  if (!r1)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

const bool r2 = download_store_.get() ? download_store_->Delete() : true;
  if (!r2)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

const bool r3 = csd_whitelist_store_.get() ?
      csd_whitelist_store_->Delete() : true;
  if (!r3)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

const bool r4 = file_util::Delete(bloom_filter_filename_, false);
  if (!r4)
    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
  return r1 && r2 && r3 && r4;
}

void SafeBrowsingDatabaseNew::WriteBloomFilter() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

if (!browse_bloom_filter_.get())
    return;

const base::TimeTicks before = base::TimeTicks::Now();
  const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
  DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";

if (!write_ok)
    RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
}

void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() {
  base::AutoLock locked(lookup_lock_);
  csd_whitelist_all_urls_ = true;
  csd_whitelist_.clear();
}

void SafeBrowsingDatabaseNew::LoadCsdWhitelist(
    const std::vector<SBAddFullHash>& full_hashes) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  if (full_hashes.size() > kMaxCsdWhitelistSize) {
    CsdWhitelistAllUrls();
    return;
  }

std::vector<SBFullHash> new_csd_whitelist;
  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
       it != full_hashes.end(); ++it) {
    new_csd_whitelist.push_back(it->full_hash);
  }
  std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end());

SBFullHash kill_switch;
  crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch,
                           sizeof(kill_switch));
  if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(),
                         kill_switch)) {
    // The kill switch is whitelisted hence we whitelist all URLs.
    CsdWhitelistAllUrls();
  } else {
    base::AutoLock locked(lookup_lock_);
    csd_whitelist_all_urls_ = false;
    csd_whitelist_.swap(new_csd_whitelist);
  }
}

普通文本 | 1318行 | 47.86 KB

// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/safe_browsing/safe_browsing_database.h"

#include <algorithm>
#include <iterator>

#include "base/file_util.h"
#include "base/metrics/histogram.h"
#include "base/metrics/stats_counters.h"
#include "base/time.h"
#include "base/message_loop.h"
#include "base/process_util.h"
#include "crypto/sha2.h"
#include "chrome/browser/safe_browsing/bloom_filter.h"
#include "chrome/browser/safe_browsing/prefix_set.h"
#include "chrome/browser/safe_browsing/safe_browsing_store_file.h"
#include "content/browser/browser_thread.h"
#include "googleurl/src/gurl.h"

namespace {

// Filename suffix for the bloom filter.
const FilePath::CharType kBloomFilterFile[] = FILE_PATH_LITERAL(" Filter 2");
// Filename suffix for download store.
const FilePath::CharType kDownloadDBFile[] = FILE_PATH_LITERAL(" Download");
// Filename suffix for client-side phishing detection whitelist store.
const FilePath::CharType kCsdWhitelistDBFile[] =
    FILE_PATH_LITERAL(" Csd Whitelist");
// Filename suffix for browse store.
// TODO(lzheng): change to a better name when we change the file format.
const FilePath::CharType kBrowseDBFile[] = FILE_PATH_LITERAL(" Bloom");

// The maximum staleness for a cached entry.
const int kMaxStalenessMinutes = 45;

// Maximum number of entries we allow in the client-side phishing detection
// whitelist.  If the whitelist on disk contains more entries then
// ContainsCsdWhitelistedUrl will always return true.
const size_t kMaxCsdWhitelistSize = 5000;

// If the hash of this exact expression is on the csd whitelist then
// ContainsCsdWhitelistedUrl will always return true.
const char kCsdKillSwitchUrl[] =
    "sb-ssl.google.com/safebrowsing/csd/killswitch";

// To save space, the incoming |chunk_id| and |list_id| are combined
// into an |encoded_chunk_id| for storage by shifting the |list_id|
// into the low-order bits.  These functions decode that information.
// TODO(lzheng): It was reasonable when database is saved in sqlite, but
// there should be better ways to save chunk_id and list_id after we use
// SafeBrowsingStoreFile.
int GetListIdBit(const int encoded_chunk_id) {
  return encoded_chunk_id & 1;
}
int DecodeChunkId(int encoded_chunk_id) {
  return encoded_chunk_id >> 1;
}
int EncodeChunkId(const int chunk, const int list_id) {
  DCHECK_NE(list_id, safe_browsing_util::INVALID);
  return chunk << 1 | list_id % 2;
}

// Generate the set of full hashes to check for |url|.  If
// |include_whitelist_hashes| is true we will generate additional path-prefixes
// to match against the csd whitelist.  E.g., if the path-prefix /foo is on the
// whitelist it should also match /foo/bar which is not the case for all the
// other lists.
// TODO(shess): This function is almost the same as
// |CompareFullHashes()| in safe_browsing_util.cc, except that code
// does an early exit on match.  Since match should be the infrequent
// case (phishing or malware found), consider combining this function
// with that one.
void BrowseFullHashesToCheck(const GURL& url,
                             bool include_whitelist_hashes,
                             std::vector<SBFullHash>* full_hashes) {
  std::vector<std::string> hosts;
  if (url.HostIsIPAddress()) {
    hosts.push_back(url.host());
  } else {
    safe_browsing_util::GenerateHostsToCheck(url, &hosts);
  }

  std::vector<std::string> paths;
  safe_browsing_util::GeneratePathsToCheck(url, &paths);

  for (size_t i = 0; i < hosts.size(); ++i) {
    for (size_t j = 0; j < paths.size(); ++j) {
      const std::string& path = paths[j];
      SBFullHash full_hash;
      crypto::SHA256HashString(hosts[i] + path, &full_hash,
                               sizeof(full_hash));
      full_hashes->push_back(full_hash);

      // We may have /foo as path-prefix in the whitelist which should
      // also match with /foo/bar and /foo?bar.  Hence, for every path
      // that ends in '/' we also add the path without the slash.
      if (include_whitelist_hashes &&
          path.size() > 1 &&
          path[path.size() - 1] == '/') {
        crypto::SHA256HashString(hosts[i] + path.substr(0, path.size() - 1),
                                 &full_hash, sizeof(full_hash));
        full_hashes->push_back(full_hash);
      }
    }
  }
}

// Get the prefixes matching the download |urls|.
void GetDownloadUrlPrefixes(const std::vector<GURL>& urls,
                            std::vector<SBPrefix>* prefixes) {
  std::vector<SBFullHash> full_hashes;
  for (size_t i = 0; i < urls.size(); ++i)
    BrowseFullHashesToCheck(urls[i], false, &full_hashes);

  for (size_t i = 0; i < full_hashes.size(); ++i)
    prefixes->push_back(full_hashes[i].prefix);
}

// Find the entries in |full_hashes| with prefix in |prefix_hits|, and
// add them to |full_hits| if not expired.  "Not expired" is when
// either |last_update| was recent enough, or the item has been
// received recently enough.  Expired items are not deleted because a
// future update may make them acceptable again.
//
// For efficiency reasons the code walks |prefix_hits| and
// |full_hashes| in parallel, so they must be sorted by prefix.
void GetCachedFullHashesForBrowse(const std::vector<SBPrefix>& prefix_hits,
                                  const std::vector<SBAddFullHash>& full_hashes,
                                  std::vector<SBFullHashResult>* full_hits,
                                  base::Time last_update) {
  const base::Time expire_time =
      base::Time::Now() - base::TimeDelta::FromMinutes(kMaxStalenessMinutes);

  std::vector<SBPrefix>::const_iterator piter = prefix_hits.begin();
  std::vector<SBAddFullHash>::const_iterator hiter = full_hashes.begin();

  while (piter != prefix_hits.end() && hiter != full_hashes.end()) {
    if (*piter < hiter->full_hash.prefix) {
      ++piter;
    } else if (hiter->full_hash.prefix < *piter) {
      ++hiter;
    } else {
      if (expire_time < last_update ||
          expire_time.ToTimeT() < hiter->received) {
        SBFullHashResult result;
        const int list_bit = GetListIdBit(hiter->chunk_id);
        DCHECK(list_bit == safe_browsing_util::MALWARE ||
               list_bit == safe_browsing_util::PHISH);
        if (!safe_browsing_util::GetListName(list_bit, &result.list_name))
          continue;
        result.add_chunk_id = DecodeChunkId(hiter->chunk_id);
        result.hash = hiter->full_hash;
        full_hits->push_back(result);
      }

      // Only increment |hiter|, |piter| might have multiple hits.
      ++hiter;
    }
  }
}

// This function generates a chunk range string for |chunks|. It
// outputs one chunk range string per list and writes it to the
// |list_ranges| vector.  We expect |list_ranges| to already be of the
// right size.  E.g., if |chunks| contains chunks with two different
// list ids then |list_ranges| must contain two elements.
void GetChunkRanges(const std::vector<int>& chunks,
                    std::vector<std::string>* list_ranges) {
  DCHECK_GT(list_ranges->size(), 0U);
  DCHECK_LE(list_ranges->size(), 2U);
  std::vector<std::vector<int> > decoded_chunks(list_ranges->size());
  for (std::vector<int>::const_iterator iter = chunks.begin();
       iter != chunks.end(); ++iter) {
    int mod_list_id = GetListIdBit(*iter);
    DCHECK_GE(mod_list_id, 0);
    DCHECK_LT(static_cast<size_t>(mod_list_id), decoded_chunks.size());
    decoded_chunks[mod_list_id].push_back(DecodeChunkId(*iter));
  }
  for (size_t i = 0; i < decoded_chunks.size(); ++i) {
    ChunksToRangeString(decoded_chunks[i], &((*list_ranges)[i]));
  }
}

// Helper function to create chunk range lists for Browse related
// lists.
void UpdateChunkRanges(SafeBrowsingStore* store,
                       const std::vector<std::string>& listnames,
                       std::vector<SBListChunkRanges>* lists) {
  DCHECK_GT(listnames.size(), 0U);
  DCHECK_LE(listnames.size(), 2U);
  std::vector<int> add_chunks;
  std::vector<int> sub_chunks;
  store->GetAddChunks(&add_chunks);
  store->GetSubChunks(&sub_chunks);

  std::vector<std::string> adds(listnames.size());
  std::vector<std::string> subs(listnames.size());
  GetChunkRanges(add_chunks, &adds);
  GetChunkRanges(sub_chunks, &subs);

  for (size_t i = 0; i < listnames.size(); ++i) {
    const std::string& listname = listnames[i];
    DCHECK_EQ(safe_browsing_util::GetListId(listname) % 2,
              static_cast<int>(i % 2));
    DCHECK_NE(safe_browsing_util::GetListId(listname),
              safe_browsing_util::INVALID);
    lists->push_back(SBListChunkRanges(listname));
    lists->back().adds.swap(adds[i]);
    lists->back().subs.swap(subs[i]);
  }
}

// Order |SBAddFullHash| on the prefix part.  |SBAddPrefixLess()| from
// safe_browsing_store.h orders on both chunk-id and prefix.
bool SBAddFullHashPrefixLess(const SBAddFullHash& a, const SBAddFullHash& b) {
  return a.full_hash.prefix < b.full_hash.prefix;
}

// As compared to the bloom filter, PrefixSet should have these
// properties:
// - Any bloom filter miss should be a prefix set miss.
// - Any prefix set hit should be a bloom filter hit.
// - Bloom filter false positives are prefix set misses.
// The following is to log actual performance to verify this.
enum PrefixSetEvent {
  PREFIX_SET_EVENT_HIT,
  PREFIX_SET_EVENT_BLOOM_HIT,
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT,
  PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID,
  PREFIX_SET_GETPREFIXES_BROKEN,
  PREFIX_SET_GETPREFIXES_BROKEN_SIZE,
  PREFIX_SET_GETPREFIXES_FIRST_BROKEN,
  PREFIX_SET_SBPREFIX_WAS_BROKEN,
  PREFIX_SET_GETPREFIXES_BROKEN_SORTING,
  PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION,
  PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA,
  PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX,
  PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH,

  // Memory space for histograms is determined by the max.  ALWAYS ADD
  // NEW VALUES BEFORE THIS ONE.
  PREFIX_SET_EVENT_MAX
};

void RecordPrefixSetInfo(PrefixSetEvent event_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.PrefixSetEvent", event_type,
                            PREFIX_SET_EVENT_MAX);
}

// Generate a |PrefixSet| instance from the contents of
// |add_prefixes|.  Additionally performs various checks to make sure
// that the resulting prefix set is valid, so that the
// PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID histogram in
// ContainsBrowseUrl() can be trustworthy.
safe_browsing::PrefixSet* PrefixSetFromAddPrefixes(
    const std::vector<SBAddPrefix>& add_prefixes) {
  // TODO(shess): If |add_prefixes| were sorted by the prefix, it
  // could be passed directly to |PrefixSet()|, removing the need for
  // |prefixes|.  For now, |prefixes| is useful while debugging
  // things.
  std::vector<SBPrefix> prefixes;
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    prefixes.push_back(add_prefixes[i].prefix);
  }

  std::sort(prefixes.begin(), prefixes.end());
  prefixes.erase(std::unique(prefixes.begin(), prefixes.end()),
                 prefixes.end());

  scoped_ptr<safe_browsing::PrefixSet>
      prefix_set(new safe_browsing::PrefixSet(prefixes));

  std::vector<SBPrefix> restored;
  prefix_set->GetPrefixes(&restored);

  // Expect them to be equal.
  if (restored.size() == prefixes.size() &&
      std::equal(prefixes.begin(), prefixes.end(), restored.begin()))
    return prefix_set.release();

  // Log BROKEN for continuity with previous release, and SIZE to
  // distinguish which test failed.
  NOTREACHED();
  RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN);
  if (restored.size() != prefixes.size())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SIZE);

  // Try to distinguish between updates from one broken user and a
  // distributed problem.
  static bool logged_broken = false;
  if (!logged_broken) {
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_FIRST_BROKEN);
    logged_broken = true;
  }

  // This seems so very very unlikely.  But if it ever were true, then
  // it could explain why GetPrefixes() seemed broken.
  if (sizeof(int) != sizeof(int32))
    RecordPrefixSetInfo(PREFIX_SET_SBPREFIX_WAS_BROKEN);

  // Check if memory was corrupted during construction.
  if (!prefix_set->CheckChecksum())
    RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_CHECKSUM_MISMATCH);

  // Check whether |restored| is unsorted, or has duplication.
  if (restored.size()) {
    size_t unsorted_count = 0;
    bool duplicates = false;
    SBPrefix prev = restored[0];
    for (size_t i = 0; i < restored.size(); prev = restored[i], ++i) {
      if (prev > restored[i]) {
        unsorted_count++;
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedDifference",
                             prev - restored[i]);

        // When unsorted, how big is the set, and how far are we into
        // it.  If the set is very small or large, that might inform
        // pursuit of a degenerate case.  If the percentage is close
        // to 0%, 100%, or 50%, then there might be an interesting
        // degenerate case to explore.
        UMA_HISTOGRAM_COUNTS("SB2.PrefixSetUnsortedSize", restored.size());
        UMA_HISTOGRAM_PERCENTAGE("SB2.PrefixSetUnsortedPercent",
                                 i * 100 / restored.size());

        if (prefix_set->IsDeltaAt(i)) {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_DELTA);

          // Histograms require memory on the order of the number of
          // buckets, making high-precision logging expensive.  For
          // now aim for a sense of the range of the problem.
          UMA_HISTOGRAM_CUSTOM_COUNTS("SB2.PrefixSetUnsortedDelta",
                                      prefix_set->DeltaAt(i), 1, 0xFFFF, 50);
        } else {
          RecordPrefixSetInfo(PREFIX_SET_GETPREFIX_UNSORTED_IS_INDEX);
        }
      }
      if (prev == restored[i])
        duplicates = true;
    }

    // Record findings.
    if (unsorted_count) {
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_SORTING);
      UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetUnsorted", unsorted_count);
    }
    if (duplicates)
      RecordPrefixSetInfo(PREFIX_SET_GETPREFIXES_BROKEN_DUPLICATION);

    // Fix the problems noted.  If |restored| was unsorted, then
    // |duplicates| may give a false negative.
    if (unsorted_count)
      std::sort(restored.begin(), restored.end());
    if (unsorted_count || duplicates)
      restored.erase(std::unique(restored.begin(), restored.end()),
                     restored.end());
  }

  // NOTE(shess): The following could be done using a single
  // uber-loop, but it's complicated by needing multiple parallel
  // iterators.  Didn't seem worthwhile for something that will only
  // live for a short period and only fires for one in a million
  // updates.

  // Find elements in |restored| which are not in |prefixes|.
  std::vector<SBPrefix> difference;
  std::set_difference(restored.begin(), restored.end(),
                      prefixes.begin(), prefixes.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredExcess", difference.size());

  // Find elements in |prefixes| which are not in |restored|.
  difference.clear();
  std::set_difference(prefixes.begin(), prefixes.end(),
                      restored.begin(), restored.end(),
                      std::back_inserter(difference));
  if (difference.size())
    UMA_HISTOGRAM_COUNTS_100("SB2.PrefixSetRestoredShortfall",
                             difference.size());

  return prefix_set.release();
}

}  // namespace

// The default SafeBrowsingDatabaseFactory.
class SafeBrowsingDatabaseFactoryImpl : public SafeBrowsingDatabaseFactory {
 public:
  virtual SafeBrowsingDatabase* CreateSafeBrowsingDatabase(
      bool enable_download_protection,
      bool enable_client_side_whitelist) {
    return new SafeBrowsingDatabaseNew(
        new SafeBrowsingStoreFile,
        enable_download_protection ? new SafeBrowsingStoreFile : NULL,
        enable_client_side_whitelist ? new SafeBrowsingStoreFile : NULL);
  }

  SafeBrowsingDatabaseFactoryImpl() { }

 private:
  DISALLOW_COPY_AND_ASSIGN(SafeBrowsingDatabaseFactoryImpl);
};

// static
SafeBrowsingDatabaseFactory* SafeBrowsingDatabase::factory_ = NULL;

// Factory method, non-thread safe. Caller has to make sure this s called
// on SafeBrowsing Thread.
// TODO(shess): There's no need for a factory any longer.  Convert
// SafeBrowsingDatabaseNew to SafeBrowsingDatabase, and have Create()
// callers just construct things directly.
SafeBrowsingDatabase* SafeBrowsingDatabase::Create(
    bool enable_download_protection,
    bool enable_client_side_whitelist) {
  if (!factory_)
    factory_ = new SafeBrowsingDatabaseFactoryImpl();
  return factory_->CreateSafeBrowsingDatabase(enable_download_protection,
                                              enable_client_side_whitelist);
}

SafeBrowsingDatabase::~SafeBrowsingDatabase() {
}

// static
FilePath SafeBrowsingDatabase::BrowseDBFilename(
         const FilePath& db_base_filename) {
  return FilePath(db_base_filename.value() + kBrowseDBFile);
}

// static
FilePath SafeBrowsingDatabase::DownloadDBFilename(
    const FilePath& db_base_filename) {
  return FilePath(db_base_filename.value() + kDownloadDBFile);
}

// static
FilePath SafeBrowsingDatabase::BloomFilterForFilename(
    const FilePath& db_filename) {
  return FilePath(db_filename.value() + kBloomFilterFile);
}

// static
FilePath SafeBrowsingDatabase::CsdWhitelistDBFilename(
    const FilePath& db_filename) {
  return FilePath(db_filename.value() + kCsdWhitelistDBFile);
}

SafeBrowsingStore* SafeBrowsingDatabaseNew::GetStore(const int list_id) {
  DVLOG(3) << "Get store for list: " << list_id;
  if (list_id == safe_browsing_util::PHISH ||
      list_id == safe_browsing_util::MALWARE) {
    return browse_store_.get();
  } else if (list_id == safe_browsing_util::BINURL ||
             list_id == safe_browsing_util::BINHASH) {
    return download_store_.get();
  } else if (list_id == safe_browsing_util::CSDWHITELIST) {
    return csd_whitelist_store_.get();
  }
  return NULL;
}

// static
void SafeBrowsingDatabase::RecordFailure(FailureType failure_type) {
  UMA_HISTOGRAM_ENUMERATION("SB2.DatabaseFailure", failure_type,
                            FAILURE_DATABASE_MAX);
}

SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew()
    : creation_loop_(MessageLoop::current()),
      browse_store_(new SafeBrowsingStoreFile),
      download_store_(NULL),
      csd_whitelist_store_(NULL),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)) {
  DCHECK(browse_store_.get());
  DCHECK(!download_store_.get());
  DCHECK(!csd_whitelist_store_.get());
}

SafeBrowsingDatabaseNew::SafeBrowsingDatabaseNew(
    SafeBrowsingStore* browse_store,
    SafeBrowsingStore* download_store,
    SafeBrowsingStore* csd_whitelist_store)
    : creation_loop_(MessageLoop::current()),
      browse_store_(browse_store),
      download_store_(download_store),
      csd_whitelist_store_(csd_whitelist_store),
      ALLOW_THIS_IN_INITIALIZER_LIST(reset_factory_(this)),
      corruption_detected_(false) {
  DCHECK(browse_store_.get());
}

SafeBrowsingDatabaseNew::~SafeBrowsingDatabaseNew() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
}

void SafeBrowsingDatabaseNew::Init(const FilePath& filename_base) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  // Ensure we haven't been run before.
  DCHECK(browse_filename_.empty());
  DCHECK(download_filename_.empty());
  DCHECK(csd_whitelist_filename_.empty());

  browse_filename_ = BrowseDBFilename(filename_base);
  bloom_filter_filename_ = BloomFilterForFilename(browse_filename_);

  browse_store_->Init(
      browse_filename_,
      NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
  DVLOG(1) << "Init browse store: " << browse_filename_.value();

  {
    // NOTE: There is no need to grab the lock in this function, since
    // until it returns, there are no pointers to this class on other
    // threads.  Then again, that means there is no possibility of
    // contention on the lock...
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.clear();
    pending_browse_hashes_.clear();
    LoadBloomFilter();
  }

  if (download_store_.get()) {
    download_filename_ = DownloadDBFilename(filename_base);
    download_store_->Init(
        download_filename_,
        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    DVLOG(1) << "Init download store: " << download_filename_.value();
  }

  if (csd_whitelist_store_.get()) {
    csd_whitelist_filename_ = CsdWhitelistDBFilename(filename_base);
    csd_whitelist_store_->Init(
        csd_whitelist_filename_,
        NewCallback(this, &SafeBrowsingDatabaseNew::HandleCorruptDatabase));
    DVLOG(1) << "Init csd whitelist store: " << csd_whitelist_filename_.value();
    std::vector<SBAddFullHash> full_hashes;
    if (csd_whitelist_store_->GetAddFullHashes(&full_hashes)) {
      LoadCsdWhitelist(full_hashes);
    } else {
      CsdWhitelistAllUrls();
    }
  } else {
    CsdWhitelistAllUrls();  // Just to be safe.
  }
}

bool SafeBrowsingDatabaseNew::ResetDatabase() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  // Delete files on disk.
  // TODO(shess): Hard to see where one might want to delete without a
  // reset.  Perhaps inline |Delete()|?
  if (!Delete())
    return false;

  // Reset objects in memory.
  {
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.clear();
    pending_browse_hashes_.clear();
    prefix_miss_cache_.clear();
    // TODO(shess): This could probably be |bloom_filter_.reset()|.
    browse_bloom_filter_ = new BloomFilter(BloomFilter::kBloomFilterMinSize *
                                           BloomFilter::kBloomFilterSizeRatio);
    // TODO(shess): It is simpler for the code to assume that presence
    // of a bloom filter always implies presence of a prefix set.
    prefix_set_.reset(new safe_browsing::PrefixSet(std::vector<SBPrefix>()));
  }
  // Wants to acquire the lock itself.
  CsdWhitelistAllUrls();

  return true;
}

// TODO(lzheng): Remove matching_list, it is not used anywhere.
bool SafeBrowsingDatabaseNew::ContainsBrowseUrl(
    const GURL& url,
    std::string* matching_list,
    std::vector<SBPrefix>* prefix_hits,
    std::vector<SBFullHashResult>* full_hits,
    base::Time last_update) {
  // Clear the results first.
  matching_list->clear();
  prefix_hits->clear();
  full_hits->clear();

  std::vector<SBFullHash> full_hashes;
  BrowseFullHashesToCheck(url, false, &full_hashes);
  if (full_hashes.empty())
    return false;

  // This function is called on the I/O thread, prevent changes to
  // bloom filter and caches.
  base::AutoLock locked(lookup_lock_);

  if (!browse_bloom_filter_.get())
    return false;
  DCHECK(prefix_set_.get());

  // Used to double-check in case of a hit mis-match.
  std::vector<SBPrefix> restored;

  size_t miss_count = 0;
  for (size_t i = 0; i < full_hashes.size(); ++i) {
    bool found = prefix_set_->Exists(full_hashes[i].prefix);

    if (browse_bloom_filter_->Exists(full_hashes[i].prefix)) {
      RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_HIT);
      if (found)
        RecordPrefixSetInfo(PREFIX_SET_EVENT_HIT);
      prefix_hits->push_back(full_hashes[i].prefix);
      if (prefix_miss_cache_.count(full_hashes[i].prefix) > 0)
        ++miss_count;
    } else {
      // Bloom filter misses should never be in prefix set.  Re-create
      // the original prefixes and manually search for it, to check if
      // there's a bug with how |Exists()| is implemented.
      // |UpdateBrowseStore()| previously verified that
      // |GetPrefixes()| returns the same prefixes as were passed to
      // the constructor.
      DCHECK(!found);
      if (found) {
        if (restored.empty())
          prefix_set_->GetPrefixes(&restored);

        // If the item is not in the re-created list, then there is an
        // error in |PrefixSet::Exists()|.  If the item is in the
        // re-created list, then the bloom filter was wrong.
        if (std::binary_search(restored.begin(), restored.end(),
                               full_hashes[i].prefix)) {
          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT);
        } else {
          RecordPrefixSetInfo(PREFIX_SET_EVENT_BLOOM_MISS_PREFIX_HIT_INVALID);
        }
      }
    }
  }

  // If all the prefixes are cached as 'misses', don't issue a GetHash.
  if (miss_count == prefix_hits->size())
    return false;

  // Find the matching full-hash results.  |full_browse_hashes_| are from the
  // database, |pending_browse_hashes_| are from GetHash requests between
  // updates.
  std::sort(prefix_hits->begin(), prefix_hits->end());

  GetCachedFullHashesForBrowse(*prefix_hits, full_browse_hashes_,
                               full_hits, last_update);
  GetCachedFullHashesForBrowse(*prefix_hits, pending_browse_hashes_,
                               full_hits, last_update);
  return true;
}

bool SafeBrowsingDatabaseNew::MatchDownloadAddPrefixes(
    int list_bit,
    const std::vector<SBPrefix>& prefixes,
    std::vector<SBPrefix>* prefix_hits) {
  prefix_hits->clear();

  std::vector<SBAddPrefix> add_prefixes;
  download_store_->GetAddPrefixes(&add_prefixes);
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    for (size_t j = 0; j < prefixes.size(); ++j) {
      const SBPrefix& prefix = prefixes[j];
      if (prefix == add_prefixes[i].prefix &&
          GetListIdBit(add_prefixes[i].chunk_id) == list_bit) {
        prefix_hits->push_back(prefix);
      }
    }
  }
  return !prefix_hits->empty();
}

bool SafeBrowsingDatabaseNew::ContainsDownloadUrl(
    const std::vector<GURL>& urls,
    std::vector<SBPrefix>* prefix_hits) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  // Ignore this check when download checking is not enabled.
  if (!download_store_.get())
    return false;

  std::vector<SBPrefix> prefixes;
  GetDownloadUrlPrefixes(urls, &prefixes);
  return MatchDownloadAddPrefixes(safe_browsing_util::BINURL % 2,
                                  prefixes,
                                  prefix_hits);
}

bool SafeBrowsingDatabaseNew::ContainsDownloadHashPrefix(
    const SBPrefix& prefix) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  // Ignore this check when download store is not available.
  if (!download_store_.get())
    return false;

  std::vector<SBPrefix> prefixes(1, prefix);
  std::vector<SBPrefix> prefix_hits;
  return MatchDownloadAddPrefixes(safe_browsing_util::BINHASH % 2,
                                  prefixes,
                                  &prefix_hits);
}

bool SafeBrowsingDatabaseNew::ContainsCsdWhitelistedUrl(const GURL& url) {
  // This method is theoretically thread-safe but we expect all calls to
  // originate from the IO thread.
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  base::AutoLock l(lookup_lock_);
  if (csd_whitelist_all_urls_)
    return true;

  std::vector<SBFullHash> full_hashes;
  BrowseFullHashesToCheck(url, true, &full_hashes);
  for (std::vector<SBFullHash>::const_iterator it = full_hashes.begin();
       it != full_hashes.end(); ++it) {
    if (std::binary_search(csd_whitelist_.begin(), csd_whitelist_.end(), *it))
      return true;
  }
  return false;
}

// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertAdd(int chunk_id, SBPrefix host,
                                        const SBEntry* entry, int list_id) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  STATS_COUNTER("SB.HostInsert", 1);
  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
  const int count = entry->prefix_count();

  DCHECK(!entry->IsSub());
  if (!count) {
    // No prefixes, use host instead.
    STATS_COUNTER("SB.PrefixAdd", 1);
    store->WriteAddPrefix(encoded_chunk_id, host);
  } else if (entry->IsPrefix()) {
    // Prefixes only.
    for (int i = 0; i < count; i++) {
      const SBPrefix prefix = entry->PrefixAt(i);
      STATS_COUNTER("SB.PrefixAdd", 1);
      store->WriteAddPrefix(encoded_chunk_id, prefix);
    }
  } else {
    // Prefixes and hashes.
    const base::Time receive_time = base::Time::Now();
    for (int i = 0; i < count; ++i) {
      const SBFullHash full_hash = entry->FullHashAt(i);
      const SBPrefix prefix = full_hash.prefix;

      STATS_COUNTER("SB.PrefixAdd", 1);
      store->WriteAddPrefix(encoded_chunk_id, prefix);

      STATS_COUNTER("SB.PrefixAddFull", 1);
      store->WriteAddHash(encoded_chunk_id, receive_time, full_hash);
    }
  }
}

// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertAddChunks(const int list_id,
                                              const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  for (SBChunkList::const_iterator citer = chunks.begin();
       citer != chunks.end(); ++citer) {
    const int chunk_id = citer->chunk_number;

    // The server can give us a chunk that we already have because
    // it's part of a range.  Don't add it again.
    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    if (store->CheckAddChunk(encoded_chunk_id))
      continue;

    store->SetAddChunk(encoded_chunk_id);
    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
         hiter != citer->hosts.end(); ++hiter) {
      // NOTE: Could pass |encoded_chunk_id|, but then inserting add
      // chunks would look different from inserting sub chunks.
      InsertAdd(chunk_id, hiter->host, hiter->entry, list_id);
    }
  }
}

// Helper to insert entries for all of the prefixes or full hashes in
// |entry| into the store.
void SafeBrowsingDatabaseNew::InsertSub(int chunk_id, SBPrefix host,
                                        const SBEntry* entry, int list_id) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  STATS_COUNTER("SB.HostDelete", 1);
  const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
  const int count = entry->prefix_count();

  DCHECK(entry->IsSub());
  if (!count) {
    // No prefixes, use host instead.
    STATS_COUNTER("SB.PrefixSub", 1);
    const int add_chunk_id = EncodeChunkId(entry->chunk_id(), list_id);
    store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, host);
  } else if (entry->IsPrefix()) {
    // Prefixes only.
    for (int i = 0; i < count; i++) {
      const SBPrefix prefix = entry->PrefixAt(i);
      const int add_chunk_id =
          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);

      STATS_COUNTER("SB.PrefixSub", 1);
      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, prefix);
    }
  } else {
    // Prefixes and hashes.
    for (int i = 0; i < count; ++i) {
      const SBFullHash full_hash = entry->FullHashAt(i);
      const int add_chunk_id =
          EncodeChunkId(entry->ChunkIdAtPrefix(i), list_id);

      STATS_COUNTER("SB.PrefixSub", 1);
      store->WriteSubPrefix(encoded_chunk_id, add_chunk_id, full_hash.prefix);

      STATS_COUNTER("SB.PrefixSubFull", 1);
      store->WriteSubHash(encoded_chunk_id, add_chunk_id, full_hash);
    }
  }
}

// Helper to iterate over all the entries in the hosts in |chunks| and
// add them to the store.
void SafeBrowsingDatabaseNew::InsertSubChunks(int list_id,
                                              const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  for (SBChunkList::const_iterator citer = chunks.begin();
       citer != chunks.end(); ++citer) {
    const int chunk_id = citer->chunk_number;

    // The server can give us a chunk that we already have because
    // it's part of a range.  Don't add it again.
    const int encoded_chunk_id = EncodeChunkId(chunk_id, list_id);
    if (store->CheckSubChunk(encoded_chunk_id))
      continue;

    store->SetSubChunk(encoded_chunk_id);
    for (std::deque<SBChunkHost>::const_iterator hiter = citer->hosts.begin();
         hiter != citer->hosts.end(); ++hiter) {
      InsertSub(chunk_id, hiter->host, hiter->entry, list_id);
    }
  }
}

void SafeBrowsingDatabaseNew::InsertChunks(const std::string& list_name,
                                           const SBChunkList& chunks) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  if (corruption_detected_ || chunks.empty())
    return;

  const base::Time insert_start = base::Time::Now();

  const int list_id = safe_browsing_util::GetListId(list_name);
  DVLOG(2) << list_name << ": " << list_id;

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  change_detected_ = true;

  store->BeginChunk();
  if (chunks.front().is_add) {
    InsertAddChunks(list_id, chunks);
  } else {
    InsertSubChunks(list_id, chunks);
  }
  store->FinishChunk();

  UMA_HISTOGRAM_TIMES("SB2.ChunkInsert", base::Time::Now() - insert_start);
}

void SafeBrowsingDatabaseNew::DeleteChunks(
    const std::vector<SBChunkDelete>& chunk_deletes) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  if (corruption_detected_ || chunk_deletes.empty())
    return;

  const std::string& list_name = chunk_deletes.front().list_name;
  const int list_id = safe_browsing_util::GetListId(list_name);

  SafeBrowsingStore* store = GetStore(list_id);
  if (!store) return;

  change_detected_ = true;

  for (size_t i = 0; i < chunk_deletes.size(); ++i) {
    std::vector<int> chunk_numbers;
    RangesToChunks(chunk_deletes[i].chunk_del, &chunk_numbers);
    for (size_t j = 0; j < chunk_numbers.size(); ++j) {
      const int encoded_chunk_id = EncodeChunkId(chunk_numbers[j], list_id);
      if (chunk_deletes[i].is_sub_del)
        store->DeleteSubChunk(encoded_chunk_id);
      else
        store->DeleteAddChunk(encoded_chunk_id);
    }
  }
}

void SafeBrowsingDatabaseNew::CacheHashResults(
    const std::vector<SBPrefix>& prefixes,
    const std::vector<SBFullHashResult>& full_hits) {
  // This is called on the I/O thread, lock against updates.
  base::AutoLock locked(lookup_lock_);

  if (full_hits.empty()) {
    prefix_miss_cache_.insert(prefixes.begin(), prefixes.end());
    return;
  }

  // TODO(shess): SBFullHashResult and SBAddFullHash are very similar.
  // Refactor to make them identical.
  const base::Time now = base::Time::Now();
  const size_t orig_size = pending_browse_hashes_.size();
  for (std::vector<SBFullHashResult>::const_iterator iter = full_hits.begin();
       iter != full_hits.end(); ++iter) {
    const int list_id = safe_browsing_util::GetListId(iter->list_name);
    if (list_id == safe_browsing_util::MALWARE ||
        list_id == safe_browsing_util::PHISH) {
      int encoded_chunk_id = EncodeChunkId(iter->add_chunk_id, list_id);
      SBAddFullHash add_full_hash(encoded_chunk_id, now, iter->hash);
      pending_browse_hashes_.push_back(add_full_hash);
    }
  }

  // Sort new entries then merge with the previously-sorted entries.
  std::vector<SBAddFullHash>::iterator
      orig_end = pending_browse_hashes_.begin() + orig_size;
  std::sort(orig_end, pending_browse_hashes_.end(), SBAddFullHashPrefixLess);
  std::inplace_merge(pending_browse_hashes_.begin(),
                     orig_end, pending_browse_hashes_.end(),
                     SBAddFullHashPrefixLess);
}

bool SafeBrowsingDatabaseNew::UpdateStarted(
    std::vector<SBListChunkRanges>* lists) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  DCHECK(lists);

  // If |BeginUpdate()| fails, reset the database.
  if (!browse_store_->BeginUpdate()) {
    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

  if (download_store_.get() && !download_store_->BeginUpdate()) {
    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

  if (csd_whitelist_store_.get() && !csd_whitelist_store_->BeginUpdate()) {
    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_BEGIN);
    HandleCorruptDatabase();
    return false;
  }

  std::vector<std::string> browse_listnames;
  browse_listnames.push_back(safe_browsing_util::kMalwareList);
  browse_listnames.push_back(safe_browsing_util::kPhishingList);
  UpdateChunkRanges(browse_store_.get(), browse_listnames, lists);

  if (download_store_.get()) {
    std::vector<std::string> download_listnames;
    download_listnames.push_back(safe_browsing_util::kBinUrlList);
    download_listnames.push_back(safe_browsing_util::kBinHashList);
    UpdateChunkRanges(download_store_.get(), download_listnames, lists);
  }

  if (csd_whitelist_store_.get()) {
    std::vector<std::string> csd_whitelist_listnames;
    csd_whitelist_listnames.push_back(safe_browsing_util::kCsdWhiteList);
    UpdateChunkRanges(csd_whitelist_store_.get(),
                      csd_whitelist_listnames, lists);
  }

  corruption_detected_ = false;
  change_detected_ = false;
  return true;
}

void SafeBrowsingDatabaseNew::UpdateFinished(bool update_succeeded) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  if (corruption_detected_)
    return;

  // Unroll the transaction if there was a protocol error or if the
  // transaction was empty.  This will leave the bloom filter, the
  // pending hashes, and the prefix miss cache in place.
  if (!update_succeeded || !change_detected_) {
    // Track empty updates to answer questions at http://crbug.com/72216 .
    if (update_succeeded && !change_detected_)
      UMA_HISTOGRAM_COUNTS("SB2.DatabaseUpdateKilobytes", 0);
    browse_store_->CancelUpdate();
    if (download_store_.get())
      download_store_->CancelUpdate();
    if (csd_whitelist_store_.get())
      csd_whitelist_store_->CancelUpdate();
    return;
  }

  // for download
  UpdateDownloadStore();
  // for browsing
  UpdateBrowseStore();
  // for csd whitelist
  UpdateCsdWhitelistStore();
}

void SafeBrowsingDatabaseNew::UpdateCsdWhitelistStore() {
  if (!csd_whitelist_store_.get())
    return;

  // For the csd whitelist, we don't cache and save full hashes since all
  // hashes are already full.
  std::vector<SBAddFullHash> empty_add_hashes;

  // Not needed for the csd whitelist.
  std::set<SBPrefix> empty_miss_cache;

  // Note: prefixes will not be empty.  The current data store implementation
  // stores all full-length hashes as both full and prefix hashes.
  std::vector<SBAddPrefix> prefixes;
  std::vector<SBAddFullHash> full_hashes;
  if (!csd_whitelist_store_->FinishUpdate(empty_add_hashes,
                                          empty_miss_cache,
                                          &prefixes,
                                          &full_hashes)) {
    RecordFailure(FAILURE_CSD_WHITELIST_DATABASE_UPDATE_FINISH);
    CsdWhitelistAllUrls();
    return;
  }
  LoadCsdWhitelist(full_hashes);
}

void SafeBrowsingDatabaseNew::UpdateDownloadStore() {
  if (!download_store_.get())
    return;

  // For download, we don't cache and save full hashes.
  std::vector<SBAddFullHash> empty_add_hashes;

  // For download, backend lookup happens only if a prefix is in add list.
  // No need to pass in miss cache when call FinishUpdate to caculate
  // bloomfilter false positives.
  std::set<SBPrefix> empty_miss_cache;

  // These results are not used after this call. Simply ignore the
  // returned value after FinishUpdate(...).
  std::vector<SBAddPrefix> add_prefixes_result;
  std::vector<SBAddFullHash> add_full_hashes_result;

  if (!download_store_->FinishUpdate(empty_add_hashes,
                                     empty_miss_cache,
                                     &add_prefixes_result,
                                     &add_full_hashes_result))
    RecordFailure(FAILURE_DOWNLOAD_DATABASE_UPDATE_FINISH);
  return;
}

void SafeBrowsingDatabaseNew::UpdateBrowseStore() {
  // Copy out the pending add hashes.  Copy rather than swapping in
  // case |ContainsBrowseURL()| is called before the new filter is complete.
  std::vector<SBAddFullHash> pending_add_hashes;
  {
    base::AutoLock locked(lookup_lock_);
    pending_add_hashes.insert(pending_add_hashes.end(),
                              pending_browse_hashes_.begin(),
                              pending_browse_hashes_.end());
  }

  // Measure the amount of IO during the bloom filter build.
  base::IoCounters io_before, io_after;
  base::ProcessHandle handle = base::Process::Current().handle();
  scoped_ptr<base::ProcessMetrics> metric(
#if !defined(OS_MACOSX)
      base::ProcessMetrics::CreateProcessMetrics(handle)
#else
      // Getting stats only for the current process is enough, so NULL is fine.
      base::ProcessMetrics::CreateProcessMetrics(handle, NULL)
#endif
  );

  // IoCounters are currently not supported on Mac, and may not be
  // available for Linux, so we check the result and only show IO
  // stats if they are available.
  const bool got_counters = metric->GetIOCounters(&io_before);

  const base::Time before = base::Time::Now();

  std::vector<SBAddPrefix> add_prefixes;
  std::vector<SBAddFullHash> add_full_hashes;
  if (!browse_store_->FinishUpdate(pending_add_hashes, prefix_miss_cache_,
                                   &add_prefixes, &add_full_hashes)) {
    RecordFailure(FAILURE_BROWSE_DATABASE_UPDATE_FINISH);
    return;
  }

  // Create and populate |filter| from |add_prefixes|.
  // TODO(shess): The bloom filter doesn't need to be a
  // scoped_refptr<> for this code.  Refactor that away.
  const int filter_size =
      BloomFilter::FilterSizeForKeyCount(add_prefixes.size());
  scoped_refptr<BloomFilter> filter(new BloomFilter(filter_size));
  for (size_t i = 0; i < add_prefixes.size(); ++i) {
    filter->Insert(add_prefixes[i].prefix);
  }

  scoped_ptr<safe_browsing::PrefixSet>
      prefix_set(PrefixSetFromAddPrefixes(add_prefixes));

  // This needs to be in sorted order by prefix for efficient access.
  std::sort(add_full_hashes.begin(), add_full_hashes.end(),
            SBAddFullHashPrefixLess);

  // Swap in the newly built filter and cache.
  {
    base::AutoLock locked(lookup_lock_);
    full_browse_hashes_.swap(add_full_hashes);

    // TODO(shess): If |CacheHashResults()| is posted between the
    // earlier lock and this clear, those pending hashes will be lost.
    // It could be fixed by only removing hashes which were collected
    // at the earlier point.  I believe that is fail-safe as-is (the
    // hash will be fetched again).
    pending_browse_hashes_.clear();
    prefix_miss_cache_.clear();
    browse_bloom_filter_.swap(filter);
    prefix_set_.swap(prefix_set);
  }

  const base::TimeDelta bloom_gen = base::Time::Now() - before;

  // Persist the bloom filter to disk.  Since only this thread changes
  // |browse_bloom_filter_|, there is no need to lock.
  WriteBloomFilter();

  // Gather statistics.
  if (got_counters && metric->GetIOCounters(&io_after)) {
    UMA_HISTOGRAM_COUNTS("SB2.BuildReadKilobytes",
                         static_cast<int>(io_after.ReadTransferCount -
                                          io_before.ReadTransferCount) / 1024);
    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteKilobytes",
                         static_cast<int>(io_after.WriteTransferCount -
                                          io_before.WriteTransferCount) / 1024);
    UMA_HISTOGRAM_COUNTS("SB2.BuildReadOperations",
                         static_cast<int>(io_after.ReadOperationCount -
                                          io_before.ReadOperationCount));
    UMA_HISTOGRAM_COUNTS("SB2.BuildWriteOperations",
                         static_cast<int>(io_after.WriteOperationCount -
                                          io_before.WriteOperationCount));
  }
  DVLOG(1) << "SafeBrowsingDatabaseImpl built bloom filter in "
           << bloom_gen.InMilliseconds() << " ms total.  prefix count: "
           << add_prefixes.size();
  UMA_HISTOGRAM_LONG_TIMES("SB2.BuildFilter", bloom_gen);
  UMA_HISTOGRAM_COUNTS("SB2.FilterKilobytes",
                       browse_bloom_filter_->size() / 1024);
  int64 size_64;
  if (file_util::GetFileSize(browse_filename_, &size_64))
    UMA_HISTOGRAM_COUNTS("SB2.BrowseDatabaseKilobytes",
                         static_cast<int>(size_64 / 1024));
  if (file_util::GetFileSize(download_filename_, &size_64))
    UMA_HISTOGRAM_COUNTS("SB2.DownloadDatabaseKilobytes",
                         static_cast<int>(size_64 / 1024));
}

void SafeBrowsingDatabaseNew::HandleCorruptDatabase() {
  // Reset the database after the current task has unwound (but only
  // reset once within the scope of a given task).
  if (reset_factory_.empty()) {
    RecordFailure(FAILURE_DATABASE_CORRUPT);
    MessageLoop::current()->PostTask(FROM_HERE,
        reset_factory_.NewRunnableMethod(
            &SafeBrowsingDatabaseNew::OnHandleCorruptDatabase));
  }
}

void SafeBrowsingDatabaseNew::OnHandleCorruptDatabase() {
  RecordFailure(FAILURE_DATABASE_CORRUPT_HANDLER);
  corruption_detected_ = true;  // Stop updating the database.
  ResetDatabase();
  DCHECK(false) << "SafeBrowsing database was corrupt and reset";
}

// TODO(shess): I'm not clear why this code doesn't have any
// real error-handling.
void SafeBrowsingDatabaseNew::LoadBloomFilter() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  DCHECK(!bloom_filter_filename_.empty());

  // If we're missing either of the database or filter files, we wait until the
  // next update to generate a new filter.
  // TODO(paulg): Investigate how often the filter file is missing and how
  // expensive it would be to regenerate it.
  int64 size_64;
  if (!file_util::GetFileSize(browse_filename_, &size_64) || size_64 == 0)
    return;

  if (!file_util::GetFileSize(bloom_filter_filename_, &size_64) ||
      size_64 == 0) {
    RecordFailure(FAILURE_DATABASE_FILTER_MISSING);
    return;
  }

  const base::TimeTicks before = base::TimeTicks::Now();
  browse_bloom_filter_ = BloomFilter::LoadFile(bloom_filter_filename_);
  DVLOG(1) << "SafeBrowsingDatabaseNew read bloom filter in "
           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";

  if (!browse_bloom_filter_.get())
    RecordFailure(FAILURE_DATABASE_FILTER_READ);

  // Manually re-generate the prefix set from the main database.
  // TODO(shess): Write/read for prefix set.
  std::vector<SBAddPrefix> add_prefixes;
  browse_store_->GetAddPrefixes(&add_prefixes);
  prefix_set_.reset(PrefixSetFromAddPrefixes(add_prefixes));
}

bool SafeBrowsingDatabaseNew::Delete() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  const bool r1 = browse_store_->Delete();
  if (!r1)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

  const bool r2 = download_store_.get() ? download_store_->Delete() : true;
  if (!r2)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

  const bool r3 = csd_whitelist_store_.get() ?
      csd_whitelist_store_->Delete() : true;
  if (!r3)
    RecordFailure(FAILURE_DATABASE_STORE_DELETE);

  const bool r4 = file_util::Delete(bloom_filter_filename_, false);
  if (!r4)
    RecordFailure(FAILURE_DATABASE_FILTER_DELETE);
  return r1 && r2 && r3 && r4;
}

void SafeBrowsingDatabaseNew::WriteBloomFilter() {
  DCHECK_EQ(creation_loop_, MessageLoop::current());

  if (!browse_bloom_filter_.get())
    return;

  const base::TimeTicks before = base::TimeTicks::Now();
  const bool write_ok = browse_bloom_filter_->WriteFile(bloom_filter_filename_);
  DVLOG(1) << "SafeBrowsingDatabaseNew wrote bloom filter in "
           << (base::TimeTicks::Now() - before).InMilliseconds() << " ms";

  if (!write_ok)
    RecordFailure(FAILURE_DATABASE_FILTER_WRITE);
}

void SafeBrowsingDatabaseNew::CsdWhitelistAllUrls() {
  base::AutoLock locked(lookup_lock_);
  csd_whitelist_all_urls_ = true;
  csd_whitelist_.clear();
}

void SafeBrowsingDatabaseNew::LoadCsdWhitelist(
    const std::vector<SBAddFullHash>& full_hashes) {
  DCHECK_EQ(creation_loop_, MessageLoop::current());
  if (full_hashes.size() > kMaxCsdWhitelistSize) {
    CsdWhitelistAllUrls();
    return;
  }

  std::vector<SBFullHash> new_csd_whitelist;
  for (std::vector<SBAddFullHash>::const_iterator it = full_hashes.begin();
       it != full_hashes.end(); ++it) {
    new_csd_whitelist.push_back(it->full_hash);
  }
  std::sort(new_csd_whitelist.begin(), new_csd_whitelist.end());

  SBFullHash kill_switch;
  crypto::SHA256HashString(kCsdKillSwitchUrl, &kill_switch,
                           sizeof(kill_switch));
  if (std::binary_search(new_csd_whitelist.begin(), new_csd_whitelist.end(),
                         kill_switch)) {
    // The kill switch is whitelisted hence we whitelist all URLs.
    CsdWhitelistAllUrls();
  } else {
    base::AutoLock locked(lookup_lock_);
    csd_whitelist_all_urls_ = false;
    csd_whitelist_.swap(new_csd_whitelist);
  }
}

登录后可以享受更多权益