// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_
#pragma once

#include <set>
#include <vector>

#include "base/basictypes.h"
#include "base/callback.h"
#include "base/hash_tables.h"
#include "base/task.h"
#include "base/time.h"
#include "chrome/browser/safe_browsing/safe_browsing_util.h"

class FilePath;

// SafeBrowsingStore provides a storage abstraction for the
// safe-browsing data used to build the bloom filter.  The items
// stored are:
//   The set of add and sub chunks seen.
//   List of SBAddPrefix (chunk_id and SBPrefix).
//   List of SBSubPrefix (chunk_id and the target SBAddPrefix).
//   List of SBAddFullHash (SBAddPrefix, time received and an SBFullHash).
//   List of SBSubFullHash (chunk_id, target SBAddPrefix, and an SBFullHash).
//
// The store is geared towards updating the data, not runtime access
// to the data (that is handled by SafeBrowsingDatabase).  Updates are
// handled similarly to a SQL transaction cycle, with the new data being
// returned from FinishUpdate() (the COMMIT).  Data is not persistent
// until FinishUpdate() returns successfully.
//
// FinishUpdate() also handles dropping items whose chunk has been
// deleted, and netting out the add/sub lists (when a sub matches an
// add, both are dropped).

// GetAddChunkId() and GetAddPrefix() are exposed so that these items
// can be generically compared with each other by SBAddPrefixLess()
// and SBAddPrefixHashLess().  (SBAddPrefixHashLess() additionally
// reads the |full_hash| member directly.)
struct SBAddPrefix { int32 chunk_id; SBPrefix prefix; SBAddPrefix(int32 id, SBPrefix p) : chunk_id(id), prefix(p) {} SBAddPrefix() : chunk_id(), prefix() {} int32 GetAddChunkId() const { return chunk_id; } SBPrefix GetAddPrefix() const { return prefix; } }; struct SBSubPrefix { int32 chunk_id; int32 add_chunk_id; SBPrefix add_prefix; SBSubPrefix(int32 id, int32 add_id, int prefix) : chunk_id(id), add_chunk_id(add_id), add_prefix(prefix) {} SBSubPrefix() : chunk_id(), add_chunk_id(), add_prefix() {} int32 GetAddChunkId() const { return add_chunk_id; } SBPrefix GetAddPrefix() const { return add_prefix; } }; struct SBAddFullHash { int32 chunk_id; int32 received; SBFullHash full_hash; SBAddFullHash(int32 id, base::Time r, const SBFullHash& h) : chunk_id(id), received(static_cast<int32>(r.ToTimeT())), full_hash(h) { } // Provided for ReadAddHashes() implementations, which already have // an int32 for the time. SBAddFullHash(int32 id, int32 r, const SBFullHash& h) : chunk_id(id), received(r), full_hash(h) {} SBAddFullHash() : chunk_id(), received(), full_hash() {} int32 GetAddChunkId() const { return chunk_id; } SBPrefix GetAddPrefix() const { return full_hash.prefix; } }; struct SBSubFullHash { int32 chunk_id; int32 add_chunk_id; SBFullHash full_hash; SBSubFullHash(int32 id, int32 add_id, const SBFullHash& h) : chunk_id(id), add_chunk_id(add_id), full_hash(h) {} SBSubFullHash() : chunk_id(), add_chunk_id(), full_hash() {} int32 GetAddChunkId() const { return add_chunk_id; } SBPrefix GetAddPrefix() const { return full_hash.prefix; } }; // Determine less-than based on add chunk and prefix. template <class T, class U> bool SBAddPrefixLess(const T& a, const U& b) { if (a.GetAddChunkId() != b.GetAddChunkId()) return a.GetAddChunkId() < b.GetAddChunkId(); return a.GetAddPrefix() < b.GetAddPrefix(); } // Determine less-than based on add chunk, prefix, and full hash. // Prefix can compare differently than hash due to byte ordering, // so it must take precedence. 
template <class T, class U> bool SBAddPrefixHashLess(const T& a, const U& b) { if (SBAddPrefixLess(a, b)) return true; if (SBAddPrefixLess(b, a)) return false; return memcmp(a.full_hash.full_hash, b.full_hash.full_hash, sizeof(a.full_hash.full_hash)) < 0; } // Process the lists for subs which knock out adds. For any item in // |sub_prefixes| which has a match in |add_prefixes|, knock out the // matched items from all vectors. Additionally remove items from // deleted chunks. // // TODO(shess): Since the prefixes are uniformly-distributed hashes, // there aren't many ways to organize the inputs for efficient // processing. For this reason, the vectors are sorted and processed // in parallel. At this time this code does the sorting internally, // but it might make sense to make sorting an API requirement so that // the storage can optimize for it. // // TODO(shess): The original code did not process |sub_full_hashes| // for matches in |add_full_hashes|, so this code doesn't, either. I // think this is probably a bug. void SBProcessSubs(std::vector<SBAddPrefix>* add_prefixes, std::vector<SBSubPrefix>* sub_prefixes, std::vector<SBAddFullHash>* add_full_hashes, std::vector<SBSubFullHash>* sub_full_hashes, const base::hash_set<int32>& add_chunks_deleted, const base::hash_set<int32>& sub_chunks_deleted); // Records a histogram of the number of items in |prefix_misses| which // are not in |add_prefixes|. void SBCheckPrefixMisses(const std::vector<SBAddPrefix>& add_prefixes, const std::set<SBPrefix>& prefix_misses); // TODO(shess): This uses int32 rather than int because it's writing // specifically-sized items to files. SBPrefix should likewise be // explicitly sized. // Abstract interface for storing data. class SafeBrowsingStore { public: SafeBrowsingStore() {} virtual ~SafeBrowsingStore() {} // Sets up the information for later use, but does not necessarily // check whether the underlying file exists, or is valid. 
If // |curruption_callback| is non-NULL it will be called if corruption // is detected, which could happen as part of any call other than // Delete(). The appropriate action is to use Delete() to clear the // store. virtual void Init(const FilePath& filename, Callback0::Type* corruption_callback) = 0; // Deletes the files which back the store, returning true if // successful. virtual bool Delete() = 0; // Get all Add prefixes out from the store. virtual bool GetAddPrefixes(std::vector<SBAddPrefix>* add_prefixes) = 0; // Get all add full-length hashes. virtual bool GetAddFullHashes( std::vector<SBAddFullHash>* add_full_hashes) = 0; // Start an update. None of the following methods should be called // unless this returns true. If this returns true, the update // should be terminated by FinishUpdate() or CancelUpdate(). virtual bool BeginUpdate() = 0; // Start a chunk of data. None of the methods through FinishChunk() // should be called unless this returns true. // TODO(shess): Would it make sense for this to accept |chunk_id|? // Possibly not, because of possible confusion between sub_chunk_id // and add_chunk_id. virtual bool BeginChunk() = 0; virtual bool WriteAddPrefix(int32 chunk_id, SBPrefix prefix) = 0; virtual bool WriteAddHash(int32 chunk_id, base::Time receive_time, const SBFullHash& full_hash) = 0; virtual bool WriteSubPrefix(int32 chunk_id, int32 add_chunk_id, SBPrefix prefix) = 0; virtual bool WriteSubHash(int32 chunk_id, int32 add_chunk_id, const SBFullHash& full_hash) = 0; // Collect the chunk data and preferrably store it on disk to // release memory. Shoul not modify the data in-place. virtual bool FinishChunk() = 0; // Track the chunks which have been seen. 
virtual void SetAddChunk(int32 chunk_id) = 0; virtual bool CheckAddChunk(int32 chunk_id) = 0; virtual void GetAddChunks(std::vector<int32>* out) = 0; virtual void SetSubChunk(int32 chunk_id) = 0; virtual bool CheckSubChunk(int32 chunk_id) = 0; virtual void GetSubChunks(std::vector<int32>* out) = 0; // Delete the indicated chunk_id. The chunk will continue to be // visible until the end of the transaction. virtual void DeleteAddChunk(int32 chunk_id) = 0; virtual void DeleteSubChunk(int32 chunk_id) = 0; // Pass the collected chunks through SBPRocessSubs() and commit to // permanent storage. The resulting add prefixes and hashes will be // stored in |add_prefixes_result| and |add_full_hashes_result|. // |pending_adds| is the set of full hashes which have been received // since the previous update, and is provided as a convenience // (could be written via WriteAddHash(), but that would flush the // chunk to disk). |prefix_misses| is the set of prefixes where the // |GetHash()| request returned no full hashes, used for diagnostic // purposes. virtual bool FinishUpdate( const std::vector<SBAddFullHash>& pending_adds, const std::set<SBPrefix>& prefix_misses, std::vector<SBAddPrefix>* add_prefixes_result, std::vector<SBAddFullHash>* add_full_hashes_result) = 0; // Cancel the update in process and remove any temporary disk // storage, leaving the original data unmodified. virtual bool CancelUpdate() = 0; private: DISALLOW_COPY_AND_ASSIGN(SafeBrowsingStore); }; #endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_STORE_H_