// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Utilities for the SafeBrowsing code.
#ifndef CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#define CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_
#pragma once
#include <cstring>
#include <deque>
#include <string>
#include <vector>
#include "base/basictypes.h"
#include "chrome/browser/safe_browsing/chunk_range.h"
class GURL;
class SBEntry;
// A truncated hash's type.
typedef int32 SBPrefix;
// Container for holding a chunk URL and the MAC of the contents of the URL.
struct ChunkUrl {
std::string url;
std::string mac;
std::string list_name;
};
// A full hash.
union SBFullHash {
char full_hash[32];
SBPrefix prefix;
};
inline bool operator==(const SBFullHash& lhash, const SBFullHash& rhash) {
return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) == 0;
}
inline bool operator<(const SBFullHash& lhash, const SBFullHash& rhash) {
return memcmp(lhash.full_hash, rhash.full_hash, sizeof(SBFullHash)) < 0;
}
// Container for information about a specific host in an add/sub chunk.
struct SBChunkHost {
SBPrefix host;
SBEntry* entry;
};
// Container for an add/sub chunk.
struct SBChunk {
SBChunk();
~SBChunk();
int chunk_number;
int list_id;
bool is_add;
std::deque<SBChunkHost> hosts;
};
// Container for a set of chunks. Interim wrapper to replace use of
// |std::deque<SBChunk>| with something having safer memory semantics.
// management.
// TODO(shess): |SBEntry| is currently a very roundabout way to hold
// things pending storage. It could be replaced with the structures
// used in SafeBrowsingStore, then lots of bridging code could
// dissappear.
class SBChunkList {
public:
SBChunkList();
~SBChunkList();
// Implement that subset of the |std::deque<>| interface which
// callers expect.
bool empty() const { return chunks_.empty(); }
size_t size() { return chunks_.size(); }
void push_back(const SBChunk& chunk) { chunks_.push_back(chunk); }
SBChunk& back() { return chunks_.back(); }
SBChunk& front() { return chunks_.front(); }
const SBChunk& front() const { return chunks_.front(); }
typedef std::vector<SBChunk>::const_iterator const_iterator;
const_iterator begin() const { return chunks_.begin(); }
const_iterator end() const { return chunks_.end(); }
typedef std::vector<SBChunk>::iterator iterator;
iterator begin() { return chunks_.begin(); }
iterator end() { return chunks_.end(); }
SBChunk& operator[](size_t n) { return chunks_[n]; }
const SBChunk& operator[](size_t n) const { return chunks_[n]; }
// Calls |SBEvent::Destroy()| before clearing |chunks_|.
void clear();
private:
std::vector<SBChunk> chunks_;
DISALLOW_COPY_AND_ASSIGN(SBChunkList);
};
// Used when we get a gethash response.
struct SBFullHashResult {
SBFullHash hash;
std::string list_name;
int add_chunk_id;
};
// Contains information about a list in the database.
struct SBListChunkRanges {
explicit SBListChunkRanges(const std::string& n);
std::string name; // The list name.
std::string adds; // The ranges for add chunks.
std::string subs; // The ranges for sub chunks.
};
// Container for deleting chunks from the database.
struct SBChunkDelete {
SBChunkDelete();
~SBChunkDelete();
std::string list_name;
bool is_sub_del;
std::vector<ChunkRange> chunk_del;
};
// SBEntry ---------------------------------------------------------------------
// Holds information about the prefixes for a hostkey. prefixes can either be
// 4 bytes (truncated hash) or 32 bytes (full hash).
// For adds:
// [list id ][chunk id][prefix count (0..n)][prefix1][prefix2]
// For subs:
// [list id ][chunk id (only used if prefix count is 0][prefix count (0..n)]
// [add chunk][prefix][add chunk][prefix]
class SBEntry {
public:
enum Type {
ADD_PREFIX, // 4 byte add entry.
SUB_PREFIX, // 4 byte sub entry.
ADD_FULL_HASH, // 32 byte add entry.
SUB_FULL_HASH, // 32 byte sub entry.
};
// Creates a SBEntry with the necessary size for the given number of prefixes.
// Caller ownes the object and needs to free it by calling Destroy.
static SBEntry* Create(Type type, int prefix_count);
// Frees the entry's memory.
void Destroy();
void set_list_id(int list_id) { data_.list_id = list_id; }
int list_id() const { return data_.list_id; }
void set_chunk_id(int chunk_id) { data_.chunk_id = chunk_id; }
int chunk_id() const { return data_.chunk_id; }
int prefix_count() const { return data_.prefix_count; }
// Returns true if this is a prefix as opposed to a full hash.
bool IsPrefix() const {
return type() == ADD_PREFIX || type() == SUB_PREFIX;
}
// Returns true if this is an add entry.
bool IsAdd() const {
return type() == ADD_PREFIX || type() == ADD_FULL_HASH;
}
// Returns true if this is a sub entry.
bool IsSub() const {
return type() == SUB_PREFIX || type() == SUB_FULL_HASH;
}
// Helper to return the size of the prefixes.
int HashLen() const {
return IsPrefix() ? sizeof(SBPrefix) : sizeof(SBFullHash);
}
// For add entries, returns the add chunk id. For sub entries, returns the
// add_chunk id for the prefix at the given index.
int ChunkIdAtPrefix(int index) const;
// Used for sub chunks to set the chunk id at a given index.
void SetChunkIdAtPrefix(int index, int chunk_id);
// Return the prefix/full hash at the given index. Caller is expected to
// call the right function based on the hash length.
const SBPrefix& PrefixAt(int index) const;
const SBFullHash& FullHashAt(int index) const;
// Return the prefix/full hash at the given index. Caller is expected to
// call the right function based on the hash length.
void SetPrefixAt(int index, const SBPrefix& prefix);
void SetFullHashAt(int index, const SBFullHash& full_hash);
private:
// Container for a sub prefix.
struct SBSubPrefix {
int add_chunk;
SBPrefix prefix;
};
// Container for a sub full hash.
struct SBSubFullHash {
int add_chunk;
SBFullHash prefix;
};
// Keep the fixed data together in one struct so that we can get its size
// easily. If any of this is modified, the database will have to be cleared.
struct Data {
int list_id;
// For adds, this is the add chunk number.
// For subs: if prefix_count is 0 then this is the add chunk that this sub
// refers to. Otherwise it's ignored, and the add_chunk in sub_prefixes
// or sub_full_hashes is used for each corresponding prefix.
int chunk_id;
Type type;
int prefix_count;
};
SBEntry();
~SBEntry();
// Helper to return the size of each prefix entry (i.e. for subs this
// includes an add chunk id).
static int PrefixSize(Type type);
// Helper to return how much memory a given Entry would require.
static int Size(Type type, int prefix_count);
// Returns how many bytes this entry is.
int Size() const;
Type type() const { return data_.type; }
void set_prefix_count(int count) { data_.prefix_count = count; }
void set_type(Type type) { data_.type = type; }
// The prefixes union must follow the fixed data so that they're contiguous
// in memory.
Data data_;
union {
SBPrefix add_prefixes_[1];
SBSubPrefix sub_prefixes_[1];
SBFullHash add_full_hashes_[1];
SBSubFullHash sub_full_hashes_[1];
};
};
// Utility functions -----------------------------------------------------------
namespace safe_browsing_util {
// SafeBrowsing list names.
extern const char kMalwareList[];
extern const char kPhishingList[];
// Binary Download list names.
extern const char kBinUrlList[];
extern const char kBinHashList[];
// SafeBrowsing client-side detection whitelist list name.
extern const char kCsdWhiteList[];
enum ListType {
INVALID = -1,
MALWARE = 0,
PHISH = 1,
BINURL = 2,
BINHASH = 3,
CSDWHITELIST = 4,
};
// Maps a list name to ListType.
int GetListId(const std::string& name);
// Maps a ListId to list name. Return false if fails.
bool GetListName(int list_id, std::string* list);
// Canonicalizes url as per Google Safe Browsing Specification.
// See section 6.1 in
// http://code.google.com/p/google-safe-browsing/wiki/Protocolv2Spec.
void CanonicalizeUrl(const GURL& url, std::string* canonicalized_hostname,
std::string* canonicalized_path,
std::string* canonicalized_query);
// Given a URL, returns all the hosts we need to check. They are returned
// in order of size (i.e. b.c is first, then a.b.c).
void GenerateHostsToCheck(const GURL& url, std::vector<std::string>* hosts);
// Given a URL, returns all the paths we need to check.
void GeneratePathsToCheck(const GURL& url, std::vector<std::string>* paths);
int GetHashIndex(const SBFullHash& hash,
const std::vector<SBFullHashResult>& full_hashes);
// Given a URL, compare all the possible host + path full hashes to the set of
// provided full hashes. Returns the index of the match if one is found, or -1
// otherwise.
int GetUrlHashIndex(const GURL& url,
const std::vector<SBFullHashResult>& full_hashes);
bool IsPhishingList(const std::string& list_name);
bool IsMalwareList(const std::string& list_name);
bool IsBadbinurlList(const std::string& list_name);
bool IsBadbinhashList(const std::string& list_name);
// Returns 'true' if 'mac' can be verified using 'key' and 'data'.
bool VerifyMAC(const std::string& key,
const std::string& mac,
const char* data,
int data_length);
GURL GeneratePhishingReportUrl(const std::string& report_page,
const std::string& url_to_report);
void StringToSBFullHash(const std::string& hash_in, SBFullHash* hash_out);
std::string SBFullHashToString(const SBFullHash& hash_out);
} // namespace safe_browsing_util
#endif // CHROME_BROWSER_SAFE_BROWSING_SAFE_BROWSING_UTIL_H_