// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/dns/dns_hosts.h"
#include "base/file_util.h"
#include "base/logging.h"
#include "base/metrics/histogram.h"
#include "base/strings/string_util.h"
#include "base/strings/string_tokenizer.h"
using base::StringPiece;
namespace net {
// Parses the contents of a hosts file. Returns one token (IP or hostname) at
// a time. Doesn't copy anything; accepts the file as a StringPiece and
// returns tokens as StringPieces.
class HostsParser {
public:
explicit HostsParser(const StringPiece& text)
: text_(text),
data_(text.data()),
end_(text.size()),
pos_(0),
token_(),
token_is_ip_(false) {}
// Advances to the next token (IP or hostname). Returns whether another
// token was available. |token_is_ip| and |token| can be used to find out
// the type and text of the token.
bool Advance() {
bool next_is_ip = (pos_ == 0);
while (pos_ < end_ && pos_ != std::string::npos) {
switch (text_[pos_]) {
case ' ':
case '\t':
SkipWhitespace();
break;
case '\r':
case '\n':
next_is_ip = true;
pos_++;
break;
case '#':
SkipRestOfLine();
break;
default: {
size_t token_start = pos_;
SkipToken();
size_t token_end = (pos_ == std::string::npos) ? end_ : pos_;
token_ = StringPiece(data_ + token_start, token_end - token_start);
token_is_ip_ = next_is_ip;
return true;
}
}
}
text_ = StringPiece();
return false;
}
// Fast-forwards the parser to the next line. Should be called if an IP
// address doesn't parse, to avoid wasting time tokenizing hostnames that
// will be ignored.
void SkipRestOfLine() {
pos_ = text_.find("\n", pos_);
}
// Returns whether the last-parsed token is an IP address (true) or a
// hostname (false).
bool token_is_ip() { return token_is_ip_; }
// Returns the text of the last-parsed token as a StringPiece referencing
// the same underlying memory as the StringPiece passed to the constructor.
// Returns an empty StringPiece if no token has been parsed or the end of
// the input string has been reached.
const StringPiece& token() { return token_; }
private:
void SkipToken() {
pos_ = text_.find_first_of(" \t\n\r#", pos_);
}
void SkipWhitespace() {
pos_ = text_.find_first_not_of(" \t", pos_);
}
StringPiece text_;
const char* data_;
const size_t end_;
size_t pos_;
StringPiece token_;
bool token_is_ip_;
DISALLOW_COPY_AND_ASSIGN(HostsParser);
};
void ParseHosts(const std::string& contents, DnsHosts* dns_hosts) {
CHECK(dns_hosts);
DnsHosts& hosts = *dns_hosts;
StringPiece ip_text;
IPAddressNumber ip;
AddressFamily family = ADDRESS_FAMILY_IPV4;
HostsParser parser(contents);
while (parser.Advance()) {
if (parser.token_is_ip()) {
StringPiece new_ip_text = parser.token();
// Some ad-blocking hosts files contain thousands of entries pointing to
// the same IP address (usually 127.0.0.1). Don't bother parsing the IP
// again if it's the same as the one above it.
if (new_ip_text != ip_text) {
IPAddressNumber new_ip;
if (ParseIPLiteralToNumber(parser.token().as_string(), &new_ip)) {
ip_text = new_ip_text;
ip.swap(new_ip);
family = (ip.size() == 4) ? ADDRESS_FAMILY_IPV4 : ADDRESS_FAMILY_IPV6;
} else {
parser.SkipRestOfLine();
}
}
} else {
DnsHostsKey key(parser.token().as_string(), family);
StringToLowerASCII(&key.first);
IPAddressNumber& mapped_ip = hosts[key];
if (mapped_ip.empty())
mapped_ip = ip;
// else ignore this entry (first hit counts)
}
}
}
bool ParseHostsFile(const base::FilePath& path, DnsHosts* dns_hosts) {
dns_hosts->clear();
// Missing file indicates empty HOSTS.
if (!base::PathExists(path))
return true;
int64 size;
if (!base::GetFileSize(path, &size))
return false;
UMA_HISTOGRAM_COUNTS("AsyncDNS.HostsSize", size);
// Reject HOSTS files larger than |kMaxHostsSize| bytes.
const int64 kMaxHostsSize = 1 << 25; // 32MB
if (size > kMaxHostsSize)
return false;
std::string contents;
if (!base::ReadFileToString(path, &contents))
return false;
ParseHosts(contents, dns_hosts);
return true;
}
} // namespace net