// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Implementation of the MalwareDetails class. #include "chrome/browser/safe_browsing/malware_details.h" #include "base/callback.h" #include "base/lazy_instance.h" #include "base/md5.h" #include "base/string_util.h" #include "chrome/browser/net/chrome_url_request_context.h" #include "chrome/browser/safe_browsing/malware_details_cache.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/browser/safe_browsing/report.pb.h" #include "content/browser/browser_thread.h" #include "net/base/load_flags.h" #include "net/http/http_response_headers.h" #include "net/url_request/url_request_context_getter.h" #include "net/url_request/url_request_status.h" using safe_browsing::ClientMalwareReportRequest; // Only send small files for now, a better strategy would use the size // of the whole report and the user's bandwidth. static const uint32 kMaxBodySizeBytes = 1024; MalwareDetailsCacheCollector::MalwareDetailsCacheCollector() : has_started_(false), current_fetch_(NULL) { } MalwareDetailsCacheCollector::~MalwareDetailsCacheCollector() { } void MalwareDetailsCacheCollector::StartCacheCollection( net::URLRequestContextGetter* request_context_getter, safe_browsing::ResourceMap* resources, bool* result, Task* callback) { // Start the data collection from the HTTP cache. We use a URLFetcher // and set the right flags so we only hit the cache. DVLOG(1) << "Getting cache data for all urls..."; request_context_getter_ = request_context_getter; resources_ = resources; resources_it_ = resources_->begin(); result_ = result; callback_ = callback; has_started_ = true; // Post a task in the message loop, so the callers don't need to // check if we call their callback immediately. BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); } bool MalwareDetailsCacheCollector::HasStarted() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); return has_started_; } // Fetch a URL and advance to the next one when done. void MalwareDetailsCacheCollector::OpenEntry() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); DVLOG(1) << "OpenEntry"; if (resources_it_ == resources_->end()) { // We are done. AllDone(true); return; } if (!request_context_getter_) { DVLOG(1) << "Missing request context getter"; AllDone(false); return; } current_fetch_.reset(new URLFetcher( GURL(resources_it_->first), URLFetcher::GET, this)); current_fetch_->set_request_context(request_context_getter_); // Only from cache, and don't save cookies. current_fetch_->set_load_flags(net::LOAD_ONLY_FROM_CACHE | net::LOAD_DO_NOT_SAVE_COOKIES); current_fetch_->set_automatically_retry_on_5xx(false); // No retries. current_fetch_->Start(); // OnURLFetchComplete will be called when done. } ClientMalwareReportRequest::Resource* MalwareDetailsCacheCollector::GetResource( const GURL& url) { safe_browsing::ResourceMap::iterator it = resources_->find(url.spec()); if (it != resources_->end()) { return it->second.get(); } return NULL; } void MalwareDetailsCacheCollector::OnURLFetchComplete( const URLFetcher* source, const GURL& url, const net::URLRequestStatus& status, int response_code, const ResponseCookies& cookies, const std::string& data) { DVLOG(1) << "OnUrlFetchComplete"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); DCHECK(current_fetch_.get()); if (status.status() != net::URLRequestStatus::SUCCESS && status.os_error() == net::ERR_CACHE_MISS) { // Cache miss, skip this resource. DVLOG(1) << "Cache miss for url: " << url; AdvanceEntry(); return; } if (status.status() != net::URLRequestStatus::SUCCESS) { // Some other error occurred, e.g. the request could have been cancelled. DVLOG(1) << "Unsuccessful fetch: " << url; AdvanceEntry(); return; } // Set the response headers and body to the right resource, which // might not be the same as the one we asked for. // For redirects, resources_it_->first != url.spec(). ClientMalwareReportRequest::Resource* resource = GetResource(url); if (!resource) { DVLOG(1) << "Cannot find resource for url:" << url; AdvanceEntry(); return; } ReadResponse(resource, source); ReadData(resource, data); AdvanceEntry(); } void MalwareDetailsCacheCollector::ReadResponse( ClientMalwareReportRequest::Resource* pb_resource, const URLFetcher* source) { DVLOG(1) << "ReadResponse"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); net::HttpResponseHeaders* headers = source->response_headers(); if (!headers) { DVLOG(1) << "Missing response headers."; return; } ClientMalwareReportRequest::HTTPResponse* pb_response = pb_resource->mutable_response(); pb_response->mutable_firstline()->set_code(headers->response_code()); void* iter = NULL; std::string name, value; while (headers->EnumerateHeaderLines(&iter, &name, &value)) { ClientMalwareReportRequest::HTTPHeader* pb_header = pb_response->add_headers(); pb_header->set_name(name); // Strip any Set-Cookie headers. if (LowerCaseEqualsASCII(name, "set-cookie")) { pb_header->set_value(""); } else { pb_header->set_value(value); } } } void MalwareDetailsCacheCollector::ReadData( ClientMalwareReportRequest::Resource* pb_resource, const std::string& data) { DVLOG(1) << "ReadData"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); ClientMalwareReportRequest::HTTPResponse* pb_response = pb_resource->mutable_response(); if (data.size() <= kMaxBodySizeBytes) { // Only send small bodies for now. pb_response->set_body(data); } pb_response->set_bodylength(data.size()); MD5Digest digest; MD5Sum(data.c_str(), data.size(), &digest); pb_response->set_bodydigest(MD5DigestToBase16(digest)); } void MalwareDetailsCacheCollector::AdvanceEntry() { DVLOG(1) << "AdvanceEntry"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // Advance to the next resource. ++resources_it_; current_fetch_.reset(NULL); // Create a task so we don't take over the IO thread for too long. BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(this, &MalwareDetailsCacheCollector::OpenEntry)); } void MalwareDetailsCacheCollector::AllDone(bool success) { DVLOG(1) << "AllDone"; DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); *result_ = success; BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, callback_); }