// Copyright (c) 2011 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Implementation of the MalwareDetails class.
#include "chrome/browser/safe_browsing/malware_details.h"
#include "base/callback.h"
#include "base/lazy_instance.h"
#include "chrome/browser/net/chrome_url_request_context.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/browser/safe_browsing/malware_details_cache.h"
#include "chrome/browser/safe_browsing/report.pb.h"
#include "chrome/browser/safe_browsing/safe_browsing_service.h"
#include "chrome/common/safe_browsing/safebrowsing_messages.h"
#include "content/browser/browser_thread.h"
#include "content/browser/renderer_host/render_view_host.h"
#include "content/browser/tab_contents/navigation_entry.h"
#include "content/browser/tab_contents/tab_contents.h"
#include "net/base/io_buffer.h"
#include "net/disk_cache/disk_cache.h"
#include "net/url_request/url_request_context_getter.h"
using safe_browsing::ClientMalwareReportRequest;
// Upper bound on the number of DOM nodes taken from a single renderer reply;
// AddDOMDetails() stops processing entries once this count is reached.
// Keep in sync with KMaxNodes in renderer/safe_browsing/malware_dom_details
static const uint32 kMaxDomNodes = 500;
// static
// Factory that NewMalwareDetails() delegates to. It starts out NULL;
// NewMalwareDetails() points it at the default factory if it is still NULL
// when that method is called.
MalwareDetailsFactory* MalwareDetails::factory_ = NULL;
// Default MalwareDetailsFactory, which simply allocates MalwareDetails
// objects. It lives in a global LazyInstance (a singleton) so that it is not
// leaked; the constructor is therefore private and only the LazyInstance
// traits are allowed to call it.
class MalwareDetailsFactoryImpl : public MalwareDetailsFactory {
 public:
  // Returns a newly allocated MalwareDetails built from the given arguments.
  MalwareDetails* CreateMalwareDetails(
      SafeBrowsingService* sb_service,
      TabContents* tab_contents,
      const SafeBrowsingService::UnsafeResource& unsafe_resource) {
    MalwareDetails* details =
        new MalwareDetails(sb_service, tab_contents, unsafe_resource);
    return details;
  }

 private:
  friend struct base::DefaultLazyInstanceTraits<MalwareDetailsFactoryImpl>;

  MalwareDetailsFactoryImpl() {}

  DISALLOW_COPY_AND_ASSIGN(MalwareDetailsFactoryImpl);
};
// base::LazyInstance holding the default factory. NewMalwareDetails() obtains
// the factory object from it through Pointer() when no factory has been set.
static base::LazyInstance<MalwareDetailsFactoryImpl>
g_malware_details_factory_impl(base::LINKER_INITIALIZED);
// Builds a MalwareDetails for |tab_contents| by delegating to the factory
// currently stored in |factory_|.
/* static */
MalwareDetails* MalwareDetails::NewMalwareDetails(
    SafeBrowsingService* sb_service,
    TabContents* tab_contents,
    const SafeBrowsingService::UnsafeResource& resource) {
  // Tests install their own factory before calling this method. When nothing
  // has been installed yet, fall back to the default implementation.
  if (factory_ == NULL)
    factory_ = g_malware_details_factory_impl.Pointer();

  MalwareDetailsFactory* const active_factory = factory_;
  return active_factory->CreateMalwareDetails(
      sb_service, tab_contents, resource);
}
// Create a MalwareDetails for the given tab. Runs in the UI thread.
// Initializes the TabContentsObserver base with |tab_contents|, takes the
// request context getter from the tab's profile, and records |sb_service| and
// |resource| in the corresponding members. Collection begins right away: the
// constructor body calls StartCollection().
MalwareDetails::MalwareDetails(
SafeBrowsingService* sb_service,
TabContents* tab_contents,
const SafeBrowsingService::UnsafeResource& resource)
: TabContentsObserver(tab_contents),
request_context_getter_(tab_contents->profile()->GetRequestContext()),
sb_service_(sb_service),
resource_(resource),
// The collector is allocated up front; FinishCollection() is what starts it.
cache_collector_(new MalwareDetailsCacheCollector) {
StartCollection();
}
// Empty body: no explicit teardown is performed here.
MalwareDetails::~MalwareDetails() {}
// IPC dispatch for this observer. SafeBrowsingHostMsg_MalwareDOMDetails is
// routed to OnReceivedMalwareDOMDetails(); every other message is reported as
// unhandled.
// Returns true if |message| was handled here, false otherwise.
bool MalwareDetails::OnMessageReceived(const IPC::Message& message) {
// Assume handled; the UNHANDLED clause below flips this for unknown messages.
bool handled = true;
IPC_BEGIN_MESSAGE_MAP(MalwareDetails, message)
IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_MalwareDOMDetails,
OnReceivedMalwareDOMDetails)
IPC_MESSAGE_UNHANDLED(handled = false)
IPC_END_MESSAGE_MAP()
return handled;
}
// Returns true only when |url| has the "http" scheme. Callers use this to
// decide whether a URL may be recorded in the report.
bool MalwareDetails::IsPublicUrl(const GURL& url) const {
  // TODO(panayiotis): also skip internal urls.
  const bool is_plain_http = url.SchemeIs("http");
  return is_plain_http;
}
// Returns the Resource registered in resources_ under |url|'s spec. When no
// such entry exists, a new Resource is created, given the next free id (the
// current size of resources_), stored in resources_ and returned. The
// returned pointer remains owned by resources_.
ClientMalwareReportRequest::Resource* MalwareDetails::FindOrCreateResource(
    const GURL& url) {
  const std::string& key = url.spec();

  safe_browsing::ResourceMap::iterator existing = resources_.find(key);
  if (existing != resources_.end())
    return existing->second.get();

  // Not seen before. The id must be taken before the entry is inserted so
  // that ids count up from zero in creation order.
  const int next_id = resources_.size();
  linked_ptr<ClientMalwareReportRequest::Resource> created(
      new ClientMalwareReportRequest::Resource());
  created->set_url(key);
  created->set_id(next_id);
  resources_[key] = created;
  return created.get();
}
// Records |url| in resources_ and links it to its |parent| and |children|.
//
// |url|      - the node being added. The call is a no-op unless
//              IsPublicUrl(url) holds.
// |parent|   - optional parent of |url|. The parent relation is recorded only
//              when |parent| is non-empty and public.
// |tagname|  - tag name for the node. An empty string leaves any previously
//              recorded tag name untouched.
// |children| - optional list of child URLs (may be NULL). Only public
//              children are recorded.
void MalwareDetails::AddUrl(const GURL& url,
                            const GURL& parent,
                            const std::string& tagname,
                            const std::vector<GURL>* children) {
  if (!IsPublicUrl(url))
    return;

  // Find (or create) the resource for the url.
  ClientMalwareReportRequest::Resource* url_resource =
      FindOrCreateResource(url);
  if (!tagname.empty())
    url_resource->set_tag_name(tagname);

  if (!parent.is_empty() && IsPublicUrl(parent)) {
    // Add the resource for the parent and record the parent-child relation.
    ClientMalwareReportRequest::Resource* parent_resource =
        FindOrCreateResource(parent);
    url_resource->set_parent_id(parent_resource->id());
  }

  if (!children)
    return;

  for (std::vector<GURL>::const_iterator it = children->begin();
       it != children->end(); ++it) {
    // Children get the same filter as |url| and |parent|. Without this check
    // a non-public child URL would be inserted into resources_ and would
    // therefore be copied into the report that is sent out.
    if (!IsPublicUrl(*it))
      continue;

    ClientMalwareReportRequest::Resource* child_resource =
        FindOrCreateResource(*it);
    url_resource->add_child_ids(child_resource->id());
  }
}
void MalwareDetails::StartCollection() {
DVLOG(1) << "Starting to compute malware details.";
report_.reset(new ClientMalwareReportRequest());
if (IsPublicUrl(resource_.url)) {
report_->set_malware_url(resource_.url.spec());
}
GURL page_url = tab_contents()->GetURL();
if (IsPublicUrl(page_url)) {
report_->set_page_url(page_url.spec());
}
GURL referrer_url;
NavigationEntry* nav_entry = tab_contents()->controller().GetActiveEntry();
if (nav_entry) {
referrer_url = nav_entry->referrer();
if (IsPublicUrl(referrer_url)) {
report_->set_referrer_url(referrer_url.spec());
}
}
// Add the nodes, starting from the page url.
AddUrl(page_url, GURL(), "", NULL);
// Add the resource_url and its original url, if non-empty and different.
if (!resource_.original_url.is_empty() &&
resource_.url != resource_.original_url) {
// Add original_url, as the parent of resource_url.
AddUrl(resource_.original_url, GURL(), "", NULL);
AddUrl(resource_.url, resource_.original_url, "", NULL);
} else {
AddUrl(resource_.url, GURL(), "", NULL);
}
// Add the redirect urls, if non-empty. The redirect urls do not include the
// original url, but include the unsafe url which is the last one of the
// redirect urls chain
GURL parent_url;
// Set the original url as the parent of the first redirect url if it's not
// empty.
if (!resource_.original_url.is_empty()) {
parent_url = resource_.original_url;
}
// Set the previous redirect url as the parent of the next one
for (unsigned int i = 0; i < resource_.redirect_urls.size(); ++i) {
AddUrl(resource_.redirect_urls[i], parent_url, "", NULL);
parent_url = resource_.redirect_urls[i];
}
// Add the referrer url.
if (nav_entry && !referrer_url.is_empty()) {
AddUrl(referrer_url, GURL(), "", NULL);
}
// Get URLs of frames, scripts etc from the DOM.
// OnReceivedMalwareDOMDetails will be called when the renderer replies.
tab_contents()->render_view_host()->GetMalwareDOMDetails();
}
// When the renderer is done, this is called.
// Handler for SafeBrowsingHostMsg_MalwareDOMDetails (see OnMessageReceived).
// It does not touch |params| itself; it forwards them to AddDOMDetails()
// through a task posted to BrowserThread::IO.
void MalwareDetails::OnReceivedMalwareDOMDetails(
const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
// Schedule this in IO thread, so it doesn't conflict with future users
// of our data structures (eg GetSerializedReport).
BrowserThread::PostTask(
BrowserThread::IO, FROM_HERE,
NewRunnableMethod(
this, &MalwareDetails::AddDOMDetails, params));
}
void MalwareDetails::AddDOMDetails(
const std::vector<SafeBrowsingHostMsg_MalwareDOMDetails_Node>& params) {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
DVLOG(1) << "Nodes from the DOM: " << params.size();
// If we have already started collecting data from the HTTP cache, don't
// modify our state.
if (cache_collector_->HasStarted())
return;
// Add the urls from the DOM to |resources_|. The renderer could be
// sending bogus messages, so limit the number of nodes we accept.
for (uint32 i = 0; i < params.size() && i < kMaxDomNodes; ++i) {
SafeBrowsingHostMsg_MalwareDOMDetails_Node node = params[i];
DVLOG(1) << node.url << ", " << node.tag_name << ", " << node.parent;
AddUrl(node.url, node.parent, node.tag_name, &(node.children));
}
}
// Called from the SB Service on the IO thread, after the user has
// closed the tab, or clicked proceed or goback. Since the user needs
// to take an action, we expect this to be called after
// OnReceivedMalwareDOMDetails in most cases. If not, we don't include
// the DOM data in our report.
//
// Starts the cache collector. It is handed the request context getter,
// pointers to resources_ and cache_result_, and a task bound to
// OnCacheCollectionReady().
// NOTE(review): presumably the collector fills in resources_ and
// cache_result_ and runs the task when it is done -- confirm against
// MalwareDetailsCacheCollector.
void MalwareDetails::FinishCollection() {
DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
cache_collector_->StartCacheCollection(
request_context_getter_,
&resources_,
&cache_result_,
NewRunnableMethod(this, &MalwareDetails::OnCacheCollectionReady));
}
void MalwareDetails::OnCacheCollectionReady() {
DVLOG(1) << "OnCacheCollectionReady.";
// Add all the urls in our |resources_| maps to the |report_| protocol buffer.
for (safe_browsing::ResourceMap::const_iterator it = resources_.begin();
it != resources_.end(); it++) {
ClientMalwareReportRequest::Resource* pb_resource =
report_->add_resources();
pb_resource->CopyFrom(*(it->second));
}
report_->set_complete(cache_result_);
// Send the report, using the SafeBrowsingService.
std::string serialized;
if (!report_->SerializeToString(&serialized)) {
DLOG(ERROR) << "Unable to serialize the malware report.";
return;
}
sb_service_->SendSerializedMalwareDetails(serialized);
}