// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/safe_browsing/client_side_detection_host.h" #include <vector> #include "base/command_line.h" #include "base/logging.h" #include "base/memory/ref_counted.h" #include "base/memory/scoped_ptr.h" #include "base/metrics/histogram.h" #include "base/task.h" #include "chrome/browser/browser_process.h" #include "chrome/browser/profiles/profile.h" #include "chrome/browser/safe_browsing/client_side_detection_service.h" #include "chrome/browser/safe_browsing/safe_browsing_service.h" #include "chrome/common/chrome_switches.h" #include "chrome/common/safe_browsing/csd.pb.h" #include "chrome/common/safe_browsing/safebrowsing_messages.h" #include "content/browser/browser_thread.h" #include "content/browser/renderer_host/render_process_host.h" #include "content/browser/renderer_host/render_view_host.h" #include "content/browser/renderer_host/resource_dispatcher_host.h" #include "content/browser/tab_contents/navigation_controller.h" #include "content/browser/tab_contents/tab_contents.h" #include "content/common/notification_service.h" #include "content/common/notification_type.h" #include "content/common/view_messages.h" #include "googleurl/src/gurl.h" namespace safe_browsing { // This class is instantiated each time a new toplevel URL loads, and // asynchronously checks whether the phishing classifier should run for this // URL. If so, it notifies the renderer with a StartPhishingDetection IPC. // Objects of this class are ref-counted and will be destroyed once nobody // uses it anymore. If |tab_contents|, |csd_service| or |host| go away you need // to call Cancel(). We keep the |sb_service| alive in a ref pointer for as // long as it takes. class ClientSideDetectionHost::ShouldClassifyUrlRequest : public base::RefCountedThreadSafe< ClientSideDetectionHost::ShouldClassifyUrlRequest> { public: ShouldClassifyUrlRequest(const ViewHostMsg_FrameNavigate_Params& params, TabContents* tab_contents, ClientSideDetectionService* csd_service, SafeBrowsingService* sb_service, ClientSideDetectionHost* host) : canceled_(false), params_(params), tab_contents_(tab_contents), csd_service_(csd_service), sb_service_(sb_service), host_(host) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); DCHECK(tab_contents_); DCHECK(csd_service_); DCHECK(sb_service_); DCHECK(host_); } void Start() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); // We start by doing some simple checks that can run on the UI thread. UMA_HISTOGRAM_COUNTS("SBClientPhishing.ClassificationStart", 1); // Only classify [X]HTML documents. if (params_.contents_mime_type != "text/html" && params_.contents_mime_type != "application/xhtml+xml") { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because it has an unsupported MIME type: " << params_.contents_mime_type; UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, NO_CLASSIFY_MAX); return; } // Don't run the phishing classifier if the URL came from a private // network, since we don't want to ping back in this case. We also need // to check whether the connection was proxied -- if so, we won't have the // correct remote IP address, and will skip phishing classification. if (params_.was_fetched_via_proxy) { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because it was fetched via a proxy."; UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_PROXY_FETCH, NO_CLASSIFY_MAX); return; } if (csd_service_->IsPrivateIPAddress(params_.socket_address.host())) { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because of hosting on private IP: " << params_.socket_address.host(); UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_PRIVATE_IP, NO_CLASSIFY_MAX); return; } // Don't run the phishing classifier if the tab is incognito. if (tab_contents_->profile()->IsOffTheRecord()) { VLOG(1) << "Skipping phishing classification for URL: " << params_.url << " because we're browsing incognito."; UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_OFF_THE_RECORD, NO_CLASSIFY_MAX); return; } // We lookup the csd-whitelist before we lookup the cache because // a URL may have recently been whitelisted. If the URL matches // the csd-whitelist we won't start classification. The // csd-whitelist check has to be done on the IO thread because it // uses the SafeBrowsing service class. BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(this, &ShouldClassifyUrlRequest::CheckCsdWhitelist, params_.url)); } void Cancel() { canceled_ = true; // Just to make sure we don't do anything stupid we reset all these // pointers except for the safebrowsing service class which may be // accessed by CheckCsdWhitelist(). tab_contents_ = NULL; csd_service_ = NULL; host_ = NULL; } private: friend class base::RefCountedThreadSafe< ClientSideDetectionHost::ShouldClassifyUrlRequest>; // Enum used to keep stats about why the pre-classification check failed. enum PreClassificationCheckFailures { NO_CLASSIFY_PROXY_FETCH, NO_CLASSIFY_PRIVATE_IP, NO_CLASSIFY_OFF_THE_RECORD, NO_CLASSIFY_MATCH_CSD_WHITELIST, NO_CLASSIFY_TOO_MANY_REPORTS, NO_CLASSIFY_UNSUPPORTED_MIME_TYPE, NO_CLASSIFY_MAX // Always add new values before this one. }; // The destructor can be called either from the UI or the IO thread. virtual ~ShouldClassifyUrlRequest() { } void CheckCsdWhitelist(const GURL& url) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); if (!sb_service_ || sb_service_->MatchCsdWhitelistUrl(url)) { // We're done. There is no point in going back to the UI thread. UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_MATCH_CSD_WHITELIST, NO_CLASSIFY_MAX); return; } BrowserThread::PostTask( BrowserThread::UI, FROM_HERE, NewRunnableMethod(this, &ShouldClassifyUrlRequest::CheckCache)); } void CheckCache() { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); if (canceled_) { return; } // If result is cached, we don't want to run classification again bool is_phishing; if (csd_service_->GetValidCachedResult(params_.url, &is_phishing)) { VLOG(1) << "Satisfying request for " << params_.url << " from cache"; UMA_HISTOGRAM_COUNTS("SBClientPhishing.RequestSatisfiedFromCache", 1); // Since we are already on the UI thread, this is safe. host_->MaybeShowPhishingWarning(params_.url, is_phishing); return; } // We want to limit the number of requests, though we will ignore the // limit for urls in the cache. We don't want to start classifying // too many pages as phishing, but for those that we already think are // phishing we want to give ourselves a chance to fix false positives. if (csd_service_->IsInCache(params_.url)) { VLOG(1) << "Reporting limit skipped for " << params_.url << " as it was in the cache."; UMA_HISTOGRAM_COUNTS("SBClientPhishing.ReportLimitSkipped", 1); } else if (csd_service_->OverReportLimit()) { VLOG(1) << "Too many report phishing requests sent recently, " << "not running classification for " << params_.url; UMA_HISTOGRAM_ENUMERATION("SBClientPhishing.PreClassificationCheckFail", NO_CLASSIFY_TOO_MANY_REPORTS, NO_CLASSIFY_MAX); return; } // Everything checks out, so start classification. // |tab_contents_| is safe to call as we will be destructed // before it is. RenderViewHost* rvh = tab_contents_->render_view_host(); rvh->Send(new SafeBrowsingMsg_StartPhishingDetection( rvh->routing_id(), params_.url)); } // No need to protect |canceled_| with a lock because it is only read and // written by the UI thread. bool canceled_; ViewHostMsg_FrameNavigate_Params params_; TabContents* tab_contents_; ClientSideDetectionService* csd_service_; // We keep a ref pointer here just to make sure the service class stays alive // long enough. scoped_refptr<SafeBrowsingService> sb_service_; ClientSideDetectionHost* host_; DISALLOW_COPY_AND_ASSIGN(ShouldClassifyUrlRequest); }; // This class is used to display the phishing interstitial. class CsdClient : public SafeBrowsingService::Client { public: CsdClient() {} // Method from SafeBrowsingService::Client. This method is called on the // IO thread once the interstitial is going away. This method simply deletes // the CsdClient object. virtual void OnBlockingPageComplete(bool proceed) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); // Delete this on the UI thread since it was created there. BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, new DeleteTask<CsdClient>(this)); } private: friend class DeleteTask<CsdClient>; // Calls the private destructor. // We're taking care of deleting this object. No-one else should delete // this object. virtual ~CsdClient() {} DISALLOW_COPY_AND_ASSIGN(CsdClient); }; ClientSideDetectionHost::ClientSideDetectionHost(TabContents* tab) : TabContentsObserver(tab), csd_service_(g_browser_process->safe_browsing_detection_service()), cb_factory_(ALLOW_THIS_IN_INITIALIZER_LIST(this)) { DCHECK(tab); // Note: csd_service_ and sb_service_ might be NULL. ResourceDispatcherHost* resource = g_browser_process->resource_dispatcher_host(); if (resource) { sb_service_ = resource->safe_browsing_service(); } } ClientSideDetectionHost::~ClientSideDetectionHost() { // Tell any pending classification request that it is being canceled. if (classification_request_.get()) { classification_request_->Cancel(); } } bool ClientSideDetectionHost::OnMessageReceived(const IPC::Message& message) { bool handled = true; IPC_BEGIN_MESSAGE_MAP(ClientSideDetectionHost, message) IPC_MESSAGE_HANDLER(SafeBrowsingHostMsg_DetectedPhishingSite, OnDetectedPhishingSite) IPC_MESSAGE_UNHANDLED(handled = false) IPC_END_MESSAGE_MAP() return handled; } void ClientSideDetectionHost::DidNavigateMainFramePostCommit( const NavigationController::LoadCommittedDetails& details, const ViewHostMsg_FrameNavigate_Params& params) { // TODO(noelutz): move this DCHECK to TabContents and fix all the unit tests // that don't call this method on the UI thread. // DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); if (details.is_in_page) { // If the navigation is within the same page, the user isn't really // navigating away. We don't need to cancel a pending callback or // begin a new classification. return; } // If we navigate away and there currently is a pending phishing // report request we have to cancel it to make sure we don't display // an interstitial for the wrong page. Note that this won't cancel // the server ping back but only cancel the showing of the // interstial. cb_factory_.RevokeAll(); if (csd_service_) { // Cancel any pending classification request. if (classification_request_.get()) { classification_request_->Cancel(); } // Notify the renderer if it should classify this URL. classification_request_ = new ShouldClassifyUrlRequest(params, tab_contents(), csd_service_, sb_service_, this); classification_request_->Start(); } } void ClientSideDetectionHost::OnDetectedPhishingSite( const std::string& verdict_str) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); // There is something seriously wrong if there is no service class but // this method is called. The renderer should not start phishing detection // if there isn't any service class in the browser. DCHECK(csd_service_); // We parse the protocol buffer here. If we're unable to parse it we won't // send the verdict further. scoped_ptr<ClientPhishingRequest> verdict(new ClientPhishingRequest); if (csd_service_ && verdict->ParseFromString(verdict_str) && verdict->IsInitialized()) { // There shouldn't be any pending requests because we revoke them everytime // we navigate away. DCHECK(!cb_factory_.HasPendingCallbacks()); csd_service_->SendClientReportPhishingRequest( verdict.release(), // The service takes ownership of the verdict. cb_factory_.NewCallback( &ClientSideDetectionHost::MaybeShowPhishingWarning)); } } void ClientSideDetectionHost::MaybeShowPhishingWarning(GURL phishing_url, bool is_phishing) { DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); if (is_phishing && CommandLine::ForCurrentProcess()->HasSwitch( switches::kEnableClientSidePhishingInterstitial)) { DCHECK(tab_contents()); // TODO(noelutz): this is not perfect. It's still possible that the // user browses away before the interstitial is shown. Maybe we should // stop all pending navigations? if (sb_service_) { // TODO(noelutz): refactor the SafeBrowsing service class and the // SafeBrowsing blocking page class so that we don't need to depend // on the SafeBrowsingService here and so that we don't need to go // through the IO message loop. std::vector<GURL> redirect_urls; BrowserThread::PostTask( BrowserThread::IO, FROM_HERE, NewRunnableMethod(sb_service_.get(), &SafeBrowsingService::DisplayBlockingPage, phishing_url, phishing_url, redirect_urls, // We only classify the main frame URL. ResourceType::MAIN_FRAME, // TODO(noelutz): create a separate threat type // for client-side phishing detection. SafeBrowsingService::URL_PHISHING, new CsdClient() /* will delete itself */, tab_contents()->GetRenderProcessHost()->id(), tab_contents()->render_view_host()->routing_id())); } } } void ClientSideDetectionHost::set_client_side_detection_service( ClientSideDetectionService* service) { csd_service_ = service; } void ClientSideDetectionHost::set_safe_browsing_service( SafeBrowsingService* service) { sb_service_ = service; } } // namespace safe_browsing