// Copyright (c) 2010 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "webkit/glue/site_isolation_metrics.h" #include <set> #include "base/hash_tables.h" #include "base/metrics/histogram.h" #include "net/base/mime_sniffer.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebSecurityOrigin.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebString.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURL.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLRequest.h" #include "third_party/WebKit/Source/WebKit/chromium/public/WebURLResponse.h" using WebKit::WebFrame; using WebKit::WebSecurityOrigin; using WebKit::WebString; using WebKit::WebURL; using WebKit::WebURLRequest; using WebKit::WebURLResponse; namespace webkit_glue { typedef base::hash_map<unsigned, WebURLRequest::TargetType> TargetTypeMap; typedef base::hash_map<std::string, int> MimeTypeMap; typedef std::set<std::string> CrossOriginTextHtmlResponseSet; static TargetTypeMap* GetTargetTypeMap() { static TargetTypeMap target_type_map_; return &target_type_map_; } // Copied from net/base/mime_util.cc, supported_non_image_types[] static const char* const kCrossOriginMimeTypesToLog[] = { "text/cache-manifest", "text/html", "text/xml", "text/xsl", "text/plain", "text/vnd.chromium.ftp-dir", "text/", "text/css", "image/svg+xml", "application/xml", "application/xhtml+xml", "application/rss+xml", "application/atom+xml", "application/json", "application/x-x509-user-cert", "multipart/x-mixed-replace", "(NONE)" // Keep track of missing MIME types as well }; static MimeTypeMap* GetMimeTypeMap() { static MimeTypeMap mime_type_map_; if (!mime_type_map_.size()) { for (size_t i = 0; i < arraysize(kCrossOriginMimeTypesToLog); ++i) mime_type_map_[kCrossOriginMimeTypesToLog[i]] = i; } return &mime_type_map_; } // This is set is used to keep track of the response urls that we want to // sniff, since we will have to wait for the payload to arrive. static CrossOriginTextHtmlResponseSet* GetCrossOriginTextHtmlResponseSet() { static CrossOriginTextHtmlResponseSet cross_origin_text_html_response_set_; return &cross_origin_text_html_response_set_; } static void LogVerifiedTextHtmlResponse() { UMA_HISTOGRAM_COUNTS( "SiteIsolation.CrossSiteNonFrameResponse_verified_texthtml_BLOCK", 1); } static void LogMislabeledTextHtmlResponse() { UMA_HISTOGRAM_COUNTS( "SiteIsolation.CrossSiteNonFrameResponse_mislabeled_texthtml", 1); } void SiteIsolationMetrics::AddRequest(unsigned identifier, WebURLRequest::TargetType target_type) { TargetTypeMap& target_type_map = *GetTargetTypeMap(); target_type_map[identifier] = target_type; } // Check whether the given response is allowed due to access control headers. // This is basically a copy of the logic of passesAccessControlCheck() in // WebCore/loader/CrossOriginAccessControl.cpp. bool SiteIsolationMetrics::AllowedByAccessControlHeader( WebFrame* frame, const WebURLResponse& response) { WebString access_control_origin = response.httpHeaderField( WebString::fromUTF8("Access-Control-Allow-Origin")); WebSecurityOrigin security_origin = WebSecurityOrigin::createFromString(access_control_origin); return access_control_origin == WebString::fromUTF8("*") || frame->securityOrigin().canAccess(security_origin); } // We want to log any cross-site request that we don't think a renderer should // be allowed to make. We can safely ignore frame requests (since we'd like // those to be in a separate renderer) and plugin requests, even if they are // cross-origin. // // For comparison, we keep counts of: // - All requests made by a renderer // - All cross-site requests // // Then, for cross-site non-frame/plugin requests, we keep track of: // - Counts for MIME types of interest // - Counts of those MIME types that carry CORS headers // - Counts of mislabeled text/html responses (without CORS) // As well as those we would block: // - Counts of verified text/html responses (without CORS) // - Counts of XML/JSON responses (without CORS) // // This will let us say what percentage of requests we would end up blocking. void SiteIsolationMetrics::LogMimeTypeForCrossOriginRequest( WebFrame* frame, unsigned identifier, const WebURLResponse& response) { UMA_HISTOGRAM_COUNTS("SiteIsolation.Requests", 1); TargetTypeMap& target_type_map = *GetTargetTypeMap(); TargetTypeMap::iterator iter = target_type_map.find(identifier); if (iter != target_type_map.end()) { WebURLRequest::TargetType target_type = iter->second; target_type_map.erase(iter); // Focus on cross-site requests. if (!frame->securityOrigin().canAccess( WebSecurityOrigin::create(response.url()))) { UMA_HISTOGRAM_COUNTS("SiteIsolation.CrossSiteRequests", 1); // Now focus on non-frame, non-plugin requests. if (target_type != WebURLRequest::TargetIsMainFrame && target_type != WebURLRequest::TargetIsSubframe && target_type != WebURLRequest::TargetIsObject) { // If it is part of a MIME type we might block, log the MIME type. std::string mime_type = response.mimeType().utf8(); MimeTypeMap mime_type_map = *GetMimeTypeMap(); // Also track it if it lacks a MIME type. // TODO(creis): 304 responses have no MIME type, so we don't handle // them correctly. Can we look up their MIME type from the cache? if (mime_type == "") mime_type = "(NONE)"; MimeTypeMap::iterator mime_type_iter = mime_type_map.find(mime_type); if (mime_type_iter != mime_type_map.end()) { UMA_HISTOGRAM_ENUMERATION( "SiteIsolation.CrossSiteNonFrameResponse_MIME_Type", mime_type_iter->second, arraysize(kCrossOriginMimeTypesToLog)); // We also check access control headers, in case this // cross-origin request has been explicitly permitted. if (AllowedByAccessControlHeader(frame, response)) { UMA_HISTOGRAM_ENUMERATION( "SiteIsolation.CrossSiteNonFrameResponse_With_CORS_MIME_Type", mime_type_iter->second, arraysize(kCrossOriginMimeTypesToLog)); } else { // Without access control headers, we might block this request. // Sometimes resources are mislabled as text/html, though, and we // should only block them if we can verify that. To do so, we sniff // the content once we have some of the payload. if (mime_type == "text/html") { // Remember the response until we can sniff its contents. GetCrossOriginTextHtmlResponseSet()->insert( response.url().spec()); } else if (mime_type == "text/xml" || mime_type == "text/xsl" || mime_type == "application/xml" || mime_type == "application/xhtml+xml" || mime_type == "application/rss+xml" || mime_type == "application/atom+xml" || mime_type == "application/json") { // We will also block XML and JSON MIME types for cross-site // non-frame requests without CORS headers. UMA_HISTOGRAM_COUNTS( "SiteIsolation.CrossSiteNonFrameResponse_xml_or_json_BLOCK", 1); } } } } } } } void SiteIsolationMetrics::SniffCrossOriginHTML(const WebURL& response_url, const char* data, int len) { if (!response_url.isValid()) return; // Look up the URL to see if it is a text/html request we are tracking. CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set = *GetCrossOriginTextHtmlResponseSet(); CrossOriginTextHtmlResponseSet::iterator request_iter = cross_origin_text_html_response_set.find(response_url.spec()); if (request_iter != cross_origin_text_html_response_set.end()) { // Log whether it actually looks like HTML. std::string sniffed_mime_type; bool successful = net::SniffMimeType(data, len, response_url, "", &sniffed_mime_type); if (successful && sniffed_mime_type == "text/html") LogVerifiedTextHtmlResponse(); else LogMislabeledTextHtmlResponse(); cross_origin_text_html_response_set.erase(request_iter); } } void SiteIsolationMetrics::RemoveCompletedResponse( const WebURL& response_url) { if (!response_url.isValid()) return; // Ensure we don't leave responses in the set after they've completed. CrossOriginTextHtmlResponseSet& cross_origin_text_html_response_set = *GetCrossOriginTextHtmlResponseSet(); CrossOriginTextHtmlResponseSet::iterator request_iter = cross_origin_text_html_response_set.find(response_url.spec()); if (request_iter != cross_origin_text_html_response_set.end()) { LogMislabeledTextHtmlResponse(); cross_origin_text_html_response_set.erase(request_iter); } } } // namespace webkit_glue