// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "base/command_line.h" #include "components/google/core/browser/google_switches.h" #include "components/google/core/browser/google_url_tracker.h" #include "components/google/core/browser/google_util.h" #include "testing/gtest/include/gtest/gtest.h" using google_util::IsGoogleDomainUrl; // Helpers -------------------------------------------------------------------- namespace { // These functions merely provide brevity in the callers. bool IsHomePage(const std::string& url) { return google_util::IsGoogleHomePageUrl(GURL(url)); } bool IsSearch(const std::string& url) { return google_util::IsGoogleSearchUrl(GURL(url)); } bool StartsWithBaseURL(const std::string& url) { return google_util::StartsWithCommandLineGoogleBaseURL(GURL(url)); } } // namespace // Actual tests --------------------------------------------------------------- TEST(GoogleUtilTest, GoodHomePagesNonSecure) { // Valid home page hosts. EXPECT_TRUE(IsHomePage(GoogleURLTracker::kDefaultGoogleHomepage)); EXPECT_TRUE(IsHomePage("http://google.com")); EXPECT_TRUE(IsHomePage("http://www.google.com")); EXPECT_TRUE(IsHomePage("http://www.google.ca")); EXPECT_TRUE(IsHomePage("http://www.google.co.uk")); EXPECT_TRUE(IsHomePage("http://www.google.com:80/")); // Only the paths /, /webhp, and /ig.* are valid. Query parameters are // ignored. EXPECT_TRUE(IsHomePage("http://www.google.com/")); EXPECT_TRUE(IsHomePage("http://www.google.com/webhp")); EXPECT_TRUE(IsHomePage("http://www.google.com/webhp?rlz=TEST")); EXPECT_TRUE(IsHomePage("http://www.google.com/ig")); EXPECT_TRUE(IsHomePage("http://www.google.com/ig/foo")); EXPECT_TRUE(IsHomePage("http://www.google.com/ig?rlz=TEST")); EXPECT_TRUE(IsHomePage("http://www.google.com/ig/foo?rlz=TEST")); } TEST(GoogleUtilTest, GoodHomePagesSecure) { // Valid home page hosts. EXPECT_TRUE(IsHomePage("https://google.com")); EXPECT_TRUE(IsHomePage("https://www.google.com")); EXPECT_TRUE(IsHomePage("https://www.google.ca")); EXPECT_TRUE(IsHomePage("https://www.google.co.uk")); EXPECT_TRUE(IsHomePage("https://www.google.com:443/")); // Only the paths /, /webhp, and /ig.* are valid. Query parameters are // ignored. EXPECT_TRUE(IsHomePage("https://www.google.com/")); EXPECT_TRUE(IsHomePage("https://www.google.com/webhp")); EXPECT_TRUE(IsHomePage("https://www.google.com/webhp?rlz=TEST")); EXPECT_TRUE(IsHomePage("https://www.google.com/ig")); EXPECT_TRUE(IsHomePage("https://www.google.com/ig/foo")); EXPECT_TRUE(IsHomePage("https://www.google.com/ig?rlz=TEST")); EXPECT_TRUE(IsHomePage("https://www.google.com/ig/foo?rlz=TEST")); } TEST(GoogleUtilTest, BadHomePages) { EXPECT_FALSE(IsHomePage(std::string())); // If specified, only the "www" subdomain is OK. EXPECT_FALSE(IsHomePage("http://maps.google.com")); EXPECT_FALSE(IsHomePage("http://foo.google.com")); // No non-standard port numbers. EXPECT_FALSE(IsHomePage("http://www.google.com:1234")); EXPECT_FALSE(IsHomePage("https://www.google.com:5678")); // Invalid TLDs. EXPECT_FALSE(IsHomePage("http://www.google.abc")); EXPECT_FALSE(IsHomePage("http://www.google.com.abc")); EXPECT_FALSE(IsHomePage("http://www.google.abc.com")); EXPECT_FALSE(IsHomePage("http://www.google.ab.cd")); EXPECT_FALSE(IsHomePage("http://www.google.uk.qq")); // Must be http or https. EXPECT_FALSE(IsHomePage("ftp://www.google.com")); EXPECT_FALSE(IsHomePage("file://does/not/exist")); EXPECT_FALSE(IsHomePage("bad://www.google.com")); EXPECT_FALSE(IsHomePage("www.google.com")); // Only the paths /, /webhp, and /ig.* are valid. EXPECT_FALSE(IsHomePage("http://www.google.com/abc")); EXPECT_FALSE(IsHomePage("http://www.google.com/webhpabc")); EXPECT_FALSE(IsHomePage("http://www.google.com/webhp/abc")); EXPECT_FALSE(IsHomePage("http://www.google.com/abcig")); EXPECT_FALSE(IsHomePage("http://www.google.com/webhp/ig")); // A search URL should not be identified as a home page URL. EXPECT_FALSE(IsHomePage("http://www.google.com/search?q=something")); // Path is case sensitive. EXPECT_FALSE(IsHomePage("https://www.google.com/WEBHP")); } TEST(GoogleUtilTest, GoodSearchPagesNonSecure) { // Queries with path "/search" need to have the query parameter in either // the url parameter or the hash fragment. EXPECT_TRUE(IsSearch("http://www.google.com/search?q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/search#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob&q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob#age=24&q=thing")); EXPECT_TRUE(IsSearch("http://www.google.co.uk/search?q=something")); // It's actually valid for both to have the query parameter. EXPECT_TRUE(IsSearch("http://www.google.com/search?q=something#q=other")); // Queries with path "/webhp", "/" or "" need to have the query parameter in // the hash fragment. EXPECT_TRUE(IsSearch("http://www.google.com/webhp#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/webhp#name=bob&q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/webhp?name=bob#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/webhp?name=bob#age=24&q=thing")); EXPECT_TRUE(IsSearch("http://www.google.com/#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/#name=bob&q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/?name=bob#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com/?name=bob#age=24&q=something")); EXPECT_TRUE(IsSearch("http://www.google.com#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com#name=bob&q=something")); EXPECT_TRUE(IsSearch("http://www.google.com?name=bob#q=something")); EXPECT_TRUE(IsSearch("http://www.google.com?name=bob#age=24&q=something")); } TEST(GoogleUtilTest, GoodSearchPagesSecure) { // Queries with path "/search" need to have the query parameter in either // the url parameter or the hash fragment. EXPECT_TRUE(IsSearch("https://www.google.com/search?q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/search#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob&q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob#age=24&q=q")); EXPECT_TRUE(IsSearch("https://www.google.co.uk/search?q=something")); // It's actually valid for both to have the query parameter. EXPECT_TRUE(IsSearch("https://www.google.com/search?q=something#q=other")); // Queries with path "/webhp", "/" or "" need to have the query parameter in // the hash fragment. EXPECT_TRUE(IsSearch("https://www.google.com/webhp#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/webhp#name=bob&q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/webhp?name=bob#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/webhp?name=bob#age=24&q=thing")); EXPECT_TRUE(IsSearch("https://www.google.com/#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/#name=bob&q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/?name=bob#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com/?name=bob#age=24&q=something")); EXPECT_TRUE(IsSearch("https://www.google.com#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com#name=bob&q=something")); EXPECT_TRUE(IsSearch("https://www.google.com?name=bob#q=something")); EXPECT_TRUE(IsSearch("https://www.google.com?name=bob#age=24&q=something")); } TEST(GoogleUtilTest, BadSearches) { // A home page URL should not be identified as a search URL. EXPECT_FALSE(IsSearch(GoogleURLTracker::kDefaultGoogleHomepage)); EXPECT_FALSE(IsSearch("http://google.com")); EXPECT_FALSE(IsSearch("http://www.google.com")); EXPECT_FALSE(IsSearch("http://www.google.com/search")); EXPECT_FALSE(IsSearch("http://www.google.com/search?")); // Must be http or https EXPECT_FALSE(IsSearch("ftp://www.google.com/search?q=something")); EXPECT_FALSE(IsSearch("file://does/not/exist/search?q=something")); EXPECT_FALSE(IsSearch("bad://www.google.com/search?q=something")); EXPECT_FALSE(IsSearch("www.google.com/search?q=something")); // Can't have an empty query parameter. EXPECT_FALSE(IsSearch("http://www.google.com/search?q=")); EXPECT_FALSE(IsSearch("http://www.google.com/search?name=bob&q=")); EXPECT_FALSE(IsSearch("http://www.google.com/webhp#q=")); EXPECT_FALSE(IsSearch("http://www.google.com/webhp#name=bob&q=")); // Home page searches without a hash fragment query parameter are invalid. EXPECT_FALSE(IsSearch("http://www.google.com/webhp?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/webhp?q=something#no=good")); EXPECT_FALSE(IsSearch("http://www.google.com/webhp?name=bob&q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com?q=something")); // Some paths are outright invalid as searches. EXPECT_FALSE(IsSearch("http://www.google.com/notreal?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/chrome?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/search/nogood?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/webhp/nogood#q=something")); EXPECT_FALSE(IsSearch(std::string())); // Case sensitive paths. EXPECT_FALSE(IsSearch("http://www.google.com/SEARCH?q=something")); EXPECT_FALSE(IsSearch("http://www.google.com/WEBHP#q=something")); } TEST(GoogleUtilTest, GoogleDomains) { // Test some good Google domains (valid TLDs). EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://google.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.ca"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.biz.tj"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com/search?q=thing"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com/webhp"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // Test some bad Google domains (invalid TLDs). EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.notrealtld"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.faketld/search?q=q"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.yahoo.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // Test subdomain checks. EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://images.google.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://images.google.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://google.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // Port and scheme checks. EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:80"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:443"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::ALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::ALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:80"), google_util::DISALLOW_SUBDOMAIN, google_util::ALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:443"), google_util::DISALLOW_SUBDOMAIN, google_util::ALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("file://www.google.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("doesnotexist://www.google.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); } TEST(GoogleUtilTest, GoogleBaseURLNotSpecified) { // When no command-line flag is specified, no input to // StartsWithCommandLineGoogleBaseURL() should return true. EXPECT_FALSE(StartsWithBaseURL(std::string())); EXPECT_FALSE(StartsWithBaseURL("http://www.foo.com/")); EXPECT_FALSE(StartsWithBaseURL("http://www.google.com/")); // By default, none of the IsGoogleXXX functions should return true for a // "foo.com" URL. EXPECT_FALSE(IsGoogleHostname("www.foo.com", google_util::DISALLOW_SUBDOMAIN)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.foo.com/xyz"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsGoogleDomainUrl(GURL("https://www.foo.com/"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsHomePage("https://www.foo.com/webhp")); EXPECT_FALSE(IsSearch("http://www.foo.com/search?q=a")); // Override the Google base URL on the command line. CommandLine::ForCurrentProcess()->AppendSwitchASCII(switches::kGoogleBaseURL, "http://www.foo.com/"); // Only URLs which start with exactly the string on the command line should // cause StartsWithCommandLineGoogleBaseURL() to return true. EXPECT_FALSE(StartsWithBaseURL(std::string())); EXPECT_TRUE(StartsWithBaseURL("http://www.foo.com/")); EXPECT_TRUE(StartsWithBaseURL("http://www.foo.com/abc")); EXPECT_FALSE(StartsWithBaseURL("https://www.foo.com/")); EXPECT_FALSE(StartsWithBaseURL("http://www.google.com/")); // The various IsGoogleXXX functions should respect the command-line flag. EXPECT_TRUE(IsGoogleHostname("www.foo.com", google_util::DISALLOW_SUBDOMAIN)); EXPECT_FALSE(IsGoogleHostname("foo.com", google_util::ALLOW_SUBDOMAIN)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.foo.com/xyz"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.foo.com/"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsHomePage("https://www.foo.com/webhp")); EXPECT_FALSE(IsHomePage("http://www.foo.com/xyz")); EXPECT_TRUE(IsSearch("http://www.foo.com/search?q=a")); } TEST(GoogleUtilTest, GoogleBaseURLDisallowQuery) { CommandLine::ForCurrentProcess()->AppendSwitchASCII(switches::kGoogleBaseURL, "http://www.foo.com/?q="); EXPECT_FALSE(google_util::CommandLineGoogleBaseURL().is_valid()); } TEST(GoogleUtilTest, GoogleBaseURLDisallowRef) { CommandLine::ForCurrentProcess()->AppendSwitchASCII(switches::kGoogleBaseURL, "http://www.foo.com/#q="); EXPECT_FALSE(google_util::CommandLineGoogleBaseURL().is_valid()); } TEST(GoogleUtilTest, GoogleBaseURLFixup) { CommandLine::ForCurrentProcess()->AppendSwitchASCII(switches::kGoogleBaseURL, "www.foo.com"); ASSERT_TRUE(google_util::CommandLineGoogleBaseURL().is_valid()); EXPECT_EQ("http://www.foo.com/", google_util::CommandLineGoogleBaseURL().spec()); } TEST(GoogleUtilTest, YoutubeDomains) { EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://www.youtube.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://youtube.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://youtube.com/path/main.html"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsYoutubeDomainUrl(GURL("http://notyoutube.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // TLD checks. EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://www.youtube.ca"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://www.youtube.co.uk"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsYoutubeDomainUrl(GURL("http://www.youtube.notrealtld"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // Subdomain checks. EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://images.youtube.com"), google_util::ALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsYoutubeDomainUrl(GURL("http://images.youtube.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); // Port and scheme checks. EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://www.youtube.com:80"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsYoutubeDomainUrl(GURL("https://www.youtube.com:443"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsYoutubeDomainUrl(GURL("http://www.youtube.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); EXPECT_TRUE(IsYoutubeDomainUrl(GURL("http://www.youtube.com:123"), google_util::DISALLOW_SUBDOMAIN, google_util::ALLOW_NON_STANDARD_PORTS)); EXPECT_FALSE(IsYoutubeDomainUrl(GURL("file://www.youtube.com"), google_util::DISALLOW_SUBDOMAIN, google_util::DISALLOW_NON_STANDARD_PORTS)); }