普通文本  |  337行  |  16.82 KB

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "base/command_line.h"
#include "chrome/browser/google/google_url_tracker.h"
#include "chrome/browser/google/google_util.h"
#include "chrome/common/chrome_switches.h"
#include "testing/gtest/include/gtest/gtest.h"

using google_util::IsGoogleDomainUrl;


// Helpers --------------------------------------------------------------------

namespace {

// These functions merely provide brevity in the callers.

bool IsHomePage(const std::string& url) {
  return google_util::IsGoogleHomePageUrl(GURL(url));
}

bool IsSearch(const std::string& url) {
  return google_util::IsGoogleSearchUrl(GURL(url));
}

bool StartsWithBaseURL(const std::string& url) {
  return google_util::StartsWithCommandLineGoogleBaseURL(GURL(url));
}

}  // namespace


// Actual tests ---------------------------------------------------------------

TEST(GoogleUtilTest, GoodHomePagesNonSecure) {
  // Valid home page hosts.
  EXPECT_TRUE(IsHomePage(GoogleURLTracker::kDefaultGoogleHomepage));
  EXPECT_TRUE(IsHomePage("http://google.com"));
  EXPECT_TRUE(IsHomePage("http://www.google.com"));
  EXPECT_TRUE(IsHomePage("http://www.google.ca"));
  EXPECT_TRUE(IsHomePage("http://www.google.co.uk"));
  EXPECT_TRUE(IsHomePage("http://www.google.com:80/"));

  // Only the paths /, /webhp, and /ig.* are valid.  Query parameters are
  // ignored.
  EXPECT_TRUE(IsHomePage("http://www.google.com/"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/webhp"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/webhp?rlz=TEST"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/ig"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/ig/foo"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/ig?rlz=TEST"));
  EXPECT_TRUE(IsHomePage("http://www.google.com/ig/foo?rlz=TEST"));
}

TEST(GoogleUtilTest, GoodHomePagesSecure) {
  // Valid home page hosts.
  EXPECT_TRUE(IsHomePage("https://google.com"));
  EXPECT_TRUE(IsHomePage("https://www.google.com"));
  EXPECT_TRUE(IsHomePage("https://www.google.ca"));
  EXPECT_TRUE(IsHomePage("https://www.google.co.uk"));
  EXPECT_TRUE(IsHomePage("https://www.google.com:443/"));

  // Only the paths /, /webhp, and /ig.* are valid.  Query parameters are
  // ignored.
  EXPECT_TRUE(IsHomePage("https://www.google.com/"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/webhp"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/webhp?rlz=TEST"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/ig"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/ig/foo"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/ig?rlz=TEST"));
  EXPECT_TRUE(IsHomePage("https://www.google.com/ig/foo?rlz=TEST"));
}

TEST(GoogleUtilTest, BadHomePages) {
  EXPECT_FALSE(IsHomePage(std::string()));

  // If specified, only the "www" subdomain is OK.
  EXPECT_FALSE(IsHomePage("http://maps.google.com"));
  EXPECT_FALSE(IsHomePage("http://foo.google.com"));

  // No non-standard port numbers.
  EXPECT_FALSE(IsHomePage("http://www.google.com:1234"));
  EXPECT_FALSE(IsHomePage("https://www.google.com:5678"));

  // Invalid TLDs.
  EXPECT_FALSE(IsHomePage("http://www.google.abc"));
  EXPECT_FALSE(IsHomePage("http://www.google.com.abc"));
  EXPECT_FALSE(IsHomePage("http://www.google.abc.com"));
  EXPECT_FALSE(IsHomePage("http://www.google.ab.cd"));
  EXPECT_FALSE(IsHomePage("http://www.google.uk.qq"));

  // Must be http or https.
  EXPECT_FALSE(IsHomePage("ftp://www.google.com"));
  EXPECT_FALSE(IsHomePage("file://does/not/exist"));
  EXPECT_FALSE(IsHomePage("bad://www.google.com"));
  EXPECT_FALSE(IsHomePage("www.google.com"));

  // Only the paths /, /webhp, and /ig.* are valid.
  EXPECT_FALSE(IsHomePage("http://www.google.com/abc"));
  EXPECT_FALSE(IsHomePage("http://www.google.com/webhpabc"));
  EXPECT_FALSE(IsHomePage("http://www.google.com/webhp/abc"));
  EXPECT_FALSE(IsHomePage("http://www.google.com/abcig"));
  EXPECT_FALSE(IsHomePage("http://www.google.com/webhp/ig"));

  // A search URL should not be identified as a home page URL.
  EXPECT_FALSE(IsHomePage("http://www.google.com/search?q=something"));

  // Path is case sensitive.
  EXPECT_FALSE(IsHomePage("https://www.google.com/WEBHP"));
}

TEST(GoogleUtilTest, GoodSearchPagesNonSecure) {
  // Queries with path "/search" need to have the query parameter in either
  // the url parameter or the hash fragment.
  EXPECT_TRUE(IsSearch("http://www.google.com/search?q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/search#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob&q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/search?name=bob#age=24&q=thing"));
  EXPECT_TRUE(IsSearch("http://www.google.co.uk/search?q=something"));
  // It's actually valid for both to have the query parameter.
  EXPECT_TRUE(IsSearch("http://www.google.com/search?q=something#q=other"));

  // Queries with path "/webhp", "/" or "" need to have the query parameter in
  // the hash fragment.
  EXPECT_TRUE(IsSearch("http://www.google.com/webhp#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/webhp#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/webhp?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/webhp?name=bob#age=24&q=thing"));

  EXPECT_TRUE(IsSearch("http://www.google.com/#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com/?name=bob#age=24&q=something"));

  EXPECT_TRUE(IsSearch("http://www.google.com#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("http://www.google.com?name=bob#age=24&q=something"));
}

TEST(GoogleUtilTest, GoodSearchPagesSecure) {
  // Queries with path "/search" need to have the query parameter in either
  // the url parameter or the hash fragment.
  EXPECT_TRUE(IsSearch("https://www.google.com/search?q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/search#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob&q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/search?name=bob#age=24&q=q"));
  EXPECT_TRUE(IsSearch("https://www.google.co.uk/search?q=something"));
  // It's actually valid for both to have the query parameter.
  EXPECT_TRUE(IsSearch("https://www.google.com/search?q=something#q=other"));

  // Queries with path "/webhp", "/" or "" need to have the query parameter in
  // the hash fragment.
  EXPECT_TRUE(IsSearch("https://www.google.com/webhp#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/webhp#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/webhp?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/webhp?name=bob#age=24&q=thing"));

  EXPECT_TRUE(IsSearch("https://www.google.com/#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com/?name=bob#age=24&q=something"));

  EXPECT_TRUE(IsSearch("https://www.google.com#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com#name=bob&q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com?name=bob#q=something"));
  EXPECT_TRUE(IsSearch("https://www.google.com?name=bob#age=24&q=something"));
}

TEST(GoogleUtilTest, BadSearches) {
  // A home page URL should not be identified as a search URL.
  EXPECT_FALSE(IsSearch(GoogleURLTracker::kDefaultGoogleHomepage));
  EXPECT_FALSE(IsSearch("http://google.com"));
  EXPECT_FALSE(IsSearch("http://www.google.com"));
  EXPECT_FALSE(IsSearch("http://www.google.com/search"));
  EXPECT_FALSE(IsSearch("http://www.google.com/search?"));

  // Must be http or https
  EXPECT_FALSE(IsSearch("ftp://www.google.com/search?q=something"));
  EXPECT_FALSE(IsSearch("file://does/not/exist/search?q=something"));
  EXPECT_FALSE(IsSearch("bad://www.google.com/search?q=something"));
  EXPECT_FALSE(IsSearch("www.google.com/search?q=something"));

  // Can't have an empty query parameter.
  EXPECT_FALSE(IsSearch("http://www.google.com/search?q="));
  EXPECT_FALSE(IsSearch("http://www.google.com/search?name=bob&q="));
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp#q="));
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp#name=bob&q="));

  // Home page searches without a hash fragment query parameter are invalid.
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp?q=something#no=good"));
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp?name=bob&q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com?q=something"));

  // Some paths are outright invalid as searches.
  EXPECT_FALSE(IsSearch("http://www.google.com/notreal?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/chrome?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/search/nogood?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/webhp/nogood#q=something"));
  EXPECT_FALSE(IsSearch(std::string()));

  // Case sensitive paths.
  EXPECT_FALSE(IsSearch("http://www.google.com/SEARCH?q=something"));
  EXPECT_FALSE(IsSearch("http://www.google.com/WEBHP#q=something"));
}

TEST(GoogleUtilTest, GoogleDomains) {
  // Test some good Google domains (valid TLDs).
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://google.com"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.ca"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.biz.tj"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com/search?q=thing"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com/webhp"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));

  // Test some bad Google domains (invalid TLDs).
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.notrealtld"),
                                 google_util::ALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.faketld/search?q=q"),
                                 google_util::ALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.yahoo.com"),
                                 google_util::ALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));

  // Test subdomain checks.
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://images.google.com"),
                                google_util::ALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://images.google.com"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://google.com"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));

  // Port and scheme checks.
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:80"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.com:123"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:443"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.google.com:123"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:123"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::ALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:123"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::ALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.google.com:80"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::ALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.google.com:443"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::ALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("file://www.google.com"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("doesnotexist://www.google.com"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
}

TEST(GoogleUtilTest, GoogleBaseURL) {
  // When no command-line flag is specified, no input to
  // StartsWithCommandLineGoogleBaseURL() should return true.
  EXPECT_FALSE(StartsWithBaseURL(std::string()));
  EXPECT_FALSE(StartsWithBaseURL("http://www.foo.com/"));
  EXPECT_FALSE(StartsWithBaseURL("http://www.google.com/"));

  // By default, none of the IsGoogleXXX functions should return true for a
  // "foo.com" URL.
  EXPECT_FALSE(IsGoogleHostname("www.foo.com",
                                google_util::DISALLOW_SUBDOMAIN));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("http://www.foo.com/xyz"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsGoogleDomainUrl(GURL("https://www.foo.com/"),
                                 google_util::DISALLOW_SUBDOMAIN,
                                 google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_FALSE(IsHomePage("https://www.foo.com/webhp"));
  EXPECT_FALSE(IsSearch("http://www.foo.com/search?q=a"));

  // Override the Google base URL on the command line.
  CommandLine::ForCurrentProcess()->AppendSwitchASCII(switches::kGoogleBaseURL,
                                                      "http://www.foo.com/");

  // Only URLs which start with exactly the string on the command line should
  // cause StartsWithCommandLineGoogleBaseURL() to return true.
  EXPECT_FALSE(StartsWithBaseURL(std::string()));
  EXPECT_TRUE(StartsWithBaseURL("http://www.foo.com/"));
  EXPECT_TRUE(StartsWithBaseURL("http://www.foo.com/abc"));
  EXPECT_FALSE(StartsWithBaseURL("https://www.foo.com/"));
  EXPECT_FALSE(StartsWithBaseURL("http://www.google.com/"));

  // The various IsGoogleXXX functions should respect the command-line flag.
  EXPECT_TRUE(IsGoogleHostname("www.foo.com", google_util::DISALLOW_SUBDOMAIN));
  EXPECT_FALSE(IsGoogleHostname("foo.com", google_util::ALLOW_SUBDOMAIN));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("http://www.foo.com/xyz"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsGoogleDomainUrl(GURL("https://www.foo.com/"),
                                google_util::DISALLOW_SUBDOMAIN,
                                google_util::DISALLOW_NON_STANDARD_PORTS));
  EXPECT_TRUE(IsHomePage("https://www.foo.com/webhp"));
  EXPECT_FALSE(IsHomePage("http://www.foo.com/xyz"));
  EXPECT_TRUE(IsSearch("http://www.foo.com/search?q=a"));
}