// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "chrome/browser/history/url_database.h" #include <algorithm> #include <limits> #include <string> #include <vector> #include "base/i18n/case_conversion.h" #include "base/strings/utf_string_conversions.h" #include "chrome/common/url_constants.h" #include "net/base/net_util.h" #include "sql/statement.h" #include "ui/base/l10n/l10n_util.h" #include "url/gurl.h" namespace history { const char URLDatabase::kURLRowFields[] = HISTORY_URL_ROW_FIELDS; const int URLDatabase::kNumURLRowFields = 9; URLDatabase::URLEnumeratorBase::URLEnumeratorBase() : initialized_(false) { } URLDatabase::URLEnumeratorBase::~URLEnumeratorBase() { } URLDatabase::URLEnumerator::URLEnumerator() { } bool URLDatabase::URLEnumerator::GetNextURL(URLRow* r) { if (statement_.Step()) { FillURLRow(statement_, r); return true; } return false; } URLDatabase::URLDatabase() : has_keyword_search_terms_(false) { } URLDatabase::~URLDatabase() { } // static std::string URLDatabase::GURLToDatabaseURL(const GURL& gurl) { // TODO(brettw): do something fancy here with encoding, etc. // Strip username and password from URL before sending to DB. GURL::Replacements replacements; replacements.ClearUsername(); replacements.ClearPassword(); return (gurl.ReplaceComponents(replacements)).spec(); } // Convenience to fill a history::URLRow. Must be in sync with the fields in // kURLRowFields. void URLDatabase::FillURLRow(sql::Statement& s, history::URLRow* i) { DCHECK(i); i->id_ = s.ColumnInt64(0); i->url_ = GURL(s.ColumnString(1)); i->title_ = s.ColumnString16(2); i->visit_count_ = s.ColumnInt(3); i->typed_count_ = s.ColumnInt(4); i->last_visit_ = base::Time::FromInternalValue(s.ColumnInt64(5)); i->hidden_ = s.ColumnInt(6) != 0; } bool URLDatabase::GetURLRow(URLID url_id, URLRow* info) { // TODO(brettw) We need check for empty URLs to handle the case where // there are old URLs in the database that are empty that got in before // we added any checks. We should eventually be able to remove it // when all inputs are using GURL (which prohibit empty input). sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE id=?")); statement.BindInt64(0, url_id); if (statement.Step()) { FillURLRow(statement, info); return true; } return false; } bool URLDatabase::GetAllTypedUrls(URLRows* urls) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE typed_count > 0")); while (statement.Step()) { URLRow info; FillURLRow(statement, &info); urls->push_back(info); } return true; } URLID URLDatabase::GetRowForURL(const GURL& url, history::URLRow* info) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE url=?")); std::string url_string = GURLToDatabaseURL(url); statement.BindString(0, url_string); if (!statement.Step()) return 0; // no data if (info) FillURLRow(statement, info); return statement.ColumnInt64(0); } bool URLDatabase::UpdateURLRow(URLID url_id, const history::URLRow& info) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "UPDATE urls SET title=?,visit_count=?,typed_count=?,last_visit_time=?," "hidden=?" "WHERE id=?")); statement.BindString16(0, info.title()); statement.BindInt(1, info.visit_count()); statement.BindInt(2, info.typed_count()); statement.BindInt64(3, info.last_visit().ToInternalValue()); statement.BindInt(4, info.hidden() ? 1 : 0); statement.BindInt64(5, url_id); return statement.Run(); } URLID URLDatabase::AddURLInternal(const history::URLRow& info, bool is_temporary) { // This function is used to insert into two different tables, so we have to // do some shuffling. Unfortinately, we can't use the macro // HISTORY_URL_ROW_FIELDS because that specifies the table name which is // invalid in the insert syntax. #define ADDURL_COMMON_SUFFIX \ " (url, title, visit_count, typed_count, "\ "last_visit_time, hidden) "\ "VALUES (?,?,?,?,?,?)" const char* statement_name; const char* statement_sql; if (is_temporary) { statement_name = "AddURLTemporary"; statement_sql = "INSERT INTO temp_urls" ADDURL_COMMON_SUFFIX; } else { statement_name = "AddURL"; statement_sql = "INSERT INTO urls" ADDURL_COMMON_SUFFIX; } #undef ADDURL_COMMON_SUFFIX sql::Statement statement(GetDB().GetCachedStatement( sql::StatementID(statement_name), statement_sql)); statement.BindString(0, GURLToDatabaseURL(info.url())); statement.BindString16(1, info.title()); statement.BindInt(2, info.visit_count()); statement.BindInt(3, info.typed_count()); statement.BindInt64(4, info.last_visit().ToInternalValue()); statement.BindInt(5, info.hidden() ? 1 : 0); if (!statement.Run()) { VLOG(0) << "Failed to add url " << info.url().possibly_invalid_spec() << " to table history.urls."; return 0; } return GetDB().GetLastInsertRowId(); } bool URLDatabase::DeleteURLRow(URLID id) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "DELETE FROM urls WHERE id = ?")); statement.BindInt64(0, id); if (!statement.Run()) return false; // And delete any keyword visits. return !has_keyword_search_terms_ || DeleteKeywordSearchTermForURL(id); } bool URLDatabase::CreateTemporaryURLTable() { return CreateURLTable(true); } bool URLDatabase::CommitTemporaryURLTable() { // See the comments in the header file as well as // HistoryBackend::DeleteAllHistory() for more information on how this works // and why it does what it does. // // Note that the main database overrides this to additionally create the // supplimentary indices that the archived database doesn't need. // Swap the url table out and replace it with the temporary one. if (!GetDB().Execute("DROP TABLE urls")) { NOTREACHED() << GetDB().GetErrorMessage(); return false; } if (!GetDB().Execute("ALTER TABLE temp_urls RENAME TO urls")) { NOTREACHED() << GetDB().GetErrorMessage(); return false; } // Create the index over URLs. This is needed for the main, in-memory, and // archived databases, so we always do it. The supplimentary indices used by // the main database are not created here. When deleting all history, they // are created by HistoryDatabase::RecreateAllButStarAndURLTables(). CreateMainURLIndex(); return true; } bool URLDatabase::InitURLEnumeratorForEverything(URLEnumerator* enumerator) { DCHECK(!enumerator->initialized_); std::string sql("SELECT "); sql.append(kURLRowFields); sql.append(" FROM urls"); enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); enumerator->initialized_ = enumerator->statement_.is_valid(); return enumerator->statement_.is_valid(); } bool URLDatabase::InitURLEnumeratorForSignificant(URLEnumerator* enumerator) { DCHECK(!enumerator->initialized_); std::string sql("SELECT "); sql.append(kURLRowFields); sql.append(" FROM urls WHERE last_visit_time >= ? OR visit_count >= ? OR " "typed_count >= ?"); enumerator->statement_.Assign(GetDB().GetUniqueStatement(sql.c_str())); enumerator->statement_.BindInt64( 0, AutocompleteAgeThreshold().ToInternalValue()); enumerator->statement_.BindInt(1, kLowQualityMatchVisitLimit); enumerator->statement_.BindInt(2, kLowQualityMatchTypedLimit); enumerator->initialized_ = enumerator->statement_.is_valid(); return enumerator->statement_.is_valid(); } bool URLDatabase::AutocompleteForPrefix(const std::string& prefix, size_t max_results, bool typed_only, URLRows* results) { // NOTE: this query originally sorted by starred as the second parameter. But // as bookmarks is no longer part of the db we no longer include the order // by clause. results->clear(); const char* sql; int line; if (typed_only) { sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " "WHERE url >= ? AND url < ? AND hidden = 0 AND typed_count > 0 " "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " "LIMIT ?"; line = __LINE__; } else { sql = "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls " "WHERE url >= ? AND url < ? AND hidden = 0 " "ORDER BY typed_count DESC, visit_count DESC, last_visit_time DESC " "LIMIT ?"; line = __LINE__; } sql::Statement statement( GetDB().GetCachedStatement(sql::StatementID(__FILE__, line), sql)); // We will find all strings between "prefix" and this string, which is prefix // followed by the maximum character size. Use 8-bit strings for everything // so we can be sure sqlite is comparing everything in 8-bit mode. Otherwise, // it will have to convert strings either to UTF-8 or UTF-16 for comparison. std::string end_query(prefix); end_query.push_back(std::numeric_limits<unsigned char>::max()); statement.BindString(0, prefix); statement.BindString(1, end_query); statement.BindInt(2, static_cast<int>(max_results)); while (statement.Step()) { history::URLRow info; FillURLRow(statement, &info); if (info.url().is_valid()) results->push_back(info); } return !results->empty(); } bool URLDatabase::IsTypedHost(const std::string& host) { const char* schemes[] = { content::kHttpScheme, content::kHttpsScheme, content::kFtpScheme }; URLRows dummy; for (size_t i = 0; i < arraysize(schemes); ++i) { std::string scheme_and_host(schemes[i]); scheme_and_host += content::kStandardSchemeSeparator + host; if (AutocompleteForPrefix(scheme_and_host + '/', 1, true, &dummy) || AutocompleteForPrefix(scheme_and_host + ':', 1, true, &dummy)) return true; } return false; } bool URLDatabase::FindShortestURLFromBase(const std::string& base, const std::string& url, int min_visits, int min_typed, bool allow_base, history::URLRow* info) { // Select URLs that start with |base| and are prefixes of |url|. All parts // of this query except the substr() call can be done using the index. We // could do this query with a couple of LIKE or GLOB statements as well, but // those wouldn't use the index, and would run into problems with "wildcard" // characters that appear in URLs (% for LIKE, or *, ? for GLOB). std::string sql("SELECT "); sql.append(kURLRowFields); sql.append(" FROM urls WHERE url "); sql.append(allow_base ? ">=" : ">"); sql.append(" ? AND url < :end AND url = substr(:end, 1, length(url)) " "AND hidden = 0 AND visit_count >= ? AND typed_count >= ? " "ORDER BY url LIMIT 1"); sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str())); statement.BindString(0, base); statement.BindString(1, url); // :end statement.BindInt(2, min_visits); statement.BindInt(3, min_typed); if (!statement.Step()) return false; DCHECK(info); FillURLRow(statement, info); return true; } bool URLDatabase::GetTextMatches(const base::string16& query, URLRows* results) { ScopedVector<QueryNode> query_nodes; query_parser_.ParseQueryNodes(query, &query_nodes.get()); results->clear(); sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT" HISTORY_URL_ROW_FIELDS "FROM urls WHERE hidden = 0")); while (statement.Step()) { std::vector<QueryWord> query_words; base::string16 url = base::i18n::ToLower(statement.ColumnString16(1)); query_parser_.ExtractQueryWords(url, &query_words); GURL gurl(url); if (gurl.is_valid()) { // Decode punycode to match IDN. // |query_words| won't be shown to user - therefore we can use empty // |languages| to reduce dependency (no need to call PrefService). base::string16 ascii = base::ASCIIToUTF16(gurl.host()); base::string16 utf = net::IDNToUnicode(gurl.host(), std::string()); if (ascii != utf) query_parser_.ExtractQueryWords(utf, &query_words); } base::string16 title = base::i18n::ToLower(statement.ColumnString16(2)); query_parser_.ExtractQueryWords(title, &query_words); if (query_parser_.DoesQueryMatch(query_words, query_nodes.get())) { history::URLResult info; FillURLRow(statement, &info); if (info.url().is_valid()) results->push_back(info); } } return !results->empty(); } bool URLDatabase::InitKeywordSearchTermsTable() { has_keyword_search_terms_ = true; if (!GetDB().DoesTableExist("keyword_search_terms")) { if (!GetDB().Execute("CREATE TABLE keyword_search_terms (" "keyword_id INTEGER NOT NULL," // ID of the TemplateURL. "url_id INTEGER NOT NULL," // ID of the url. "lower_term LONGVARCHAR NOT NULL," // The search term, in lower case. "term LONGVARCHAR NOT NULL)")) // The actual search term. return false; } return true; } bool URLDatabase::CreateKeywordSearchTermsIndices() { // For searching. if (!GetDB().Execute( "CREATE INDEX IF NOT EXISTS keyword_search_terms_index1 ON " "keyword_search_terms (keyword_id, lower_term)")) { return false; } // For deletion. if (!GetDB().Execute( "CREATE INDEX IF NOT EXISTS keyword_search_terms_index2 ON " "keyword_search_terms (url_id)")) { return false; } // For query or deletion by term. if (!GetDB().Execute( "CREATE INDEX IF NOT EXISTS keyword_search_terms_index3 ON " "keyword_search_terms (term)")) { return false; } return true; } bool URLDatabase::DropKeywordSearchTermsTable() { // This will implicitly delete the indices over the table. return GetDB().Execute("DROP TABLE keyword_search_terms"); } bool URLDatabase::SetKeywordSearchTermsForURL(URLID url_id, TemplateURLID keyword_id, const base::string16& term) { DCHECK(url_id && keyword_id && !term.empty()); sql::Statement exist_statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT term FROM keyword_search_terms " "WHERE keyword_id = ? AND url_id = ?")); exist_statement.BindInt64(0, keyword_id); exist_statement.BindInt64(1, url_id); if (exist_statement.Step()) return true; // Term already exists, no need to add it. if (!exist_statement.Succeeded()) return false; sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "INSERT INTO keyword_search_terms (keyword_id, url_id, lower_term, term) " "VALUES (?,?,?,?)")); statement.BindInt64(0, keyword_id); statement.BindInt64(1, url_id); statement.BindString16(2, base::i18n::ToLower(term)); statement.BindString16(3, term); return statement.Run(); } bool URLDatabase::GetKeywordSearchTermRow(URLID url_id, KeywordSearchTermRow* row) { DCHECK(url_id); sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT keyword_id, term FROM keyword_search_terms WHERE url_id=?")); statement.BindInt64(0, url_id); if (!statement.Step()) return false; if (row) { row->url_id = url_id; row->keyword_id = statement.ColumnInt64(0); row->term = statement.ColumnString16(1); } return true; } bool URLDatabase::GetKeywordSearchTermRows( const base::string16& term, std::vector<KeywordSearchTermRow>* rows) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT keyword_id, url_id FROM keyword_search_terms WHERE term=?")); statement.BindString16(0, term); if (!statement.is_valid()) return false; while (statement.Step()) { KeywordSearchTermRow row; row.url_id = statement.ColumnInt64(1); row.keyword_id = statement.ColumnInt64(0); row.term = term; rows->push_back(row); } return true; } void URLDatabase::DeleteAllSearchTermsForKeyword( TemplateURLID keyword_id) { DCHECK(keyword_id); sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "DELETE FROM keyword_search_terms WHERE keyword_id=?")); statement.BindInt64(0, keyword_id); statement.Run(); } void URLDatabase::GetMostRecentKeywordSearchTerms( TemplateURLID keyword_id, const base::string16& prefix, int max_count, std::vector<KeywordSearchTermVisit>* matches) { // NOTE: the keyword_id can be zero if on first run the user does a query // before the TemplateURLService has finished loading. As the chances of this // occurring are small, we ignore it. if (!keyword_id) return; DCHECK(!prefix.empty()); sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "SELECT DISTINCT kv.term, u.visit_count, u.last_visit_time " "FROM keyword_search_terms kv " "JOIN urls u ON kv.url_id = u.id " "WHERE kv.keyword_id = ? AND kv.lower_term >= ? AND kv.lower_term < ? " "ORDER BY u.last_visit_time DESC LIMIT ?")); // NOTE: Keep this ToLower() call in sync with search_provider.cc. base::string16 lower_prefix = base::i18n::ToLower(prefix); // This magic gives us a prefix search. base::string16 next_prefix = lower_prefix; next_prefix[next_prefix.size() - 1] = next_prefix[next_prefix.size() - 1] + 1; statement.BindInt64(0, keyword_id); statement.BindString16(1, lower_prefix); statement.BindString16(2, next_prefix); statement.BindInt(3, max_count); KeywordSearchTermVisit visit; while (statement.Step()) { visit.term = statement.ColumnString16(0); visit.visits = statement.ColumnInt(1); visit.time = base::Time::FromInternalValue(statement.ColumnInt64(2)); matches->push_back(visit); } } bool URLDatabase::DeleteKeywordSearchTerm(const base::string16& term) { sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE, "DELETE FROM keyword_search_terms WHERE term=?")); statement.BindString16(0, term); return statement.Run(); } bool URLDatabase::DeleteKeywordSearchTermForURL(URLID url_id) { sql::Statement statement(GetDB().GetCachedStatement( SQL_FROM_HERE, "DELETE FROM keyword_search_terms WHERE url_id=?")); statement.BindInt64(0, url_id); return statement.Run(); } bool URLDatabase::DropStarredIDFromURLs() { if (!GetDB().DoesColumnExist("urls", "starred_id")) return true; // urls is already updated, no need to continue. // Create a temporary table to contain the new URLs table. if (!CreateTemporaryURLTable()) { NOTREACHED(); return false; } // Copy the contents. if (!GetDB().Execute( "INSERT INTO temp_urls (id, url, title, visit_count, typed_count, " "last_visit_time, hidden, favicon_id) " "SELECT id, url, title, visit_count, typed_count, last_visit_time, " "hidden, favicon_id FROM urls")) { NOTREACHED() << GetDB().GetErrorMessage(); return false; } // Rename/commit the tmp table. CommitTemporaryURLTable(); return true; } bool URLDatabase::CreateURLTable(bool is_temporary) { const char* name = is_temporary ? "temp_urls" : "urls"; if (GetDB().DoesTableExist(name)) return true; std::string sql; sql.append("CREATE TABLE "); sql.append(name); sql.append("(" "id INTEGER PRIMARY KEY," "url LONGVARCHAR," "title LONGVARCHAR," "visit_count INTEGER DEFAULT 0 NOT NULL," "typed_count INTEGER DEFAULT 0 NOT NULL," "last_visit_time INTEGER NOT NULL," "hidden INTEGER DEFAULT 0 NOT NULL," "favicon_id INTEGER DEFAULT 0 NOT NULL)"); // favicon_id is not used now. return GetDB().Execute(sql.c_str()); } bool URLDatabase::CreateMainURLIndex() { // Index over URLs so we can quickly look up based on URL. return GetDB().Execute( "CREATE INDEX IF NOT EXISTS urls_url_index ON urls (url)"); } } // namespace history