普通文本  |  635行  |  22.57 KB

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "chrome/browser/history/visit_database.h"

#include <algorithm>
#include <limits>
#include <map>
#include <set>

#include "base/logging.h"
#include "base/strings/string_number_conversions.h"
#include "chrome/browser/history/url_database.h"
#include "chrome/browser/history/visit_filter.h"
#include "chrome/common/url_constants.h"
#include "content/public/common/page_transition_types.h"
#include "sql/statement.h"

namespace history {

// Default constructor; the body is empty.
VisitDatabase::VisitDatabase() {}

// Destructor; the body is empty.
VisitDatabase::~VisitDatabase() {}

// Creates the "visits" and "visit_source" tables when they are absent and
// makes sure the three indices over "visits" exist. Returns false as soon as
// any statement fails to execute; returns true once everything has run.
bool VisitDatabase::InitVisitTable() {
  static const char kCreateVisitsTableSql[] =
      "CREATE TABLE visits("
      "id INTEGER PRIMARY KEY,"
      "url INTEGER NOT NULL,"  // key of the URL this corresponds to
      "visit_time INTEGER NOT NULL,"
      "from_visit INTEGER,"
      "transition INTEGER DEFAULT 0 NOT NULL,"
      "segment_id INTEGER,"
      // Some old DBs may have an "is_indexed" field here, but this is no
      // longer used and should NOT be read or written from any longer.
      "visit_duration INTEGER DEFAULT 0 NOT NULL)";

  // Visit source table contains the source information for all the visits. To
  // save space, we do not record those user browsed visits which would be the
  // majority in this table. Only other sources are recorded.
  // Due to the tight relationship between visit_source and visits table, they
  // should be created and dropped at the same time.
  static const char kCreateVisitSourceTableSql[] =
      "CREATE TABLE visit_source("
      "id INTEGER PRIMARY KEY,source INTEGER NOT NULL)";

  // Executed in this order, each one unconditionally (the SQL itself is a
  // no-op when the index already exists).
  static const char* const kCreateIndexSql[] = {
    // Index over url so we can quickly find visits for a page.
    "CREATE INDEX IF NOT EXISTS visits_url_index ON visits (url)",
    // Index over from visits so that we can efficiently find referrers and
    // redirects.
    "CREATE INDEX IF NOT EXISTS visits_from_index ON "
    "visits (from_visit)",
    // Index over time so that we can efficiently find the visits in a given
    // time range (most history views are time-based).
    "CREATE INDEX IF NOT EXISTS visits_time_index ON "
    "visits (visit_time)",
  };

  if (!GetDB().DoesTableExist("visits") &&
      !GetDB().Execute(kCreateVisitsTableSql))
    return false;

  if (!GetDB().DoesTableExist("visit_source") &&
      !GetDB().Execute(kCreateVisitSourceTableSql))
    return false;

  const size_t index_count =
      sizeof(kCreateIndexSql) / sizeof(kCreateIndexSql[0]);
  for (size_t i = 0; i < index_count; ++i) {
    if (!GetDB().Execute(kCreateIndexSql[i]))
      return false;
  }

  return true;
}

// Drops the visit_source table (if it exists) and then the visits table.
// Dropping a table also drops the indices over it. The second drop is only
// attempted when the first succeeded; returns true when both succeeded.
bool VisitDatabase::DropVisitTable() {
  if (!GetDB().Execute("DROP TABLE IF EXISTS visit_source"))
    return false;
  return GetDB().Execute("DROP TABLE visits");
}

// Copies the current result row of |statement| into |visit|.
// Must be in sync with HISTORY_VISIT_ROW_FIELDS.
// static
void VisitDatabase::FillVisitRow(sql::Statement& statement, VisitRow* visit) {
  // Positions of the columns within the result row.
  const int kIdColumn = 0;
  const int kUrlColumn = 1;
  const int kVisitTimeColumn = 2;
  const int kFromVisitColumn = 3;
  const int kTransitionColumn = 4;
  const int kSegmentIdColumn = 5;
  const int kVisitDurationColumn = 6;

  visit->visit_id = statement.ColumnInt64(kIdColumn);
  visit->url_id = statement.ColumnInt64(kUrlColumn);
  visit->visit_time =
      base::Time::FromInternalValue(statement.ColumnInt64(kVisitTimeColumn));
  visit->referring_visit = statement.ColumnInt64(kFromVisitColumn);
  visit->transition =
      content::PageTransitionFromInt(statement.ColumnInt(kTransitionColumn));
  visit->segment_id = statement.ColumnInt64(kSegmentIdColumn);
  visit->visit_duration = base::TimeDelta::FromInternalValue(
      statement.ColumnInt64(kVisitDurationColumn));
}

// Steps |statement| to exhaustion, appending one VisitRow per result row to
// |visits| (existing contents are kept). Returns false if |statement| is not
// valid; otherwise returns whether the statement reports success once
// stepping has stopped.
// static
bool VisitDatabase::FillVisitVector(sql::Statement& statement,
                                    VisitVector* visits) {
  if (statement.is_valid()) {
    while (statement.Step()) {
      // Append a fresh row and populate it in place.
      visits->push_back(VisitRow());
      FillVisitRow(statement, &visits->back());
    }
    return statement.Succeeded();
  }

  return false;
}

// Appends rows from |statement| to |visits|, honoring the duplicate policy and
// the effective max count of |options|. Returns true when a qualifying row was
// found after |visits| had already reached the max count (that row is not
// appended); returns false when stepping the statement stopped first.
// static
bool VisitDatabase::FillVisitVectorWithOptions(sql::Statement& statement,
                                               const QueryOptions& options,
                                               VisitVector* visits) {
  const bool remove_duplicates =
      options.duplicate_policy != QueryOptions::KEEP_ALL_DUPLICATES;
  const bool duplicates_are_per_day =
      options.duplicate_policy == QueryOptions::REMOVE_DUPLICATES_PER_DAY;

  // URLs already emitted. With per-day de-duplication this only covers the
  // day identified by |seen_urls_midnight| and is reset when the day changes.
  std::set<URLID> seen_urls;
  base::Time seen_urls_midnight;

  while (statement.Step()) {
    VisitRow row;
    FillVisitRow(statement, &row);

    if (remove_duplicates) {
      if (duplicates_are_per_day) {
        const base::Time row_midnight = row.visit_time.LocalMidnight();
        if (seen_urls_midnight != row_midnight) {
          seen_urls.clear();
          seen_urls_midnight = row_midnight;
        }
      }
      // insert() reports whether the URL was new; skip rows for URLs that
      // were already seen.
      if (!seen_urls.insert(row.url_id).second)
        continue;
    }

    if (static_cast<int>(visits->size()) >= options.EffectiveMaxCount())
      return true;
    visits->push_back(row);
  }
  return false;
}

// Inserts |visit| into the visits table and stores the new row id in
// |visit->visit_id|. When |source| is not SOURCE_BROWSED, a matching row is
// also written to visit_source. Returns the new visit id, or 0 if either
// insert fails. Note that when only the visit_source insert fails, the visits
// row has already been written and |visit->visit_id| already set.
VisitID VisitDatabase::AddVisit(VisitRow* visit, VisitSource source) {
  sql::Statement insert_visit(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "INSERT INTO visits "
      "(url, visit_time, from_visit, transition, segment_id, "
      "visit_duration) VALUES (?,?,?,?,?,?)"));
  insert_visit.BindInt64(0, visit->url_id);
  insert_visit.BindInt64(1, visit->visit_time.ToInternalValue());
  insert_visit.BindInt64(2, visit->referring_visit);
  insert_visit.BindInt64(3, visit->transition);
  insert_visit.BindInt64(4, visit->segment_id);
  insert_visit.BindInt64(5, visit->visit_duration.ToInternalValue());

  if (!insert_visit.Run()) {
    VLOG(0) << "Failed to execute visit insert statement:  "
            << "url_id = " << visit->url_id;
    return 0;
  }

  visit->visit_id = GetDB().GetLastInsertRowId();

  // Browsed visits have no visit_source row, so there is nothing more to do.
  if (source == SOURCE_BROWSED)
    return visit->visit_id;

  // Record the source of this visit when it is not browsed.
  sql::Statement insert_source(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "INSERT INTO visit_source (id, source) VALUES (?,?)"));
  insert_source.BindInt64(0, visit->visit_id);
  insert_source.BindInt64(1, source);

  if (insert_source.Run())
    return visit->visit_id;

  VLOG(0) << "Failed to execute visit_source insert statement:  "
          << "id = " << visit->visit_id;
  return 0;
}

void VisitDatabase::DeleteVisit(const VisitRow& visit) {
  // Patch around this visit. Any visits that this went to will now have their
  // "source" be the deleted visit's source.
  sql::Statement update_chain(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "UPDATE visits SET from_visit=? WHERE from_visit=?"));
  update_chain.BindInt64(0, visit.referring_visit);
  update_chain.BindInt64(1, visit.visit_id);
  if (!update_chain.Run())
    return;

  // Now delete the actual visit.
  sql::Statement del(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "DELETE FROM visits WHERE id=?"));
  del.BindInt64(0, visit.visit_id);
  if (!del.Run())
    return;

  // Try to delete the entry in visit_source table as well.
  // If the visit was browsed, there is no corresponding entry in visit_source
  // table, and nothing will be deleted.
  del.Assign(GetDB().GetCachedStatement(SQL_FROM_HERE,
             "DELETE FROM visit_source WHERE id=?"));
  del.BindInt64(0, visit.visit_id);
  del.Run();
}

// Loads the visits row with id |visit_id| into |out_visit|. Returns false when
// no row could be stepped to, or when the row read back carries a different id
// than the one requested.
bool VisitDatabase::GetRowForVisit(VisitID visit_id, VisitRow* out_visit) {
  sql::Statement lookup(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits WHERE id=?"));
  lookup.BindInt64(0, visit_id);

  if (!lookup.Step())
    return false;

  FillVisitRow(lookup, out_visit);

  // We got a different visit than we asked for, something is wrong.
  DCHECK_EQ(visit_id, out_visit->visit_id);
  return visit_id == out_visit->visit_id;
}

bool VisitDatabase::UpdateVisitRow(const VisitRow& visit) {
  // Don't store inconsistent data to the database.
  DCHECK_NE(visit.visit_id, visit.referring_visit);
  if (visit.visit_id == visit.referring_visit)
    return false;

  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "UPDATE visits SET "
      "url=?,visit_time=?,from_visit=?,transition=?,segment_id=?,"
      "visit_duration=? WHERE id=?"));
  statement.BindInt64(0, visit.url_id);
  statement.BindInt64(1, visit.visit_time.ToInternalValue());
  statement.BindInt64(2, visit.referring_visit);
  statement.BindInt64(3, visit.transition);
  statement.BindInt64(4, visit.segment_id);
  statement.BindInt64(5, visit.visit_duration.ToInternalValue());
  statement.BindInt64(6, visit.visit_id);

  return statement.Run();
}

// Replaces the contents of |visits| with every visit recorded for |url_id|,
// ordered by visit_time ascending. Returns the result of FillVisitVector().
bool VisitDatabase::GetVisitsForURL(URLID url_id, VisitVector* visits) {
  visits->clear();

  sql::Statement visits_for_url(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS
      "FROM visits "
      "WHERE url=? "
      "ORDER BY visit_time ASC"));
  visits_for_url.BindInt64(0, url_id);

  const bool filled = FillVisitVector(visits_for_url, visits);
  return filled;
}

// Replaces the contents of |visits| with visits to |url_id|, selected
// according to |options|.
//
// NOTE(review): the branch condition below reads
// |options.REMOVE_ALL_DUPLICATES|. Elsewhere in this file
// REMOVE_DUPLICATES_PER_DAY and KEEP_ALL_DUPLICATES are used as QueryOptions
// enumerators compared against |options.duplicate_policy|, so this looks like
// it tests a constant rather than the caller's policy; presumably
// `options.duplicate_policy == QueryOptions::REMOVE_ALL_DUPLICATES` was
// intended. Confirm against the QueryOptions declaration before changing it:
// the first branch does not apply the time range or transition filters that
// the second branch applies, so switching which branch runs changes results.
bool VisitDatabase::GetVisibleVisitsForURL(URLID url_id,
                                           const QueryOptions& options,
                                           VisitVector* visits) {
  visits->clear();

  if (options.REMOVE_ALL_DUPLICATES) {
    // Shortcut: take only the single most recent visit for the URL.
    VisitRow visit_row;
    VisitID visit_id = GetMostRecentVisitForURL(url_id, &visit_row);
    if (visit_id && options.EffectiveMaxCount() != 0) {
      visits->push_back(visit_row);
    }
    // True only when a visit exists but the effective max count is 0.
    return options.EffectiveMaxCount() == 0 && visit_id;
  } else {
    // Query the visits to this URL inside the effective time range that pass
    // the transition filters, most recent first.
    sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
        "SELECT" HISTORY_VISIT_ROW_FIELDS
        "FROM visits "
        "WHERE url=? AND visit_time >= ? AND visit_time < ? "
        "AND (transition & ?) != 0 "  // CHAIN_END
        "AND (transition & ?) NOT IN (?, ?, ?) "  // NO SUBFRAME or
                                                  // KEYWORD_GENERATED
        "ORDER BY visit_time DESC"));
    statement.BindInt64(0, url_id);
    statement.BindInt64(1, options.EffectiveBeginTime());
    statement.BindInt64(2, options.EffectiveEndTime());
    statement.BindInt(3, content::PAGE_TRANSITION_CHAIN_END);
    statement.BindInt(4, content::PAGE_TRANSITION_CORE_MASK);
    statement.BindInt(5, content::PAGE_TRANSITION_AUTO_SUBFRAME);
    statement.BindInt(6, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
    statement.BindInt(7, content::PAGE_TRANSITION_KEYWORD_GENERATED);

    // Duplicate handling and the max count are applied while filling.
    return FillVisitVectorWithOptions(statement, options, visits);
  }
}

// Replaces the contents of |visits| with the visits whose visit_time exactly
// equals one of |times|, querying once per entry in the order given. Returns
// false as soon as one of the queries fails to fill, true otherwise.
bool VisitDatabase::GetVisitsForTimes(const std::vector<base::Time>& times,
                                      VisitVector* visits) {
  visits->clear();

  const size_t time_count = times.size();
  for (size_t i = 0; i < time_count; ++i) {
    sql::Statement exact_time_query(GetDB().GetCachedStatement(SQL_FROM_HERE,
        "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
        "WHERE visit_time == ?"));
    exact_time_query.BindInt64(0, times[i].ToInternalValue());

    const bool filled = FillVisitVector(exact_time_query, visits);
    if (!filled)
      return false;
  }
  return true;
}

// Replaces the contents of |visits| with the visits whose visit_time lies in
// [begin_time, end_time), ordered by visit_time. An |end_time| whose internal
// value is 0 means "no upper bound", and a |max_results| of 0 means "no
// limit"; both are bound as the largest int64. Returns the result of
// FillVisitVector().
bool VisitDatabase::GetAllVisitsInRange(base::Time begin_time,
                                        base::Time end_time,
                                        int max_results,
                                        VisitVector* visits) {
  visits->clear();

  // Each fragment ends with a space: adjacent string literals are
  // concatenated verbatim, so without it the SQL would read "?ORDER BY".
  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
      "WHERE visit_time >= ? AND visit_time < ? "
      "ORDER BY visit_time LIMIT ?"));

  // See GetVisibleVisitsInRange for more info on how these times are bound.
  int64 end = end_time.ToInternalValue();
  statement.BindInt64(0, begin_time.ToInternalValue());
  statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
  statement.BindInt64(2,
      max_results ? max_results : std::numeric_limits<int64>::max());

  return FillVisitVector(statement, visits);
}

// Replaces the contents of |visits| with the visits in
// [begin_time, end_time) whose transition, masked with
// PAGE_TRANSITION_CORE_MASK, equals |transition|, ordered by visit_time. An
// |end_time| whose internal value is 0 means "no upper bound", and a
// |max_results| of 0 means "no limit"; both are bound as the largest int64.
// Returns the result of FillVisitVector().
bool VisitDatabase::GetVisitsInRangeForTransition(
    base::Time begin_time,
    base::Time end_time,
    int max_results,
    content::PageTransition transition,
    VisitVector* visits) {
  DCHECK(visits);
  visits->clear();

  // Each fragment ends with a space: adjacent string literals are
  // concatenated verbatim, so without it the SQL would read "?ORDER BY".
  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
      "WHERE visit_time >= ? AND visit_time < ? "
      "AND (transition & ?) == ? "
      "ORDER BY visit_time LIMIT ?"));

  // See GetVisibleVisitsInRange for more info on how these times are bound.
  int64 end = end_time.ToInternalValue();
  statement.BindInt64(0, begin_time.ToInternalValue());
  statement.BindInt64(1, end ? end : std::numeric_limits<int64>::max());
  statement.BindInt(2, content::PAGE_TRANSITION_CORE_MASK);
  statement.BindInt(3, transition);
  statement.BindInt64(4,
      max_results ? max_results : std::numeric_limits<int64>::max());

  return FillVisitVector(statement, visits);
}

bool VisitDatabase::GetVisibleVisitsInRange(const QueryOptions& options,
                                            VisitVector* visits) {
  visits->clear();
  // The visit_time values can be duplicated in a redirect chain, so we sort
  // by id too, to ensure a consistent ordering just in case.
  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
      "WHERE visit_time >= ? AND visit_time < ? "
      "AND (transition & ?) != 0 "  // CHAIN_END
      "AND (transition & ?) NOT IN (?, ?, ?) "  // NO SUBFRAME or
                                                // KEYWORD_GENERATED
      "ORDER BY visit_time DESC, id DESC"));

  statement.BindInt64(0, options.EffectiveBeginTime());
  statement.BindInt64(1, options.EffectiveEndTime());
  statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_END);
  statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
  statement.BindInt(4, content::PAGE_TRANSITION_AUTO_SUBFRAME);
  statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
  statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED);

  return FillVisitVectorWithOptions(statement, options, visits);
}

// Replaces the contents of |visits| with TYPED or AUTO_BOOKMARK visits that
// carry the CHAIN_START qualifier and fall inside any of the [first, second)
// intervals in |time_filter|. Intervals are queried in order, each one most
// recent first. A positive |max_results| caps the total number of rows
// collected; zero or a negative value means no cap.
void VisitDatabase::GetDirectVisitsDuringTimes(const VisitFilter& time_filter,
                                                int max_results,
                                                VisitVector* visits) {
  visits->clear();
  // Only pre-size for a positive cap. A negative value would otherwise be
  // converted to a huge unsigned count by reserve(). This matches the
  // |max_results > 0| test used in the loop below.
  if (max_results > 0)
    visits->reserve(max_results);
  for (VisitFilter::TimeVector::const_iterator it = time_filter.times().begin();
       it != time_filter.times().end(); ++it) {
    sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
        "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
        "WHERE visit_time >= ? AND visit_time < ? "
        "AND (transition & ?) != 0 "  // CHAIN_START
        "AND (transition & ?) IN (?, ?) "  // TYPED or AUTO_BOOKMARK only
        "ORDER BY visit_time DESC, id DESC"));

    statement.BindInt64(0, it->first.ToInternalValue());
    statement.BindInt64(1, it->second.ToInternalValue());
    statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_START);
    statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
    statement.BindInt(4, content::PAGE_TRANSITION_TYPED);
    statement.BindInt(5, content::PAGE_TRANSITION_AUTO_BOOKMARK);

    while (statement.Step()) {
      VisitRow visit;
      FillVisitRow(statement, &visit);
      visits->push_back(visit);

      // Stop as soon as the cap is reached, even if intervals remain.
      if (max_results > 0 && static_cast<int>(visits->size()) >= max_results)
        return;
    }
  }
}

// Returns the id of the most recent visit to |url_id|, or 0 when no row could
// be stepped to. When |visit_row| is non-NULL the whole row is copied into it.
VisitID VisitDatabase::GetMostRecentVisitForURL(URLID url_id,
                                                VisitRow* visit_row) {
  // The visit_time values can be duplicated in a redirect chain, so we sort
  // by id too, to ensure a consistent ordering just in case.
  sql::Statement latest(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS "FROM visits "
      "WHERE url=? "
      "ORDER BY visit_time DESC, id DESC "
      "LIMIT 1"));
  latest.BindInt64(0, url_id);

  if (!latest.Step())
    return 0;  // No visits for this URL.

  // Caller only wants the id: read it straight from the first column.
  if (!visit_row)
    return latest.ColumnInt64(0);

  FillVisitRow(latest, visit_row);
  return visit_row->visit_id;
}

// Replaces the contents of |visits| with visits to |url_id|, most recent
// first. |max_results| is bound directly as the SQL LIMIT. Returns the result
// of FillVisitVector().
bool VisitDatabase::GetMostRecentVisitsForURL(URLID url_id,
                                              int max_results,
                                              VisitVector* visits) {
  visits->clear();

  // The visit_time values can be duplicated in a redirect chain, so we sort
  // by id too, to ensure a consistent ordering just in case.
  sql::Statement recent_visits(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT" HISTORY_VISIT_ROW_FIELDS
      "FROM visits "
      "WHERE url=? "
      "ORDER BY visit_time DESC, id DESC "
      "LIMIT ?"));
  recent_visits.BindInt64(0, url_id);
  recent_visits.BindInt(1, max_results);

  const bool filled = FillVisitVector(recent_visits, visits);
  return filled;
}

// Looks for a visit whose from_visit is |from_visit| and whose transition has
// a redirect bit set. On success returns true and writes the visit id to
// |to_visit| and its URL to |to_url|; either output may be NULL to skip it.
// Returns false when no row could be stepped to.
bool VisitDatabase::GetRedirectFromVisit(VisitID from_visit,
                                         VisitID* to_visit,
                                         GURL* to_url) {
  sql::Statement redirect_query(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT v.id,u.url "
      "FROM visits v JOIN urls u ON v.url = u.id "
      "WHERE v.from_visit = ? "
      "AND (v.transition & ?) != 0"));  // IS_REDIRECT_MASK
  redirect_query.BindInt64(0, from_visit);
  redirect_query.BindInt(1, content::PAGE_TRANSITION_IS_REDIRECT_MASK);

  if (redirect_query.Step()) {
    if (to_visit)
      *to_visit = redirect_query.ColumnInt64(0);
    if (to_url)
      *to_url = GURL(redirect_query.ColumnString(1));
    return true;
  }

  return false;  // No redirect from this visit. (Or SQL error)
}

// Reports where |to_visit| came from. Writes the referring visit id to
// |from_visit| and the referring visit's URL to |from_url|; either output may
// be NULL to skip it. Returns false when the row for |to_visit| cannot be
// loaded, or when |from_url| was requested but the referring visit's URL could
// not be looked up (|from_visit| has already been written in that case).
bool VisitDatabase::GetRedirectToVisit(VisitID to_visit,
                                       VisitID* from_visit,
                                       GURL* from_url) {
  VisitRow destination;
  if (!GetRowForVisit(to_visit, &destination))
    return false;

  if (from_visit)
    *from_visit = destination.referring_visit;

  // Without a URL output there is nothing left to query.
  if (!from_url)
    return true;

  sql::Statement referrer_url(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT u.url "
      "FROM visits v JOIN urls u ON v.url = u.id "
      "WHERE v.id = ?"));
  referrer_url.BindInt64(0, destination.referring_visit);

  if (!referrer_url.Step())
    return false;

  *from_url = GURL(referrer_url.ColumnString(0));
  return true;
}

// Counts the visits to the host/port of |url| that pass the transition
// filters below, and finds the earliest such visit.
//
// Returns false when |url| is neither http nor https, when its origin spec is
// empty, or when the query fails. On success |*count| holds the number of
// matching visits. |*first_visit| is written only when the query produced a
// row; it is left untouched when stepping produced no row.
bool VisitDatabase::GetVisibleVisitCountToHost(const GURL& url,
                                               int* count,
                                               base::Time* first_visit) {
  if (!url.SchemeIs(content::kHttpScheme) &&
      !url.SchemeIs(content::kHttpsScheme))
    return false;

  // We need to search for URLs with a matching host/port. One way to query for
  // this is to use the LIKE operator, eg 'url LIKE http://google.com/%'. This
  // is inefficient though in that it doesn't use the index and each entry must
  // be visited. The same query can be executed by using >= and < operator.
  // The query becomes:
  // 'url >= http://google.com/ and url < http://google.com0'.
  // 0 is used as it is one character greater than '/'.
  const std::string host_query_min = url.GetOrigin().spec();
  if (host_query_min.empty())
    return false;

  // Upper bound: the origin spec with its last character replaced by '0'.
  const std::string host_query_max =
      host_query_min.substr(0, host_query_min.size() - 1) + '0';

  // We also want to restrict ourselves to main frame navigations that are not
  // in the middle of redirect chains, hence the transition checks.
  sql::Statement statement(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT MIN(v.visit_time), COUNT(*) "
      "FROM visits v INNER JOIN urls u ON v.url = u.id "
      "WHERE u.url >= ? AND u.url < ? "
      "AND (transition & ?) != 0 "
      "AND (transition & ?) NOT IN (?, ?, ?)"));
  statement.BindString(0, host_query_min);
  statement.BindString(1, host_query_max);
  statement.BindInt(2, content::PAGE_TRANSITION_CHAIN_END);
  statement.BindInt(3, content::PAGE_TRANSITION_CORE_MASK);
  statement.BindInt(4, content::PAGE_TRANSITION_AUTO_SUBFRAME);
  statement.BindInt(5, content::PAGE_TRANSITION_MANUAL_SUBFRAME);
  statement.BindInt(6, content::PAGE_TRANSITION_KEYWORD_GENERATED);

  if (!statement.Step()) {
    // Step() returns false both when there is no row and when the statement
    // failed. Only the former means "never visited", so report a failure as
    // false instead of as a zero count.
    if (!statement.Succeeded())
      return false;

    // We've never been to this page before.
    *count = 0;
    return true;
  }

  *first_visit = base::Time::FromInternalValue(statement.ColumnInt64(0));
  *count = statement.ColumnInt(1);
  return true;
}

// Writes the smallest non-zero visit_time in the visits table to
// |first_visit| and returns true. When no row could be stepped to, or the
// value read is 0, writes the current time instead and returns false.
bool VisitDatabase::GetStartDate(base::Time* first_visit) {
  sql::Statement earliest_query(GetDB().GetCachedStatement(SQL_FROM_HERE,
      "SELECT MIN(visit_time) FROM visits WHERE visit_time != 0"));

  // 0 doubles as "nothing found": it is also what is treated as missing when
  // read back from the query.
  int64 earliest = 0;
  if (earliest_query.Step())
    earliest = earliest_query.ColumnInt64(0);

  if (earliest == 0) {
    *first_visit = base::Time::Now();
    return false;
  }

  *first_visit = base::Time::FromInternalValue(earliest);
  return true;
}

void VisitDatabase::GetVisitsSource(const VisitVector& visits,
                                    VisitSourceMap* sources) {
  DCHECK(sources);
  sources->clear();

  // We query the source in batch. Here defines the batch size.
  const size_t batch_size = 500;
  size_t visits_size = visits.size();

  size_t start_index = 0, end_index = 0;
  while (end_index < visits_size) {
    start_index = end_index;
    end_index = end_index + batch_size < visits_size ? end_index + batch_size
                                                     : visits_size;

    // Compose the sql statement with a list of ids.
    std::string sql = "SELECT id,source FROM visit_source ";
    sql.append("WHERE id IN (");
    // Append all the ids in the statement.
    for (size_t j = start_index; j < end_index; j++) {
      if (j != start_index)
        sql.push_back(',');
      sql.append(base::Int64ToString(visits[j].visit_id));
    }
    sql.append(") ORDER BY id");
    sql::Statement statement(GetDB().GetUniqueStatement(sql.c_str()));

    // Get the source entries out of the query result.
    while (statement.Step()) {
      std::pair<VisitID, VisitSource> source_entry(statement.ColumnInt64(0),
          static_cast<VisitSource>(statement.ColumnInt(1)));
      sources->insert(source_entry);
    }
  }
}

// Makes sure the visits table has a visit_duration column, adding it when it
// is missing. Returns false when the visits table does not exist or the ALTER
// TABLE statement fails; returns true otherwise.
bool VisitDatabase::MigrateVisitsWithoutDuration() {
  if (!GetDB().DoesTableExist("visits")) {
    NOTREACHED() << " Visits table should exist before migration";
    return false;
  }

  // Nothing to migrate when the column is already there.
  if (GetDB().DoesColumnExist("visits", "visit_duration"))
    return true;

  // Old versions don't have the visit_duration column, we modify the table
  // to add that field.
  return GetDB().Execute("ALTER TABLE visits "
      "ADD COLUMN visit_duration INTEGER DEFAULT 0 NOT NULL");
}

void VisitDatabase::GetBriefVisitInfoOfMostRecentVisits(
    int max_visits,
    std::vector<BriefVisitInfo>* result_vector) {
  result_vector->clear();

  sql::Statement statement(GetDB().GetUniqueStatement(
      "SELECT url,visit_time,transition FROM visits "
      "ORDER BY id DESC LIMIT ?"));

  statement.BindInt64(0, max_visits);

  if (!statement.is_valid())
    return;

  while (statement.Step()) {
    BriefVisitInfo info;
    info.url_id = statement.ColumnInt64(0);
    info.time = base::Time::FromInternalValue(statement.ColumnInt64(1));
    info.transition = content::PageTransitionFromInt(statement.ColumnInt(2));
    result_vector->push_back(info);
  }
}

}  // namespace history