// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "chrome/browser/autocomplete/bookmark_provider.h"
#include <algorithm>
#include <functional>
#include <vector>
#include "base/metrics/histogram.h"
#include "base/prefs/pref_service.h"
#include "base/time/time.h"
#include "chrome/browser/autocomplete/autocomplete_result.h"
#include "chrome/browser/bookmarks/bookmark_model.h"
#include "chrome/browser/bookmarks/bookmark_model_factory.h"
#include "chrome/browser/bookmarks/bookmark_title_match.h"
#include "chrome/browser/profiles/profile.h"
#include "chrome/common/pref_names.h"
#include "net/base/net_util.h"
typedef std::vector<BookmarkTitleMatch> TitleMatches;
// BookmarkProvider ------------------------------------------------------------
BookmarkProvider::BookmarkProvider(
AutocompleteProviderListener* listener,
Profile* profile)
: AutocompleteProvider(listener, profile,
AutocompleteProvider::TYPE_BOOKMARK),
bookmark_model_(NULL) {
if (profile) {
bookmark_model_ = BookmarkModelFactory::GetForProfile(profile);
languages_ = profile_->GetPrefs()->GetString(prefs::kAcceptLanguages);
}
}
void BookmarkProvider::Start(const AutocompleteInput& input,
bool minimal_changes) {
if (minimal_changes)
return;
matches_.clear();
// Short-circuit any matching when inline autocompletion is disabled and
// we're looking for BEST_MATCH because none of the BookmarkProvider's
// matches can score high enough to qualify.
if (input.text().empty() ||
((input.type() != AutocompleteInput::UNKNOWN) &&
(input.type() != AutocompleteInput::QUERY)) ||
((input.matches_requested() == AutocompleteInput::BEST_MATCH) &&
input.prevent_inline_autocomplete()))
return;
base::TimeTicks start_time = base::TimeTicks::Now();
DoAutocomplete(input,
input.matches_requested() == AutocompleteInput::BEST_MATCH);
UMA_HISTOGRAM_TIMES("Autocomplete.BookmarkProviderMatchTime",
base::TimeTicks::Now() - start_time);
}
BookmarkProvider::~BookmarkProvider() {}
void BookmarkProvider::DoAutocomplete(const AutocompleteInput& input,
bool best_match) {
// We may not have a bookmark model for some unit tests.
if (!bookmark_model_)
return;
TitleMatches matches;
// Retrieve enough bookmarks so that we have a reasonable probability of
// suggesting the one that the user desires.
const size_t kMaxBookmarkMatches = 50;
// GetBookmarksWithTitlesMatching returns bookmarks matching the user's
// search terms using the following rules:
// - The search text is broken up into search terms. Each term is searched
// for separately.
// - Term matches are always performed against the start of a word. 'def'
// will match against 'define' but not against 'indefinite'.
// - Terms must be at least three characters in length in order to perform
// partial word matches. Any term of lesser length will only be used as an
// exact match. 'def' will match against 'define' but 'de' will not match.
// - A search containing multiple terms will return results with those words
// occuring in any order.
// - Terms enclosed in quotes comprises a phrase that must match exactly.
// - Multiple terms enclosed in quotes will require those exact words in that
// exact order to match.
//
// Note: GetBookmarksWithTitlesMatching() will never return a match span
// greater than the length of the title against which it is being matched,
// nor can those spans ever overlap because the match spans are coalesced
// for all matched terms.
//
// Please refer to the code for BookmarkIndex::GetBookmarksWithTitlesMatching
// for complete details of how title searches are performed against the user's
// bookmarks.
bookmark_model_->GetBookmarksWithTitlesMatching(input.text(),
kMaxBookmarkMatches,
&matches);
if (matches.empty())
return; // There were no matches.
for (TitleMatches::const_iterator i = matches.begin(); i != matches.end();
++i) {
// Create and score the AutocompleteMatch. If its score is 0 then the
// match is discarded.
AutocompleteMatch match(TitleMatchToACMatch(*i));
if (match.relevance > 0)
matches_.push_back(match);
}
// Sort and clip the resulting matches.
size_t max_matches = best_match ? 1 : AutocompleteProvider::kMaxMatches;
if (matches_.size() > max_matches) {
std::partial_sort(matches_.begin(),
matches_.begin() + max_matches,
matches_.end(),
AutocompleteMatch::MoreRelevant);
matches_.resize(max_matches);
} else {
std::sort(matches_.begin(), matches_.end(),
AutocompleteMatch::MoreRelevant);
}
}
namespace {
// for_each helper functor that calculates a match factor for each query term
// when calculating the final score.
//
// Calculate a 'factor' from 0.0 to 1.0 based on 1) how much of the bookmark's
// title the term matches, and 2) where the match is positioned within the
// bookmark's title. A full length match earns a 1.0. A half-length match earns
// at most a 0.5 and at least a 0.25. A single character match against a title
// that is 100 characters long where the match is at the first character will
// earn a 0.01 and at the last character will earn a 0.0001.
class ScoringFunctor {
public:
// |title_length| is the length of the bookmark title against which this
// match will be scored.
explicit ScoringFunctor(size_t title_length)
: title_length_(static_cast<double>(title_length)),
scoring_factor_(0.0) {
}
void operator()(const Snippet::MatchPosition& match) {
double term_length = static_cast<double>(match.second - match.first);
scoring_factor_ += term_length / title_length_ *
(title_length_ - match.first) / title_length_;
}
double ScoringFactor() { return scoring_factor_; }
private:
double title_length_;
double scoring_factor_;
};
} // namespace
AutocompleteMatch BookmarkProvider::TitleMatchToACMatch(
const BookmarkTitleMatch& title_match) {
// The AutocompleteMatch we construct is non-deletable because the only
// way to support this would be to delete the underlying bookmark, which is
// unlikely to be what the user intends.
AutocompleteMatch match(this, 0, false,
AutocompleteMatchType::BOOKMARK_TITLE);
const base::string16& title(title_match.node->GetTitle());
DCHECK(!title.empty());
const GURL& url(title_match.node->url());
match.destination_url = url;
match.contents = net::FormatUrl(url, languages_,
net::kFormatUrlOmitAll & net::kFormatUrlOmitHTTP,
net::UnescapeRule::SPACES, NULL, NULL, NULL);
match.contents_class.push_back(
ACMatchClassification(0, ACMatchClassification::NONE));
match.fill_into_edit =
AutocompleteInput::FormattedStringWithEquivalentMeaning(url,
match.contents);
match.description = title;
match.description_class =
ClassificationsFromMatch(title_match.match_positions,
match.description.size());
match.starred = true;
// Summary on how a relevance score is determined for the match:
//
// For each term matching within the bookmark's title (as given by the set of
// Snippet::MatchPositions) calculate a 'factor', sum up those factors, then
// use the sum to figure out a value between the base score and the maximum
// score.
//
// The factor for each term is the product of:
//
// 1) how much of the bookmark's title has been matched by the term:
// (term length / title length).
//
// Example: Given a bookmark title 'abcde fghijklm', with a title length
// of 14, and two different search terms, 'abcde' and 'fghijklm', with
// term lengths of 5 and 8, respectively, 'fghijklm' will score higher
// (with a partial factor of 8/14 = 0.571) than 'abcde' (5/14 = 0.357).
//
// 2) where the term match occurs within the bookmark's title, giving more
// points for matches that appear earlier in the title:
// ((title length - position of match start) / title_length).
//
// Example: Given a bookmark title of 'abcde fghijklm', with a title length
// of 14, and two different search terms, 'abcde' and 'fghij', with
// start positions of 0 and 6, respectively, 'abcde' will score higher
// (with a a partial factor of (14-0)/14 = 1.000 ) than 'fghij' (with
// a partial factor of (14-6)/14 = 0.571 ).
//
// Once all term factors have been calculated they are summed. The resulting
// sum will never be greater than 1.0 because of the way the bookmark model
// matches and removes overlaps. (In particular, the bookmark model only
// matches terms to the beginning of words and it removes all overlapping
// matches, keeping only the longest. Together these mean that each
// character is included in at most one match. This property ensures the
// sum of factors is at most 1.) This sum is then multiplied against the
// scoring range available, which is 299. The 299 is calculated by
// subtracting the minimum possible score, 900, from the maximum possible
// score, 1199. This product, ranging from 0 to 299, is added to the minimum
// possible score, 900, giving the preliminary score.
//
// If the preliminary score is less than the maximum possible score, 1199,
// it can be boosted up to that maximum possible score if the URL referenced
// by the bookmark is also referenced by any of the user's other bookmarks.
// A count of how many times the bookmark's URL is referenced is determined
// and, for each additional reference beyond the one for the bookmark being
// scored up to a maximum of three, the score is boosted by a fixed amount
// given by |kURLCountBoost|, below.
//
ScoringFunctor position_functor =
for_each(title_match.match_positions.begin(),
title_match.match_positions.end(), ScoringFunctor(title.size()));
const int kBaseBookmarkScore = 900;
const int kMaxBookmarkScore = AutocompleteResult::kLowestDefaultScore - 1;
const double kBookmarkScoreRange =
static_cast<double>(kMaxBookmarkScore - kBaseBookmarkScore);
// It's not likely that GetBookmarksWithTitlesMatching will return overlapping
// matches but let's play it safe.
match.relevance = std::min(kMaxBookmarkScore,
static_cast<int>(position_functor.ScoringFactor() * kBookmarkScoreRange) +
kBaseBookmarkScore);
// Don't waste any time searching for additional referenced URLs if we
// already have a perfect title match.
if (match.relevance >= kMaxBookmarkScore)
return match;
// Boost the score if the bookmark's URL is referenced by other bookmarks.
const int kURLCountBoost[4] = { 0, 75, 125, 150 };
std::vector<const BookmarkNode*> nodes;
bookmark_model_->GetNodesByURL(url, &nodes);
DCHECK_GE(std::min(arraysize(kURLCountBoost), nodes.size()), 1U);
match.relevance +=
kURLCountBoost[std::min(arraysize(kURLCountBoost), nodes.size()) - 1];
match.relevance = std::min(kMaxBookmarkScore, match.relevance);
return match;
}
// static
ACMatchClassifications BookmarkProvider::ClassificationsFromMatch(
const Snippet::MatchPositions& positions,
size_t text_length) {
ACMatchClassifications classifications;
if (positions.empty()) {
classifications.push_back(
ACMatchClassification(0, ACMatchClassification::NONE));
return classifications;
}
for (Snippet::MatchPositions::const_iterator i = positions.begin();
i != positions.end(); ++i) {
AutocompleteMatch::ACMatchClassifications new_class;
AutocompleteMatch::ClassifyLocationInString(i->first, i->second - i->first,
text_length, 0, &new_class);
classifications = AutocompleteMatch::MergeClassifications(
classifications, new_class);
}
return classifications;
}