// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// The query parser is used to parse queries entered into the history
// search into more normalized queries can be passed to the SQLite backend.
#ifndef CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
#define CHROME_BROWSER_HISTORY_QUERY_PARSER_H_
#pragma once
#include <vector>
#include "base/string16.h"
#include "chrome/browser/history/snippet.h"
class QueryNodeList;
// Used by HasMatchIn.
struct QueryWord {
// The work to match against.
string16 word;
// The starting position of the word in the original text.
size_t position;
};
// QueryNode is used by QueryNodeParser to represent the elements that
// constitute a query. While QueryNode is exposed by way of ParseQuery, it
// really isn't meant for external usage.
class QueryNode {
public:
virtual ~QueryNode() {}
// Serialize ourselves out to a string that can be passed to SQLite. Returns
// the number of words in this node.
virtual int AppendToSQLiteQuery(string16* query) const = 0;
// Return true if this is a word node, false if it's a QueryNodeList.
virtual bool IsWord() const = 0;
// Returns true if this node matches the specified text. If exact is true,
// the string must exactly match. Otherwise, this uses a starts with
// comparison.
virtual bool Matches(const string16& word, bool exact) const = 0;
// Returns true if this node matches at least one of the words in words. If
// the node matches at least one word, an entry is added to match_positions
// giving the matching region.
virtual bool HasMatchIn(const std::vector<QueryWord>& words,
Snippet::MatchPositions* match_positions) const = 0;
// Appends the words that make up this node in |words|.
virtual void AppendWords(std::vector<string16>* words) const = 0;
};
class QueryParser {
public:
QueryParser();
// For CJK ideographs and Korean Hangul, even a single character
// can be useful in prefix matching, but that may give us too many
// false positives. Moreover, the current ICU word breaker gives us
// back every single Chinese character as a word so that there's no
// point doing anything for them and we only adjust the minimum length
// to 2 for Korean Hangul while using 3 for others. This is a temporary
// hack until we have a segmentation support.
static bool IsWordLongEnoughForPrefixSearch(const string16& word);
// Parse a query into a SQLite query. The resulting query is placed in
// sqlite_query and the number of words is returned.
int ParseQuery(const string16& query,
string16* sqlite_query);
// Parses the query words in query, returning the nodes that constitute the
// valid words in the query. This is intended for later usage with
// DoesQueryMatch.
// Ownership of the nodes passes to the caller.
void ParseQuery(const string16& query,
std::vector<QueryNode*>* nodes);
// Parses a query returning the words that make up the query. Any words in
// quotes are put in |words| without the quotes. For example, the query text
// "foo bar" results in two entries being added to words, one for foo and one
// for bar.
void ExtractQueryWords(const string16& query,
std::vector<string16>* words);
// Returns true if the string text matches the query nodes created by a call
// to ParseQuery. If the query does match each of the matching positions in
// the text is added to |match_positions|.
bool DoesQueryMatch(const string16& text,
const std::vector<QueryNode*>& nodes,
Snippet::MatchPositions* match_positions);
private:
// Does the work of parsing a query; creates nodes in QueryNodeList as
// appropriate. This is invoked from both of the ParseQuery methods.
bool ParseQueryImpl(const string16& query,
QueryNodeList* root);
// Extracts the words from text, placing each word into words.
void ExtractQueryWords(const string16& text,
std::vector<QueryWord>* words);
};
#endif // CHROME_BROWSER_HISTORY_QUERY_PARSER_H_