1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
7 
8 #include <stddef.h>
9 
10 #include <map>
11 #include <utility>
12 #include <vector>
13 
14 #include "base/gtest_prod_util.h"
15 #include "base/memory/ref_counted.h"
16 #include "base/strings/string16.h"
17 #include "components/omnibox/browser/autocomplete_match.h"
18 #include "components/omnibox/browser/in_memory_url_index_types.h"
19 #include "third_party/metrics_proto/omnibox_event.pb.h"
20 
21 class AutocompleteInput;
22 // Each provider appends one entry to this list; see AddProviderInfo().
23 using ProvidersInfo = std::vector<metrics::OmniboxEventProto_ProviderInfo>;
24 
25 // The AutocompleteProviders each return different kinds of matches,
26 // such as history or search matches.  These matches are given
27 // "relevance" scores.  Higher scores are better matches than lower
28 // scores.  The relevance scores and classes providing the respective
29 // matches are as listed below.
30 //
31 // IMPORTANT CAVEAT: The tables below are NOT COMPLETE.  Developers
32 // often forget to keep these tables in sync with the code when they
33 // change scoring algorithms or add new providers.  For example,
34 // neither the HistoryQuickProvider (which is a provider that appears
35 // often) nor the ShortcutsProvider are listed here.  For the best
36 // idea of how scoring works and what providers are affecting which
37 // queries, play with chrome://omnibox/ for a while.  While the tables
38 // below may have some utility, nothing compares with first-hand
39 // investigation and experience.
40 //
41 // ZERO SUGGEST (empty) input type:
42 // --------------------------------------------------------------------|-----
43 // Clipboard URL                                                       |  800
44 // Zero Suggest (most visited, Android only)                           |  600--
45 // Zero Suggest (default, may be overridden by server)                 |  100
46 // Local History Zero Suggest                                          |  500--
47 //
48 // UNKNOWN input type:
49 // --------------------------------------------------------------------|-----
50 // Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
51 // HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
52 // HistoryURL (intranet url never visited match, some inexact matches) | 1400++
53 // Search Primary Provider (past query in history within 2 days)       | 1399**
54 // Search Primary Provider (what you typed)                            | 1300
55 // HistoryURL (what you typed, some inexact matches)                   | 1200++
56 // Keyword (substituting, exact match)                                 | 1100
57 // Search Primary Provider (past query in history older than 2 days)   | 1050*
58 // HistoryURL (some inexact matches)                                   |  900++
59 // BookmarkProvider (prefix match in bookmark title or URL)            |  900+-
60 // Built-in                                                            |  860++
61 // Search Primary Provider (navigational suggestion)                   |  800++
62 // Search Primary Provider (suggestion)                                |  600++
63 // Keyword (inexact match)                                             |  450
64 // Search Secondary Provider (what you typed)                          |  250
65 // Search Secondary Provider (past query in history)                   |  200*
66 // Search Secondary Provider (navigational suggestion)                 |  150++
67 // Search Secondary Provider (suggestion)                              |  100++
68 // Non Personalized On Device Head Suggest Provider                    |    *
69 //                  (default value 99--, can be changed by Finch)
70 // Document Suggestions (*experimental): value controlled by Finch     |    *
71 //
72 // URL input type:
73 // --------------------------------------------------------------------|-----
74 // Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
75 // HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
76 // HistoryURL (intranet url never visited match, some inexact matches) | 1400++
77 // HistoryURL (what you typed, some inexact matches)                   | 1200++
78 // Keyword (substituting, exact match)                                 | 1100
79 // HistoryURL (some inexact matches)                                   |  900++
80 // Built-in                                                            |  860++
81 // Search Primary Provider (what you typed)                            |  850
82 // Search Primary Provider (navigational suggestion)                   |  800++
83 // Search Primary Provider (past query in history)                     |  750*
84 // Keyword (inexact match)                                             |  700
85 // Search Primary Provider (suggestion)                                |  300++
86 // Search Secondary Provider (what you typed)                          |  250
87 // Search Secondary Provider (past query in history)                   |  200*
88 // Search Secondary Provider (navigational suggestion)                 |  150++
89 // Search Secondary Provider (suggestion)                              |  100++
90 // Non Personalized On Device Head Suggest Provider                    |   99--
91 //
92 // QUERY input type:
93 // --------------------------------------------------------------------|-----
94 // Search Primary or Secondary (past query in history within 2 days)   | 1599**
95 // Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
96 // Keyword (substituting, exact match)                                 | 1450
97 // Search Primary Provider (past query in history within 2 days)       | 1399**
98 // Search Primary Provider (what you typed)                            | 1300
99 // Search Primary Provider (past query in history older than 2 days)   | 1050*
100 // HistoryURL (inexact match)                                          |  900++
101 // BookmarkProvider (prefix match in bookmark title or URL)            |  900+-
102 // Search Primary Provider (navigational suggestion)                   |  800++
103 // Search Primary Provider (suggestion)                                |  600++
104 // Keyword (inexact match)                                             |  450
105 // Search Secondary Provider (what you typed)                          |  250
106 // Search Secondary Provider (past query in history)                   |  200*
107 // Search Secondary Provider (navigational suggestion)                 |  150++
108 // Search Secondary Provider (suggestion)                              |  100++
109 // Non Personalized On Device Head Suggest Provider                    |    *
110 //                  (default value 99--, can be changed by Finch)
111 //
112 // (A search keyword is a keyword with a replacement string; a bookmark keyword
113 // is a keyword with no replacement string, that is, a shortcut for a URL.)
114 //
115 // There are two possible providers for search suggestions. If the user has
116 // typed a keyword, then the primary provider is the keyword provider and the
117 // secondary provider is the default provider. If the user has not typed a
118 // keyword, then the primary provider corresponds to the default provider.
119 //
120 // Search providers may supply relevance values along with their results to be
121 // used in place of client-side calculated values.
122 //
123 // The value column gives the ranking returned from the various providers.
124 // ++: a series of matches with relevance from n up to (n + max_matches).
125 // --: a series of matches with relevance from n down to (n - max_matches).
126 // *:  relevance score falls off over time (discounted 50 points @ 15 minutes,
127 //     450 points @ two weeks)
128 // **: relevance score falls off over two days (discounted 99 points after two
129 //     days).
130 // +-: A base score that the provider will adjust upward or downward based on
131 //     provider-specific metrics.
132 //
133 // A single result provider for the autocomplete system.  Given user input, the
134 // provider decides what (if any) matches to return, their relevance, and their
135 // classifications.
136 class AutocompleteProvider
137     : public base::RefCountedThreadSafe<AutocompleteProvider> {
138  public:
139   // Different AutocompleteProvider implementations.
140   enum Type {
141     TYPE_BOOKMARK = 1 << 0,
142     TYPE_BUILTIN = 1 << 1,
143     TYPE_HISTORY_QUICK = 1 << 2,
144     TYPE_HISTORY_URL = 1 << 3,
145     TYPE_KEYWORD = 1 << 4,
146     TYPE_SEARCH = 1 << 5,
147     TYPE_SHORTCUTS = 1 << 6,
148     TYPE_ZERO_SUGGEST = 1 << 7,
149     TYPE_CLIPBOARD = 1 << 8,
150     TYPE_DOCUMENT = 1 << 9,
151     TYPE_ON_DEVICE_HEAD = 1 << 10,
152     TYPE_ZERO_SUGGEST_LOCAL_HISTORY = 1 << 11,
153     TYPE_QUERY_TILE = 1 << 12,
154     TYPE_MOST_VISITED_SITES = 1 << 13,
155     TYPE_VERBATIM_MATCH = 1 << 14,
156   };
157 
158   explicit AutocompleteProvider(Type type);
159 
160   AutocompleteProvider(const AutocompleteProvider&) = delete;
161   AutocompleteProvider& operator=(const AutocompleteProvider&) = delete;
162 
163   // Returns a string describing a particular AutocompleteProvider type.
164   static const char* TypeToString(Type type);
165 
166   // Called to start an autocomplete query.  The provider is responsible for
167   // tracking its matches for this query and whether it is done processing the
168   // query.  When new matches are available or the provider finishes, it
169   // calls the controller's OnProviderUpdate() method.  The controller can then
170   // get the new matches using the provider's accessors.
171   // Exception: Matches available immediately after starting the query (that
172   // is, synchronously) do not cause any notifications to be sent.  The
173   // controller is expected to check for these without prompting (since
174   // otherwise, starting each provider running would result in a flurry of
175   // notifications).
176   //
177   // Once Stop() has been called, usually no more notifications should be sent.
178   // (See comments on Stop() below.)
179   //
180   // |minimal_changes| is an optimization that lets the provider do less work
181   // when the |input|'s text hasn't changed.  See the body of
182   // OmniboxPopupModel::StartAutocomplete().
183   virtual void Start(const AutocompleteInput& input, bool minimal_changes) = 0;
184 
185   // Advises the provider to stop processing.  This may be called even if the
186   // provider is already done.  If the provider caches any results, it should
187   // clear the cache based on the value of |clear_cached_results|.  Normally,
188   // once this is called, the provider should not send more notifications to
189   // the controller.
190   //
191   // If |user_inactivity_timer| is true, Stop() is being called because it's
192   // been a long time since the user started the current query, and returning
193   // further asynchronous results would normally just be disruptive.  Most
194   // providers should still stop processing in this case, but continuing is
195   // legal if there's a good reason the user is likely to want even long-
196   // delayed asynchronous results, e.g. the user has explicitly invoked a
197   // keyword extension and the extension is still processing the request.
198   virtual void Stop(bool clear_cached_results,
199                     bool due_to_user_inactivity);
200 
201   // Returns the enum equivalent to the name of this provider.
202   // TODO(derat): Make metrics use AutocompleteProvider::Type directly, or at
203   // least move this method to the metrics directory.
204   metrics::OmniboxEventProto_ProviderType AsOmniboxEventProviderType() const;
205 
206   // Called to delete a match and the backing data that produced it.  This
207   // match should not appear again in this or future queries.  This can only be
208   // called for matches the provider marks as deletable.  This should only be
209   // called when no query is running.
210   // NOTE: Do NOT call OnProviderUpdate() in this method, it is the
211   // responsibility of the caller to do so after calling us.
212   virtual void DeleteMatch(const AutocompleteMatch& match);
213 
214   // Called when an omnibox event log entry is generated.  This gives
215   // a provider the opportunity to add diagnostic information to the
216   // logs.  A provider is expected to append a single entry of whatever
217   // information it wants to |provider_info|.
218   virtual void AddProviderInfo(ProvidersInfo* provider_info) const;
219 
220   // Called when a new omnibox session starts or the current session ends.
221   // This gives the opportunity to reset the internal state, if any, associated
222   // with the previous session.
223   virtual void ResetSession();
224 
225   // Estimates dynamic memory usage.
226   // See base/trace_event/memory_usage_estimator.h for more info.
227   //
228   // Note: Subclasses that override this method must call the base class
229   // method and include the response in their estimate.
230   virtual size_t EstimateMemoryUsage() const;
231 
232   // Returns a suggested upper bound for how many matches this provider should
233   // return.
provider_max_matches()234   size_t provider_max_matches() const { return provider_max_matches_; }
235 
236   // Returns the set of matches for the current query.
matches()237   const ACMatches& matches() const { return matches_; }
238 
239   // Returns whether the provider is done processing the query.
done()240   bool done() const { return done_; }
241 
242   // Returns this provider's type.
type()243   Type type() const { return type_; }
244 
245   // Returns a string describing this provider's type.
246   const char* GetName() const;
247 
248   typedef std::multimap<base::char16, base::string16> WordMap;
249 
250   // Finds the matches for |find_text| in |text|, classifies those matches,
251   // merges those classifications with |original_class|, and returns the merged
252   // classifications.
253   // If |text_is_search_query| is false, matches are classified as MATCH, and
254   // non-matches are classified as NONE. Otherwise, if |text_is_search_query| is
255   // true, matches are classified as NONE, and non-matches are classified as
256   // MATCH. This is done to mimic the behavior of SearchProvider which decorates
257   // matches according to the approach used by Google Suggest.
258   // |find_text| and |text| will be lowercased.
259   //
260   //   For example, given
261   //     |find_text| is "sp new",
262   //     |text| is "Sports and News at sports.somesite.com - visit us!",
263   //     |text_is_search_query| is false, and
264   //     |original_class| is {{0, NONE}, {19, URL}, {38, NONE}} (marking
265   //     "sports.somesite.com" as a URL),
266   //   Then this will return
267   //     {{0, MATCH}, {2, NONE}, {11, MATCH}, {14, NONE}, {19, URL|MATCH},
268   //     {21, URL}, {38, NONE}}; i.e.,
269   //     "Sports and News at sports.somesite.com - visit us!"
270   //      ^ ^        ^  ^    ^ ^                ^
271   //      0 2        11 14  19 21               38
272   //      M N        M  N  U|M U                N
273   //
274   //   For example, given
275   //     |find_text| is "canal",
276   //     |text| is "panama canal",
277   //     |text_is_search_query| is true, and
278   //     |original_class| is {{0, NONE}},
279   //   Then this will return
280   //     {{0,MATCH}, {7, NONE}}; i.e.,
281   //     "panama canal"
282   //      ^      ^
283   //      0 M    7 N
284   static ACMatchClassifications ClassifyAllMatchesInString(
285       const base::string16& find_text,
286       const base::string16& text,
287       const bool text_is_search_query,
288       const ACMatchClassifications& original_class = ACMatchClassifications());
289 
290   // Used to determine if we're in keyword mode, if experimental keyword
291   // mode is enabled, and if we're confident that the user is intentionally
292   // (not accidentally) in keyword mode. Combined, this method returns
293   // whether the caller should perform steps that are only valid in this state.
294   static bool InExplicitExperimentalKeywordMode(const AutocompleteInput& input,
295                                                 const base::string16& keyword);
296 
297   // Uses the keyword entry mode in |input| (and possibly compare the length
298   // of the user input vs |keyword|) to decide if the user intentionally
299   // entered keyword mode.
300   static bool IsExplicitlyInKeywordMode(const AutocompleteInput& input,
301                                         const base::string16& keyword);
302 
303  protected:
304   friend class base::RefCountedThreadSafe<AutocompleteProvider>;
305   FRIEND_TEST_ALL_PREFIXES(BookmarkProviderTest, InlineAutocompletion);
306   FRIEND_TEST_ALL_PREFIXES(AutocompleteResultTest,
307                            DemoteOnDeviceSearchSuggestions);
308 
309   typedef std::pair<bool, base::string16> FixupReturn;
310 
311   virtual ~AutocompleteProvider();
312 
313   // Fixes up user URL input to make it more possible to match against.  Among
314   // many other things, this takes care of the following:
315   // * Prepending file:// to file URLs
316   // * Converting drive letters in file URLs to uppercase
317   // * Converting case-insensitive parts of URLs (like the scheme and domain)
318   //   to lowercase
319   // * Convert spaces to %20s
320   // Note that we don't do this in AutocompleteInput's constructor, because if
321   // e.g. we convert a Unicode hostname to punycode, other providers will show
322   // output that surprises the user ("Search Google for xn--6ca.com").
323   // Returns a bool indicating whether fixup succeeded, as well as the fixed-up
324   // input text.  The returned string will be the same as the input string if
325   // fixup failed; this lets callers who don't care about failure simply use the
326   // string unconditionally.
327   static FixupReturn FixupUserInput(const AutocompleteInput& input);
328 
329   // Trims "http:" and up to two subsequent slashes from |url|.  Returns the
330   // number of characters that were trimmed.
331   // NOTE: For a view-source: URL, this will trim from after "view-source:" and
332   // return 0.
333   static size_t TrimHttpPrefix(base::string16* url);
334 
335   const size_t provider_max_matches_;
336 
337   ACMatches matches_;
338   bool done_;
339 
340   Type type_;
341 };
342 
343 #endif  // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
344