// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_
#define COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_

#include <stddef.h>

#include <map>
#include <utility>
#include <vector>

#include "base/gtest_prod_util.h"
#include "base/memory/ref_counted.h"
#include "base/strings/string16.h"
#include "components/omnibox/browser/autocomplete_match.h"
#include "components/omnibox/browser/in_memory_url_index_types.h"
#include "third_party/metrics_proto/omnibox_event.pb.h"

class AutocompleteInput;

typedef std::vector<metrics::OmniboxEventProto_ProviderInfo> ProvidersInfo;

// The AutocompleteProviders each return different kinds of matches,
// such as history or search matches.  These matches are given
// "relevance" scores.  Higher scores are better matches than lower
// scores.  The relevance scores and classes providing the respective
// matches are as listed below.
//
// IMPORTANT CAVEAT: The tables below are NOT COMPLETE.  Developers
// often forget to keep these tables in sync with the code when they
// change scoring algorithms or add new providers.  For example,
// neither the HistoryQuickProvider (which is a provider that appears
// often) nor the ShortcutsProvider are listed here.  For the best
// idea of how scoring works and what providers are affecting which
// queries, play with chrome://omnibox/ for a while.  While the tables
// below may have some utility, nothing compares with first-hand
// investigation and experience.
//
// ZERO SUGGEST (empty) input type:
// --------------------------------------------------------------------|-----
// Clipboard URL                                                       |  800
// Zero Suggest (most visited, Android only)                           |  600--
// Zero Suggest (default, may be overridden by server)                 |  100
// Local History Zero Suggest                                          |  500--
//
// UNKNOWN input type:
// --------------------------------------------------------------------|-----
// Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
// HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
// HistoryURL (intranet url never visited match, some inexact matches) | 1400++
// Search Primary Provider (past query in history within 2 days)       | 1399**
// Search Primary Provider (what you typed)                            | 1300
// HistoryURL (what you typed, some inexact matches)                   | 1200++
// Keyword (substituting, exact match)                                 | 1100
// Search Primary Provider (past query in history older than 2 days)   | 1050*
// HistoryURL (some inexact matches)                                   |  900++
// BookmarkProvider (prefix match in bookmark title or URL)            |  900+-
// Built-in                                                            |  860++
// Search Primary Provider (navigational suggestion)                   |  800++
// Search Primary Provider (suggestion)                                |  600++
// Keyword (inexact match)                                             |  450
// Search Secondary Provider (what you typed)                          |  250
// Search Secondary Provider (past query in history)                   |  200*
// Search Secondary Provider (navigational suggestion)                 |  150++
// Search Secondary Provider (suggestion)                              |  100++
// Non Personalized On Device Head Suggest Provider                    |    *
//                     (default value 99--, can be changed by Finch)
// Document Suggestions (*experimental): value controlled by Finch     |    *
//
// URL input type:
// --------------------------------------------------------------------|-----
// Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
// HistoryURL (good exact or inline autocomplete matches, some inexact)| 1410++
// HistoryURL (intranet url never visited match, some inexact matches) | 1400++
// HistoryURL (what you typed, some inexact matches)                   | 1200++
// Keyword (substituting, exact match)                                 | 1100
// HistoryURL (some inexact matches)                                   |  900++
// Built-in                                                            |  860++
// Search Primary Provider (what you typed)                            |  850
// Search Primary Provider (navigational suggestion)                   |  800++
// Search Primary Provider (past query in history)                     |  750*
// Keyword (inexact match)                                             |  700
// Search Primary Provider (suggestion)                                |  300++
// Search Secondary Provider (what you typed)                          |  250
// Search Secondary Provider (past query in history)                   |  200*
// Search Secondary Provider (navigational suggestion)                 |  150++
// Search Secondary Provider (suggestion)                              |  100++
// Non Personalized On Device Head Suggest Provider                    |   99--
//
// QUERY input type:
// --------------------------------------------------------------------|-----
// Search Primary or Secondary (past query in history within 2 days)   | 1599**
// Keyword (non-substituting or in keyword UI mode, exact match)       | 1500
// Keyword (substituting, exact match)                                 | 1450
// Search Primary Provider (past query in history within 2 days)       | 1399**
// Search Primary Provider (what you typed)                            | 1300
// Search Primary Provider (past query in history older than 2 days)   | 1050*
// HistoryURL (inexact match)                                          |  900++
// BookmarkProvider (prefix match in bookmark title or URL)            |  900+-
// Search Primary Provider (navigational suggestion)                   |  800++
// Search Primary Provider (suggestion)                                |  600++
// Keyword (inexact match)                                             |  450
// Search Secondary Provider (what you typed)                          |  250
// Search Secondary Provider (past query in history)                   |  200*
// Search Secondary Provider (navigational suggestion)                 |  150++
// Search Secondary Provider (suggestion)                              |  100++
// Non Personalized On Device Head Suggest Provider                    |    *
//                     (default value 99--, can be changed by Finch)
//
// (A search keyword is
a keyword with a replacement string; a bookmark keyword 113 // is a keyword with no replacement string, that is, a shortcut for a URL.) 114 // 115 // There are two possible providers for search suggestions. If the user has 116 // typed a keyword, then the primary provider is the keyword provider and the 117 // secondary provider is the default provider. If the user has not typed a 118 // keyword, then the primary provider corresponds to the default provider. 119 // 120 // Search providers may supply relevance values along with their results to be 121 // used in place of client-side calculated values. 122 // 123 // The value column gives the ranking returned from the various providers. 124 // ++: a series of matches with relevance from n up to (n + max_matches). 125 // --: a series of matches with relevance from n down to (n - max_matches). 126 // *: relevance score falls off over time (discounted 50 points @ 15 minutes, 127 // 450 points @ two weeks) 128 // **: relevance score falls off over two days (discounted 99 points after two 129 // days). 130 // +-: A base score that the provider will adjust upward or downward based on 131 // provider-specific metrics. 132 // 133 // A single result provider for the autocomplete system. Given user input, the 134 // provider decides what (if any) matches to return, their relevance, and their 135 // classifications. 136 class AutocompleteProvider 137 : public base::RefCountedThreadSafe<AutocompleteProvider> { 138 public: 139 // Different AutocompleteProvider implementations. 
140 enum Type { 141 TYPE_BOOKMARK = 1 << 0, 142 TYPE_BUILTIN = 1 << 1, 143 TYPE_HISTORY_QUICK = 1 << 2, 144 TYPE_HISTORY_URL = 1 << 3, 145 TYPE_KEYWORD = 1 << 4, 146 TYPE_SEARCH = 1 << 5, 147 TYPE_SHORTCUTS = 1 << 6, 148 TYPE_ZERO_SUGGEST = 1 << 7, 149 TYPE_CLIPBOARD = 1 << 8, 150 TYPE_DOCUMENT = 1 << 9, 151 TYPE_ON_DEVICE_HEAD = 1 << 10, 152 TYPE_ZERO_SUGGEST_LOCAL_HISTORY = 1 << 11, 153 TYPE_QUERY_TILE = 1 << 12, 154 TYPE_MOST_VISITED_SITES = 1 << 13, 155 TYPE_VERBATIM_MATCH = 1 << 14, 156 }; 157 158 explicit AutocompleteProvider(Type type); 159 160 AutocompleteProvider(const AutocompleteProvider&) = delete; 161 AutocompleteProvider& operator=(const AutocompleteProvider&) = delete; 162 163 // Returns a string describing a particular AutocompleteProvider type. 164 static const char* TypeToString(Type type); 165 166 // Called to start an autocomplete query. The provider is responsible for 167 // tracking its matches for this query and whether it is done processing the 168 // query. When new matches are available or the provider finishes, it 169 // calls the controller's OnProviderUpdate() method. The controller can then 170 // get the new matches using the provider's accessors. 171 // Exception: Matches available immediately after starting the query (that 172 // is, synchronously) do not cause any notifications to be sent. The 173 // controller is expected to check for these without prompting (since 174 // otherwise, starting each provider running would result in a flurry of 175 // notifications). 176 // 177 // Once Stop() has been called, usually no more notifications should be sent. 178 // (See comments on Stop() below.) 179 // 180 // |minimal_changes| is an optimization that lets the provider do less work 181 // when the |input|'s text hasn't changed. See the body of 182 // OmniboxPopupModel::StartAutocomplete(). 183 virtual void Start(const AutocompleteInput& input, bool minimal_changes) = 0; 184 185 // Advises the provider to stop processing. 
This may be called even if the 186 // provider is already done. If the provider caches any results, it should 187 // clear the cache based on the value of |clear_cached_results|. Normally, 188 // once this is called, the provider should not send more notifications to 189 // the controller. 190 // 191 // If |user_inactivity_timer| is true, Stop() is being called because it's 192 // been a long time since the user started the current query, and returning 193 // further asynchronous results would normally just be disruptive. Most 194 // providers should still stop processing in this case, but continuing is 195 // legal if there's a good reason the user is likely to want even long- 196 // delayed asynchronous results, e.g. the user has explicitly invoked a 197 // keyword extension and the extension is still processing the request. 198 virtual void Stop(bool clear_cached_results, 199 bool due_to_user_inactivity); 200 201 // Returns the enum equivalent to the name of this provider. 202 // TODO(derat): Make metrics use AutocompleteProvider::Type directly, or at 203 // least move this method to the metrics directory. 204 metrics::OmniboxEventProto_ProviderType AsOmniboxEventProviderType() const; 205 206 // Called to delete a match and the backing data that produced it. This 207 // match should not appear again in this or future queries. This can only be 208 // called for matches the provider marks as deletable. This should only be 209 // called when no query is running. 210 // NOTE: Do NOT call OnProviderUpdate() in this method, it is the 211 // responsibility of the caller to do so after calling us. 212 virtual void DeleteMatch(const AutocompleteMatch& match); 213 214 // Called when an omnibox event log entry is generated. This gives 215 // a provider the opportunity to add diagnostic information to the 216 // logs. A provider is expected to append a single entry of whatever 217 // information it wants to |provider_info|. 
218 virtual void AddProviderInfo(ProvidersInfo* provider_info) const; 219 220 // Called when a new omnibox session starts or the current session ends. 221 // This gives the opportunity to reset the internal state, if any, associated 222 // with the previous session. 223 virtual void ResetSession(); 224 225 // Estimates dynamic memory usage. 226 // See base/trace_event/memory_usage_estimator.h for more info. 227 // 228 // Note: Subclasses that override this method must call the base class 229 // method and include the response in their estimate. 230 virtual size_t EstimateMemoryUsage() const; 231 232 // Returns a suggested upper bound for how many matches this provider should 233 // return. provider_max_matches()234 size_t provider_max_matches() const { return provider_max_matches_; } 235 236 // Returns the set of matches for the current query. matches()237 const ACMatches& matches() const { return matches_; } 238 239 // Returns whether the provider is done processing the query. done()240 bool done() const { return done_; } 241 242 // Returns this provider's type. type()243 Type type() const { return type_; } 244 245 // Returns a string describing this provider's type. 246 const char* GetName() const; 247 248 typedef std::multimap<base::char16, base::string16> WordMap; 249 250 // Finds the matches for |find_text| in |text|, classifies those matches, 251 // merges those classifications with |original_class|, and returns the merged 252 // classifications. 253 // If |text_is_search_query| is false, matches are classified as MATCH, and 254 // non-matches are classified as NONE. Otherwise, if |text_is_search_query| is 255 // true, matches are classified as NONE, and non-matches are classified as 256 // MATCH. This is done to mimic the behavior of SearchProvider which decorates 257 // matches according to the approach used by Google Suggest. 258 // |find_text| and |text| will be lowercased. 
259 // 260 // For example, given 261 // |find_text| is "sp new", 262 // |text| is "Sports and News at sports.somesite.com - visit us!", 263 // |text_is_search_query| is false, and 264 // |original_class| is {{0, NONE}, {19, URL}, {38, NONE}} (marking 265 // "sports.somesite.com" as a URL), 266 // Then this will return 267 // {{0, MATCH}, {2, NONE}, {11, MATCH}, {14, NONE}, {19, URL|MATCH}, 268 // {21, URL}, {38, NONE}}; i.e., 269 // "Sports and News at sports.somesite.com - visit us!" 270 // ^ ^ ^ ^ ^ ^ ^ 271 // 0 2 11 14 19 21 38 272 // M N M N U|M U N 273 // 274 // For example, given 275 // |find_text| is "canal", 276 // |text| is "panama canal", 277 // |text_is_search_query| is true, and 278 // |original_class| is {{0, NONE}}, 279 // Then this will return 280 // {{0,MATCH}, {7, NONE}}; i.e., 281 // "panama canal" 282 // ^ ^ 283 // 0 M 7 N 284 static ACMatchClassifications ClassifyAllMatchesInString( 285 const base::string16& find_text, 286 const base::string16& text, 287 const bool text_is_search_query, 288 const ACMatchClassifications& original_class = ACMatchClassifications()); 289 290 // Used to determine if we're in keyword mode, if experimental keyword 291 // mode is enabled, and if we're confident that the user is intentionally 292 // (not accidentally) in keyword mode. Combined, this method returns 293 // whether the caller should perform steps that are only valid in this state. 294 static bool InExplicitExperimentalKeywordMode(const AutocompleteInput& input, 295 const base::string16& keyword); 296 297 // Uses the keyword entry mode in |input| (and possibly compare the length 298 // of the user input vs |keyword|) to decide if the user intentionally 299 // entered keyword mode. 
300 static bool IsExplicitlyInKeywordMode(const AutocompleteInput& input, 301 const base::string16& keyword); 302 303 protected: 304 friend class base::RefCountedThreadSafe<AutocompleteProvider>; 305 FRIEND_TEST_ALL_PREFIXES(BookmarkProviderTest, InlineAutocompletion); 306 FRIEND_TEST_ALL_PREFIXES(AutocompleteResultTest, 307 DemoteOnDeviceSearchSuggestions); 308 309 typedef std::pair<bool, base::string16> FixupReturn; 310 311 virtual ~AutocompleteProvider(); 312 313 // Fixes up user URL input to make it more possible to match against. Among 314 // many other things, this takes care of the following: 315 // * Prepending file:// to file URLs 316 // * Converting drive letters in file URLs to uppercase 317 // * Converting case-insensitive parts of URLs (like the scheme and domain) 318 // to lowercase 319 // * Convert spaces to %20s 320 // Note that we don't do this in AutocompleteInput's constructor, because if 321 // e.g. we convert a Unicode hostname to punycode, other providers will show 322 // output that surprises the user ("Search Google for xn--6ca.com"). 323 // Returns a bool indicating whether fixup succeeded, as well as the fixed-up 324 // input text. The returned string will be the same as the input string if 325 // fixup failed; this lets callers who don't care about failure simply use the 326 // string unconditionally. 327 static FixupReturn FixupUserInput(const AutocompleteInput& input); 328 329 // Trims "http:" and up to two subsequent slashes from |url|. Returns the 330 // number of characters that were trimmed. 331 // NOTE: For a view-source: URL, this will trim from after "view-source:" and 332 // return 0. 333 static size_t TrimHttpPrefix(base::string16* url); 334 335 const size_t provider_max_matches_; 336 337 ACMatches matches_; 338 bool done_; 339 340 Type type_; 341 }; 342 343 #endif // COMPONENTS_OMNIBOX_BROWSER_AUTOCOMPLETE_PROVIDER_H_ 344