1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #ifndef COMPONENTS_OMNIBOX_BROWSER_HISTORY_URL_PROVIDER_H_
6 #define COMPONENTS_OMNIBOX_BROWSER_HISTORY_URL_PROVIDER_H_
7 
8 #include <stddef.h>
9 
10 #include <memory>
11 #include <string>
12 #include <vector>
13 
14 #include "base/compiler_specific.h"
15 #include "base/gtest_prod_util.h"
16 #include "base/memory/ref_counted.h"
17 #include "base/synchronization/atomic_flag.h"
18 #include "base/threading/thread_checker.h"
19 #include "components/omnibox/browser/autocomplete_input.h"
20 #include "components/omnibox/browser/history_match.h"
21 #include "components/omnibox/browser/history_provider.h"
22 #include "components/omnibox/browser/omnibox_field_trial.h"
23 #include "components/search_engines/template_url.h"
24 
25 class AutocompleteProviderListener;
26 class SearchTermsData;
27 
28 namespace base {
29 class SequencedTaskRunner;
30 }
31 
32 namespace history {
33 class HistoryBackend;
34 class URLDatabase;
35 }
36 
37 // How history autocomplete works
38 // ==============================
39 //
40 // Read down this diagram for temporal ordering.
41 //
42 //   Main thread                History thread
43 //   -----------                --------------
44 //   AutocompleteController::Start
45 //     -> HistoryURLProvider::Start
46 //       -> SuggestExactInput
47 //       [params_ allocated]
48 //       -> DoAutocomplete (for inline autocomplete)
49 //         -> URLDatabase::AutocompleteForPrefix (on in-memory DB)
50 //       -> HistoryService::ScheduleAutocomplete
51 //       (return to controller) ----
52 //                                 /
53 //                            HistoryBackend::ScheduleAutocomplete
54 //                              -> HistoryURLProvider::ExecuteWithDB
55 //                                -> DoAutocomplete
56 //                                  -> URLDatabase::AutocompleteForPrefix
57 //                              /
58 //   HistoryService::QueryComplete
59 //     [params_ destroyed]
60 //     -> AutocompleteProviderListener::OnProviderUpdate
61 //
62 // The autocomplete controller calls us, and must be called back, on the main
63 // thread.  When called, we run two autocomplete passes.  The first pass runs
64 // synchronously on the main thread and queries the in-memory URL database.
65 // This pass promotes matches for inline autocomplete if applicable.  We do
66 // this synchronously so that users get consistent behavior when they type
67 // quickly and hit enter, no matter how loaded the main history database is.
68 // Doing this synchronously also prevents inline autocomplete from being
69 // "flickery" in the AutocompleteEdit.  Because the in-memory DB does not have
70 // redirect data, results other than the top match might change between the
71 // two passes, so we can't just decide to use this pass' matches as the final
72 // results.
73 //
74 // The second autocomplete pass uses the full history database, which must be
75 // queried on the history thread.  Start() asks the history service schedule to
76 // callback on the history thread with a pointer to the main database.  When we
77 // are done doing queries, we schedule a task on the main thread that notifies
78 // the AutocompleteController that we're done.
79 //
80 // The communication between these threads is done using a
81 // HistoryURLProviderParams object.  This is allocated in the main thread, and
82 // normally deleted in QueryComplete().  So that both autocomplete passes can
83 // use the same code, we also use this to hold results during the first
84 // autocomplete pass.
85 //
86 // While the second pass is running, the AutocompleteController may cancel the
87 // request.  This can happen frequently when the user is typing quickly.  In
88 // this case, the main thread sets params_->cancel, which the background thread
89 // checks periodically.  If it finds the flag set, it stops what it's doing
90 // immediately and calls back to the main thread.  (We don't delete the params
91 // on the history thread, because we should only do that when we can safely
92 // NULL out params_, and that must be done on the main thread.)
93 
94 // Used to communicate autocomplete parameters between threads via the history
95 // service.
96 struct HistoryURLProviderParams {
97   // See comments on |promote_type| below.
98   enum PromoteType {
99     WHAT_YOU_TYPED_MATCH,
100     FRONT_HISTORY_MATCH,
101     NEITHER,
102   };
103 
104   HistoryURLProviderParams(const AutocompleteInput& input,
105                            const AutocompleteInput& input_before_fixup,
106                            bool trim_http,
107                            const AutocompleteMatch& what_you_typed_match,
108                            const TemplateURL* default_search_provider,
109                            const SearchTermsData* search_terms_data);
110   ~HistoryURLProviderParams();
111   HistoryURLProviderParams(const HistoryURLProviderParams&) = delete;
112   HistoryURLProviderParams& operator=(const HistoryURLProviderParams&) = delete;
113 
114   // Estimates dynamic memory usage.
115   // See base/trace_event/memory_usage_estimator.h for more info.
116   size_t EstimateMemoryUsage() const;
117 
118   const scoped_refptr<base::SequencedTaskRunner> origin_task_runner;
119 
120   // A copy of the autocomplete input. We need the copy since this object will
121   // live beyond the original query while it runs on the history thread.
122   AutocompleteInput input;
123   // |input_before_fixup| is needed for invoking
124   // |AutocompleteMatch::SetAllowedToBeDefault| which considers
125   // trailing input whitespaces which the fixed up |input| will have trimmed.
126   AutocompleteInput input_before_fixup;
127 
128   // Set when "http://" should be trimmed from the beginning of the URLs.
129   bool trim_http;
130 
131   // A match corresponding to what the user typed.
132   AutocompleteMatch what_you_typed_match;
133 
134   // Set by the main thread to cancel this request.  If this flag is set when
135   // the query runs, the query will be abandoned.  This allows us to avoid
136   // running queries that are no longer needed.  Since we don't care if we run
137   // the extra queries, the lack of signaling is not a problem.
138   base::AtomicFlag cancel_flag;
139 
140   // Set by ExecuteWithDB() on the history thread when the query could not be
141   // performed because the history system failed to properly init the database.
142   // If this is set when the main thread is called back, it avoids changing
143   // |matches_| at all, so it won't delete the default match Start() creates.
144   bool failed;
145 
146   // List of matches written by DoAutocomplete().  Upon its return the provider
147   // converts this list to ACMatches and places them in |matches_|.
148   history::HistoryMatches matches;
149 
150   // True if the suggestion for exactly what the user typed appears as a known
151   // URL in the user's history.  In this case, this will also be the first match
152   // in |matches|.
153   //
154   // NOTE: There are some complications related to keeping things consistent
155   // between passes and how we deal with intranet URLs, which are too complex to
156   // explain here; see the implementations of DoAutocomplete() and
157   // FixupExactSuggestion() for specific comments.
158   bool exact_suggestion_is_in_history;
159 
160   // Tells the provider whether to promote the what you typed match, the first
161   // element of |matches|, or neither as the first AutocompleteMatch.  If
162   // |exact_suggestion_is_in_history| is true (and thus "the what you typed
163   // match" and "the first element of |matches|" represent the same thing), this
164   // will be set to WHAT_YOU_TYPED_MATCH.
165   //
166   // NOTE: The second pass of DoAutocomplete() checks what the first pass set
167   // this to.  See comments in DoAutocomplete().
168   PromoteType promote_type;
169 
170   // True if |what_you_typed_match| is eligible for display.  If this is true,
171   // PromoteMatchesIfNecessary() may choose to place |what_you_typed_match| on
172   // |matches_| even when |promote_type| is not WHAT_YOU_TYPED_MATCH.
173   bool have_what_you_typed_match;
174 
175   // The default search provider and search terms data necessary to cull results
176   // that correspond to searches (on the default engine).  These can only be
177   // obtained on the UI thread, so we have to copy them into here to pass them
178   // to the history thread.  We use a std::unique_ptr<TemplateURL> for the DSP
179   // since TemplateURLs can't be copied by value.
180   std::unique_ptr<TemplateURL> default_search_provider;
181   // Similarly, we use a std::unique_ptr<SearchTermsData> so that we can store a
182   // snapshot of the SearchTermsData accessible from the history thread.
183   std::unique_ptr<SearchTermsData> search_terms_data;
184 };
185 
186 // This class is an autocomplete provider and is also a pseudo-internal
187 // component of the history system.  See comments above.
188 class HistoryURLProvider : public HistoryProvider {
189  public:
190   // Various values used in scoring, made public so other providers
191   // can insert results in appropriate ranges relative to these.
192   static const int kScoreForBestInlineableResult;
193   static const int kScoreForUnvisitedIntranetResult;
194   static const int kScoreForWhatYouTypedResult;
195   static const int kBaseScoreForNonInlineableResult;
196 
197   HistoryURLProvider(AutocompleteProviderClient* client,
198                      AutocompleteProviderListener* listener);
199 
200   HistoryURLProvider(const HistoryURLProvider&) = delete;
201   HistoryURLProvider& operator=(const HistoryURLProvider&) = delete;
202 
203   // HistoryProvider:
204   void Start(const AutocompleteInput& input, bool minimal_changes) override;
205   void Stop(bool clear_cached_results, bool due_to_user_inactivity) override;
206 
207   // Estimates dynamic memory usage.
208   // See base/trace_event/memory_usage_estimator.h for more info.
209   size_t EstimateMemoryUsage() const override;
210 
211   // Returns a match representing a navigation to |destination_url|, highlighted
212   // appropriately against |input|.  |trim_http| controls whether the match's
213   // |fill_into_edit| and |contents| should have any HTTP stripped off, and
214   // should not be set to true if the user's original input contains an http
215   // prefix.
216   // NOTES: This does not set the relevance of the returned match, as different
217   //        callers want different behavior. Callers must set this manually.
218   //        This function should only be called on the UI thread.
219   AutocompleteMatch SuggestExactInput(const AutocompleteInput& input,
220                                       const GURL& destination_url,
221                                       bool trim_http);
222 
223   // Runs the history query on the history thread, called by the history
224   // system. The history database MAY BE NULL in which case it is not
225   // available and we should return no data. Also schedules returning the
226   // results to the main thread
227   void ExecuteWithDB(HistoryURLProviderParams* params,
228                      history::HistoryBackend* backend,
229                      history::URLDatabase* db);
230 
231  private:
232   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, HUPScoringExperiment);
233   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, DoTrimHttpScheme);
234   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest,
235                            DontTrimHttpSchemeIfInputHasScheme);
236   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest,
237                            DontTrimHttpSchemeIfInputMatchesInScheme);
238   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest,
239                            DontTrimHttpsSchemeIfInputMatchesInScheme);
240   FRIEND_TEST_ALL_PREFIXES(HistoryURLProviderTest, DoTrimHttpsScheme);
241 
242   enum MatchType {
243     NORMAL,
244     WHAT_YOU_TYPED,
245     INLINE_AUTOCOMPLETE,
246     UNVISITED_INTRANET,  // An intranet site that has never been visited.
247   };
248   class VisitClassifier;
249 
250   ~HistoryURLProvider() override;
251 
252   // Determines the relevance for a match, given its type.  If |match_type| is
253   // NORMAL, |match_number| is a number indicating the relevance of the match
254   // (higher == more relevant).  For other values of |match_type|,
255   // |match_number| is ignored.  Only called some of the time; for some matches,
256   // relevancy scores are assigned consecutively decreasing (1416, 1415, ...).
257   static int CalculateRelevance(MatchType match_type, int match_number);
258 
259   // Returns a set of classifications that highlight all the occurrences of
260   // |input_text| at word breaks in |description|.
261   static ACMatchClassifications ClassifyDescription(
262       const base::string16& input_text,
263       const base::string16& description);
264 
265   // Actually runs the autocomplete job on the given database, which is
266   // guaranteed not to be NULL.  Used by both autocomplete passes, and therefore
267   // called on multiple different threads (though not simultaneously).
268   void DoAutocomplete(history::HistoryBackend* backend,
269                       history::URLDatabase* db,
270                       HistoryURLProviderParams* params);
271 
272   // May promote the what you typed match, the first history match in
273   // params->matches, or both to the front of |matches_|, depending on the
274   // values of params->promote_type, params->have_what_you_typed_match, and
275   // params->prevent_inline_autocomplete.
276   void PromoteMatchesIfNecessary(const HistoryURLProviderParams& params);
277 
278   // Dispatches the results to the autocomplete controller. Called on the
279   // main thread by ExecuteWithDB when the results are available.
280   // Frees params_gets_deleted on exit.
281   void QueryComplete(HistoryURLProviderParams* params_gets_deleted);
282 
283   // Looks up the info for params->what_you_typed_match in the DB.  If found,
284   // fills in the title, promotes the match's priority to that of an inline
285   // autocomplete match (maybe it should be slightly better?), and places it on
286   // the front of params->matches (so we pick the right matches to throw away
287   // when culling redirects to/from it).  Returns whether a match was promoted.
288   bool FixupExactSuggestion(history::URLDatabase* db,
289                             const VisitClassifier& classifier,
290                             HistoryURLProviderParams* params) const;
291 
292   // Helper function for FixupExactSuggestion. If a URL with the same host name
293   // has been visited by the user in the past, the function returns a valid URL.
294   // The return value is built from the canonicalized version of the
295   // autocomplete input in |params|. The scheme and host format (e.g. prefixed
296   // with "www.") of the return value is the same as one of the corresponding
297   // entries in the database.
298   GURL AsKnownIntranetURL(history::URLDatabase* db,
299                           const AutocompleteInput& input) const;
300 
301   // Sees if a shorter version of the best match should be created, and if so
302   // places it at the front of params->matches.  This can suggest history URLs
303   // that are prefixes of the best match (if they've been visited enough,
304   // compared to the best match), or create host-only suggestions even when they
305   // haven't been visited before: if the user visited http://example.com/asdf
306   // once, we'll suggest http://example.com/ even if they've never been to it.
307   // Returns true if a match was successfully created/promoted that we're
308   // willing to inline autocomplete.
309   bool PromoteOrCreateShorterSuggestion(
310       history::URLDatabase* db,
311       HistoryURLProviderParams* params);
312 
313   // Removes results that have been rarely typed or visited, and not any time
314   // recently.  The exact parameters for this heuristic can be found in the
315   // function body. Also culls results corresponding to queries from the default
316   // search engine. These are low-quality, difficult-to-understand matches for
317   // users, and the SearchProvider should surface past queries in a better way
318   // anyway.
319   void CullPoorMatches(HistoryURLProviderParams* params) const;
320 
321   // Removes results that redirect to each other, leaving at most |max_results|
322   // results.
323   void CullRedirects(history::HistoryBackend* backend,
324                      history::HistoryMatches* matches,
325                      size_t max_results) const;
326 
327   // Helper function for CullRedirects, this removes all but the first
328   // occurance of [any of the set of strings in |remove|] from the |matches|
329   // list.
330   //
331   // The return value is the index of the item that is after the item in the
332   // input identified by |source_index|. If |source_index| or an item before
333   // is removed, the next item will be shifted, and this allows the caller to
334   // pick up on the next one when this happens.
335   size_t RemoveSubsequentMatchesOf(history::HistoryMatches* matches,
336                                    size_t source_index,
337                                    const std::vector<GURL>& remove) const;
338 
339   // Converts a specified |match_number| from params.matches into an
340   // autocomplete match for display.  If experimental scoring is enabled, the
341   // final relevance score might be different from the given |relevance|.
342   // NOTE: This function should only be called on the UI thread.
343   AutocompleteMatch HistoryMatchToACMatch(
344       const HistoryURLProviderParams& params,
345       size_t match_number,
346       int relevance);
347 
348   AutocompleteProviderListener* listener_;
349 
350   // Params for the current query.  The provider should not free this directly;
351   // instead, it is passed as a parameter through the history backend, and the
352   // parameter itself is freed once it's no longer needed.  The only reason we
353   // keep this member is so we can set the cancel bit on it.
354   HistoryURLProviderParams* params_;
355 
356   // Whether to query the history URL database to match.  Even if false, we
357   // still use the URL database to decide if the URL-what-you-typed was visited
358   // before or not.  If false, the only possible result that HistoryURL provider
359   // can return is URL-what-you-typed.  This variable is not part of params_
360   // because it never changes after the HistoryURLProvider is initialized.
361   // It's used to aid the possible transition to get all URLs from history to
362   // be scored in the HistoryQuick provider only.
363   bool search_url_database_;
364 
365   // Params controlling experimental behavior of this provider.
366   HUPScoringParams scoring_params_;
367 
368   base::ThreadChecker thread_checker_;
369 };
370 
371 #endif  // COMPONENTS_OMNIBOX_BROWSER_HISTORY_URL_PROVIDER_H_
372