1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/omnibox/browser/history_url_provider.h"
6 
7 #include <algorithm>
8 #include <memory>
9 #include <utility>
10 
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/feature_list.h"
14 #include "base/location.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/threading/sequenced_task_runner_handle.h"
20 #include "base/time/time.h"
21 #include "base/trace_event/memory_usage_estimator.h"
22 #include "base/trace_event/trace_event.h"
23 #include "components/bookmarks/browser/bookmark_utils.h"
24 #include "components/history/core/browser/history_backend.h"
25 #include "components/history/core/browser/history_database.h"
26 #include "components/history/core/browser/history_service.h"
27 #include "components/history/core/browser/history_types.h"
28 #include "components/omnibox/browser/autocomplete_match.h"
29 #include "components/omnibox/browser/autocomplete_match_classification.h"
30 #include "components/omnibox/browser/autocomplete_provider.h"
31 #include "components/omnibox/browser/autocomplete_provider_listener.h"
32 #include "components/omnibox/browser/autocomplete_result.h"
33 #include "components/omnibox/browser/history_provider.h"
34 #include "components/omnibox/browser/in_memory_url_index_types.h"
35 #include "components/omnibox/browser/omnibox_field_trial.h"
36 #include "components/omnibox/browser/url_index_private_data.h"
37 #include "components/omnibox/browser/url_prefix.h"
38 #include "components/omnibox/common/omnibox_features.h"
39 #include "components/prefs/pref_service.h"
40 #include "components/search_engines/omnibox_focus_type.h"
41 #include "components/search_engines/search_terms_data.h"
42 #include "components/search_engines/template_url_service.h"
43 #include "components/url_formatter/url_fixer.h"
44 #include "components/url_formatter/url_formatter.h"
45 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
46 #include "third_party/metrics_proto/omnibox_input_type.pb.h"
47 #include "url/gurl.h"
48 #include "url/third_party/mozilla/url_parse.h"
49 #include "url/url_util.h"
50 
51 namespace {
52 
53 // Acts like the > operator for URLInfo classes.
CompareHistoryMatch(const history::HistoryMatch & a,const history::HistoryMatch & b)54 bool CompareHistoryMatch(const history::HistoryMatch& a,
55                          const history::HistoryMatch& b) {
56   // A URL that has been typed at all is better than one that has never been
57   // typed.  (Note "!"s on each side)
58   if (!a.url_info.typed_count() != !b.url_info.typed_count())
59     return a.url_info.typed_count() > b.url_info.typed_count();
60 
61   // Innermost matches (matches after any scheme or "www.") are better than
62   // non-innermost matches.
63   if (a.innermost_match != b.innermost_match)
64     return a.innermost_match;
65 
66   // URLs that have been typed more often are better.
67   if (a.url_info.typed_count() != b.url_info.typed_count())
68     return a.url_info.typed_count() > b.url_info.typed_count();
69 
70   // For URLs that have each been typed once, a host (alone) is better than a
71   // page inside.
72   if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly()))
73     return a.IsHostOnly();
74 
75   // URLs that have been visited more often are better.
76   if (a.url_info.visit_count() != b.url_info.visit_count())
77     return a.url_info.visit_count() > b.url_info.visit_count();
78 
79   // URLs that have been visited more recently are better.
80   if (a.url_info.last_visit() != b.url_info.last_visit())
81     return a.url_info.last_visit() > b.url_info.last_visit();
82 
83   // Use alphabetical order on the url spec as a tie-breaker.
84   return a.url_info.url().spec() > b.url_info.url().spec();
85 }
86 
87 // Sorts and dedups the given list of matches.
SortAndDedupMatches(history::HistoryMatches * matches)88 void SortAndDedupMatches(history::HistoryMatches* matches) {
89   // Sort by quality, best first.
90   std::sort(matches->begin(), matches->end(), &CompareHistoryMatch);
91 
92   // Remove duplicate matches (caused by the search string appearing in one of
93   // the prefixes as well as after it).  Consider the following scenario:
94   //
95   // User has visited "http://http.com" once and "http://htaccess.com" twice.
96   // User types "http".  The autocomplete search with prefix "http://" returns
97   // the first host, while the search with prefix "" returns both hosts.  Now
98   // we sort them into rank order:
99   //   http://http.com     (innermost_match)
100   //   http://htaccess.com (!innermost_match, url_info.visit_count == 2)
101   //   http://http.com     (!innermost_match, url_info.visit_count == 1)
102   //
103   // The above scenario tells us we can't use std::unique(), since our
104   // duplicates are not always sequential.  It also tells us we should remove
105   // the lower-quality duplicate(s), since otherwise the returned results won't
106   // be ordered correctly.  This is easy to do: we just always remove the later
107   // element of a duplicate pair.
108   // Be careful!  Because the vector contents may change as we remove elements,
109   // we use an index instead of an iterator in the outer loop, and don't
110   // precalculate the ending position.
111   for (size_t i = 0; i < matches->size(); ++i) {
112     for (history::HistoryMatches::iterator j(matches->begin() + i + 1);
113          j != matches->end(); ) {
114       if ((*matches)[i].url_info.url() == j->url_info.url())
115         j = matches->erase(j);
116       else
117         ++j;
118     }
119   }
120 }
121 
122 // Calculates a new relevance score applying half-life time decaying to |count|
123 // using |time_since_last_visit| and |score_buckets|.  This function will never
124 // return a score higher than |undecayed_relevance|; in other words, it can only
125 // demote the old score.
CalculateRelevanceUsingScoreBuckets(const HUPScoringParams::ScoreBuckets & score_buckets,const base::TimeDelta & time_since_last_visit,int undecayed_relevance,int undecayed_count)126 double CalculateRelevanceUsingScoreBuckets(
127     const HUPScoringParams::ScoreBuckets& score_buckets,
128     const base::TimeDelta& time_since_last_visit,
129     int undecayed_relevance,
130     int undecayed_count) {
131   // Back off if above relevance cap.
132   if ((score_buckets.relevance_cap() != -1) &&
133       (undecayed_relevance >= score_buckets.relevance_cap()))
134     return undecayed_relevance;
135 
136   // Time based decay using half-life time.
137   double decayed_count = undecayed_count;
138   double decay_factor = score_buckets.HalfLifeTimeDecay(time_since_last_visit);
139   if (decayed_count > 0)
140     decayed_count *= decay_factor;
141 
142   const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket =
143       nullptr;
144   const double factor = (score_buckets.use_decay_factor() ?
145       decay_factor : decayed_count);
146   for (size_t i = 0; i < score_buckets.buckets().size(); ++i) {
147     score_bucket = &score_buckets.buckets()[i];
148     if (factor >= score_bucket->first)
149       break;
150   }
151 
152   return (score_bucket && (undecayed_relevance > score_bucket->second)) ?
153       score_bucket->second : undecayed_relevance;
154 }
155 
156 // Returns a new relevance score for the given |match| based on the
157 // |old_relevance| score and |scoring_params|.  The new relevance score is
158 // guaranteed to be less than or equal to |old_relevance|.  In other words, this
159 // function can only demote a score, never boost it.  Returns |old_relevance| if
160 // experimental scoring is disabled.
CalculateRelevanceScoreUsingScoringParams(const history::HistoryMatch & match,int old_relevance,const HUPScoringParams & scoring_params)161 int CalculateRelevanceScoreUsingScoringParams(
162     const history::HistoryMatch& match,
163     int old_relevance,
164     const HUPScoringParams& scoring_params) {
165   const base::TimeDelta time_since_last_visit =
166       base::Time::Now() - match.url_info.last_visit();
167 
168   int relevance = CalculateRelevanceUsingScoreBuckets(
169       scoring_params.typed_count_buckets, time_since_last_visit, old_relevance,
170       match.url_info.typed_count());
171 
172   // Additional demotion (on top of typed_count demotion) of URLs that were
173   // never typed.
174   if (match.url_info.typed_count() == 0) {
175     relevance = CalculateRelevanceUsingScoreBuckets(
176         scoring_params.visited_count_buckets, time_since_last_visit, relevance,
177         match.url_info.visit_count());
178   }
179 
180   DCHECK_LE(relevance, old_relevance);
181   return relevance;
182 }
183 
184 // Extracts typed_count, visit_count, and last_visited time from the URLRow and
185 // puts them in the additional info field of the |match| for display in
186 // about:omnibox.
RecordAdditionalInfoFromUrlRow(const history::URLRow & info,AutocompleteMatch * match)187 void RecordAdditionalInfoFromUrlRow(const history::URLRow& info,
188                                     AutocompleteMatch* match) {
189   match->RecordAdditionalInfo("typed count", info.typed_count());
190   match->RecordAdditionalInfo("visit count", info.visit_count());
191   match->RecordAdditionalInfo("last visit", info.last_visit());
192 }
193 
194 // If |create_if_necessary| is true, ensures that |matches| contains an entry
195 // for |info|, creating a new such entry if necessary (using |match_template|
196 // to get all the other match data).
197 //
198 // If |promote| is true, this also ensures the entry is the first element in
199 // |matches|, moving or adding it to the front as appropriate.  When |promote|
200 // is false, existing matches are left in place, and newly added matches are
201 // placed at the back.
202 //
203 // It's OK to call this function with both |create_if_necessary| and |promote|
204 // false, in which case we'll do nothing.
205 //
206 // Returns whether the match exists regardless if it was promoted/created.
CreateOrPromoteMatch(const history::URLRow & info,const history::HistoryMatch & match_template,history::HistoryMatches * matches,bool create_if_necessary,bool promote)207 bool CreateOrPromoteMatch(const history::URLRow& info,
208                           const history::HistoryMatch& match_template,
209                           history::HistoryMatches* matches,
210                           bool create_if_necessary,
211                           bool promote) {
212   // |matches| may already have an entry for this.
213   for (history::HistoryMatches::iterator i(matches->begin());
214        i != matches->end(); ++i) {
215     if (i->url_info.url() == info.url()) {
216       // Rotate it to the front if the caller wishes.
217       if (promote)
218         std::rotate(matches->begin(), i, i + 1);
219       return true;
220     }
221   }
222 
223   if (!create_if_necessary)
224     return false;
225 
226   // No entry, so create one using |match_template| as a basis.
227   history::HistoryMatch match = match_template;
228   match.url_info = info;
229   if (promote)
230     matches->push_front(match);
231   else
232     matches->push_back(match);
233 
234   return true;
235 }
236 
237 // Returns whether |match| is suitable for inline autocompletion.
CanPromoteMatchForInlineAutocomplete(const history::HistoryMatch & match)238 bool CanPromoteMatchForInlineAutocomplete(const history::HistoryMatch& match) {
239   // We can promote this match if it's been typed at least n times, where n == 1
240   // for "simple" (host-only) URLs and n == 2 for others.  We set a higher bar
241   // for these long URLs because it's less likely that users will want to visit
242   // them again.  Even though we don't increment the typed_count for pasted-in
243   // URLs, if the user manually edits the URL or types some long thing in by
244   // hand, we wouldn't want to immediately start autocompleting it.
245   return match.url_info.typed_count() &&
246       ((match.url_info.typed_count() > 1) || match.IsHostOnly());
247 }
248 
249 // Given the user's |input| and a |match| created from it, reduce the match's
250 // URL to just a host.  If this host still matches the user input, return it.
251 // Returns the empty string on failure.
ConvertToHostOnly(const history::HistoryMatch & match,const base::string16 & input)252 GURL ConvertToHostOnly(const history::HistoryMatch& match,
253                        const base::string16& input) {
254   // See if we should try to do host-only suggestions for this URL. Nonstandard
255   // schemes means there's no authority section, so suggesting the host name
256   // is useless. File URLs are standard, but host suggestion is not useful for
257   // them either.
258   const GURL& url = match.url_info.url();
259   if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile())
260     return GURL();
261 
262   // Transform to a host-only match.  Bail if the host no longer matches the
263   // user input (e.g. because the user typed more than just a host).
264   GURL host = url.GetWithEmptyPath();
265   if ((host.spec().length() < (match.input_location + input.length())))
266     return GURL();  // User typing is longer than this host suggestion.
267 
268   const base::string16 spec = base::UTF8ToUTF16(host.spec());
269   if (spec.compare(match.input_location, input.length(), input))
270     return GURL();  // User typing is no longer a prefix.
271 
272   return host;
273 }
274 
275 }  // namespace
276 
277 // -----------------------------------------------------------------
278 // SearchTermsDataSnapshot
279 
280 // Implementation of SearchTermsData that takes a snapshot of another
281 // SearchTermsData by copying all the responses to the different getters into
282 // member strings, then returning those strings when its own getters are called.
283 // This will typically be constructed on the UI thread from
284 // UIThreadSearchTermsData but is subsequently safe to use on any thread.
285 class SearchTermsDataSnapshot : public SearchTermsData {
286  public:
287   explicit SearchTermsDataSnapshot(const SearchTermsData* search_terms_data);
288   ~SearchTermsDataSnapshot() override;
289   SearchTermsDataSnapshot(const SearchTermsDataSnapshot&) = delete;
290   SearchTermsDataSnapshot& operator=(const SearchTermsDataSnapshot&) = delete;
291 
292   std::string GoogleBaseURLValue() const override;
293   std::string GetApplicationLocale() const override;
294   base::string16 GetRlzParameterValue(bool from_app_list) const override;
295   std::string GetSearchClient() const override;
296   std::string GoogleImageSearchSource() const override;
297 
298   // Estimates dynamic memory usage.
299   // See base/trace_event/memory_usage_estimator.h for more info.
300   size_t EstimateMemoryUsage() const override;
301 
302  private:
303   std::string google_base_url_value_;
304   std::string application_locale_;
305   base::string16 rlz_parameter_value_;
306   std::string search_client_;
307   std::string google_image_search_source_;
308 };
309 
SearchTermsDataSnapshot(const SearchTermsData * search_terms_data)310 SearchTermsDataSnapshot::SearchTermsDataSnapshot(
311     const SearchTermsData* search_terms_data) {
312   if (search_terms_data) {
313     google_base_url_value_ = search_terms_data->GoogleBaseURLValue();
314     application_locale_ = search_terms_data->GetApplicationLocale();
315     rlz_parameter_value_ = search_terms_data->GetRlzParameterValue(false);
316     search_client_ = search_terms_data->GetSearchClient();
317     google_image_search_source_ = search_terms_data->GoogleImageSearchSource();
318   }
319 }
320 
~SearchTermsDataSnapshot()321 SearchTermsDataSnapshot::~SearchTermsDataSnapshot() {
322 }
323 
GoogleBaseURLValue() const324 std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const {
325   return google_base_url_value_;
326 }
327 
GetApplicationLocale() const328 std::string SearchTermsDataSnapshot::GetApplicationLocale() const {
329   return application_locale_;
330 }
331 
GetRlzParameterValue(bool from_app_list) const332 base::string16 SearchTermsDataSnapshot::GetRlzParameterValue(
333     bool from_app_list) const {
334   return rlz_parameter_value_;
335 }
336 
GetSearchClient() const337 std::string SearchTermsDataSnapshot::GetSearchClient() const {
338   return search_client_;
339 }
340 
GoogleImageSearchSource() const341 std::string SearchTermsDataSnapshot::GoogleImageSearchSource() const {
342   return google_image_search_source_;
343 }
344 
EstimateMemoryUsage() const345 size_t SearchTermsDataSnapshot::EstimateMemoryUsage() const {
346   size_t res = 0;
347 
348   res += base::trace_event::EstimateMemoryUsage(google_base_url_value_);
349   res += base::trace_event::EstimateMemoryUsage(application_locale_);
350   res += base::trace_event::EstimateMemoryUsage(rlz_parameter_value_);
351   res += base::trace_event::EstimateMemoryUsage(search_client_);
352   res += base::trace_event::EstimateMemoryUsage(google_image_search_source_);
353 
354   return res;
355 }
356 
357 // -----------------------------------------------------------------
358 // HistoryURLProvider
359 
360 // These ugly magic numbers will go away once we switch all scoring
361 // behavior (including URL-what-you-typed) to HistoryQuick provider.
362 const int HistoryURLProvider::kScoreForBestInlineableResult = 1413;
363 const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403;
364 const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203;
365 const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900;
366 
367 // VisitClassifier is used to classify the type of visit to a particular url.
368 class HistoryURLProvider::VisitClassifier {
369  public:
370   enum Type {
371     INVALID,             // Navigations to the URL are not allowed.
372     UNVISITED_INTRANET,  // A navigable URL for which we have no visit data but
373                          // which is known to refer to a visited intranet host.
374     VISITED,             // The site has been previously visited.
375   };
376 
377   VisitClassifier(HistoryURLProvider* provider,
378                   const AutocompleteInput& input,
379                   history::URLDatabase* db);
380 
381   VisitClassifier(const VisitClassifier&) = delete;
382   VisitClassifier& operator=(const VisitClassifier&) = delete;
383 
384   // Returns the type of visit for the specified input.
type() const385   Type type() const { return type_; }
386 
387   // Returns the URLRow for the visit.
388   // If the type of the visit is UNVISITED_INTRANET, the return value of this
389   // function does not have any visit data; only the URL field is set.
url_row() const390   const history::URLRow& url_row() const { return url_row_; }
391 
392  private:
393   HistoryURLProvider* provider_;
394   history::URLDatabase* db_;
395   Type type_;
396   history::URLRow url_row_;
397 };
398 
VisitClassifier(HistoryURLProvider * provider,const AutocompleteInput & input,history::URLDatabase * db)399 HistoryURLProvider::VisitClassifier::VisitClassifier(
400     HistoryURLProvider* provider,
401     const AutocompleteInput& input,
402     history::URLDatabase* db)
403     : provider_(provider),
404       db_(db),
405       type_(INVALID) {
406   // Detect email addresses.  These cases will look like "http://user@site/",
407   // and because the history backend strips auth creds, we'll get a bogus exact
408   // match below if the user has visited "site".
409   if ((input.type() == metrics::OmniboxInputType::UNKNOWN) &&
410       input.parts().username.is_nonempty() &&
411       !input.parts().password.is_nonempty() &&
412       !input.parts().path.is_nonempty())
413     return;
414 
415   // If the input can be canonicalized to a valid URL, look up all
416   // prefix+input combinations in the URL database to determine if the input
417   // corresponds to any visited URL.
418   if (!input.canonicalized_url().is_valid())
419     return;
420 
421   // Iterate over all prefixes in ascending number of components (i.e. from the
422   // empty prefix to those that have most components).
423   const std::string& desired_tld = input.desired_tld();
424   const URLPrefixes& url_prefixes = URLPrefix::GetURLPrefixes();
425   for (auto prefix_it = url_prefixes.rbegin(); prefix_it != url_prefixes.rend();
426        ++prefix_it) {
427     const GURL url_with_prefix = url_formatter::FixupURL(
428         base::UTF16ToUTF8(prefix_it->prefix + input.text()), desired_tld);
429     if (url_with_prefix.is_valid() &&
430         db_->GetRowForURL(url_with_prefix, &url_row_) && !url_row_.hidden()) {
431       type_ = VISITED;
432       return;
433     }
434   }
435 
436   // If the input does not correspond to a visited URL, we check if the
437   // canonical URL has an intranet hostname that the user visited (albeit with a
438   // different port and/or path) before. If this is true, |url_row_| will be
439   // mostly empty: the URL field will be set to an unvisited URL with the same
440   // scheme and host as some visited URL in the db.
441   const GURL as_known_intranet_url = provider_->AsKnownIntranetURL(db_, input);
442   if (as_known_intranet_url.is_valid()) {
443     url_row_ = history::URLRow(as_known_intranet_url);
444     type_ = UNVISITED_INTRANET;
445   }
446 }
447 
HistoryURLProviderParams(const AutocompleteInput & input,const AutocompleteInput & input_before_fixup,bool trim_http,const AutocompleteMatch & what_you_typed_match,const TemplateURL * default_search_provider,const SearchTermsData * search_terms_data)448 HistoryURLProviderParams::HistoryURLProviderParams(
449     const AutocompleteInput& input,
450     const AutocompleteInput& input_before_fixup,
451     bool trim_http,
452     const AutocompleteMatch& what_you_typed_match,
453     const TemplateURL* default_search_provider,
454     const SearchTermsData* search_terms_data)
455     : origin_task_runner(base::SequencedTaskRunnerHandle::Get()),
456       input(input),
457       input_before_fixup(input_before_fixup),
458       trim_http(trim_http),
459       what_you_typed_match(what_you_typed_match),
460       failed(false),
461       exact_suggestion_is_in_history(false),
462       promote_type(NEITHER),
463       default_search_provider(
464           default_search_provider
465               ? new TemplateURL(default_search_provider->data())
466               : nullptr),
467       search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) {}
468 
~HistoryURLProviderParams()469 HistoryURLProviderParams::~HistoryURLProviderParams() {
470 }
471 
EstimateMemoryUsage() const472 size_t HistoryURLProviderParams::EstimateMemoryUsage() const {
473   size_t res = 0;
474 
475   res += base::trace_event::EstimateMemoryUsage(input);
476   res += base::trace_event::EstimateMemoryUsage(what_you_typed_match);
477   res += base::trace_event::EstimateMemoryUsage(matches);
478   res += base::trace_event::EstimateMemoryUsage(default_search_provider);
479   res += base::trace_event::EstimateMemoryUsage(search_terms_data);
480 
481   return res;
482 }
483 
HistoryURLProvider(AutocompleteProviderClient * client,AutocompleteProviderListener * listener)484 HistoryURLProvider::HistoryURLProvider(AutocompleteProviderClient* client,
485                                        AutocompleteProviderListener* listener)
486     : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_URL, client),
487       listener_(listener),
488       params_(nullptr),
489       search_url_database_(OmniboxFieldTrial::HUPSearchDatabase()) {
490   // Initialize the default HUP scoring params.
491   OmniboxFieldTrial::GetDefaultHUPScoringParams(&scoring_params_);
492   // Initialize HUP scoring params based on the current experiment.
493   OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_);
494 }
495 
Start(const AutocompleteInput & input,bool minimal_changes)496 void HistoryURLProvider::Start(const AutocompleteInput& input,
497                                bool minimal_changes) {
498   TRACE_EVENT0("omnibox", "HistoryURLProvider::Start");
499   // NOTE: We could try hard to do less work in the |minimal_changes| case
500   // here; some clever caching would let us reuse the raw matches from the
501   // history DB without re-querying.  However, we'd still have to go back to
502   // the history thread to mark these up properly, and if pass 2 is currently
503   // running, we'd need to wait for it to return to the main thread before
504   // doing this (we can't just write new data for it to read due to thread
505   // safety issues).  At that point it's just as fast, and easier, to simply
506   // re-run the query from scratch and ignore |minimal_changes|.
507 
508   // Cancel any in-progress query.
509   Stop(false, false);
510 
511   matches_.clear();
512 
513   if (input.focus_type() != OmniboxFocusType::DEFAULT ||
514       (input.type() == metrics::OmniboxInputType::EMPTY))
515     return;
516 
517   // Do some fixup on the user input before matching against it, so we provide
518   // good results for local file paths, input with spaces, etc.
519   const FixupReturn fixup_return(FixupUserInput(input));
520   if (!fixup_return.first)
521     return;
522   url::Parsed parts;
523   url_formatter::SegmentURL(fixup_return.second, &parts);
524   AutocompleteInput fixed_up_input(input);
525   fixed_up_input.UpdateText(fixup_return.second, base::string16::npos, parts);
526 
527   // Create a match for what the user typed.
528   const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text());
529   AutocompleteMatch what_you_typed_match(SuggestExactInput(
530       fixed_up_input, fixed_up_input.canonicalized_url(), trim_http));
531 
532   // If the input fix-up above added characters, show them as an
533   // autocompletion, unless directed not to.
534   if (!input.prevent_inline_autocomplete() &&
535       fixed_up_input.text().size() > input.text().size() &&
536       base::StartsWith(fixed_up_input.text(), input.text(),
537                        base::CompareCase::SENSITIVE)) {
538     what_you_typed_match.fill_into_edit = fixed_up_input.text();
539     what_you_typed_match.inline_autocompletion =
540         fixed_up_input.text().substr(input.text().size());
541     what_you_typed_match.contents_class.push_back(
542         {input.text().length(), ACMatchClassification::URL});
543   }
544 
545   what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0);
546 
547   // Add the what-you-typed match as a fallback in case we can't get the history
548   // service or URL DB; otherwise, we'll replace this match lower down.  Don't
549   // do this for queries, though -- while we can sometimes mark up a match for
550   // them, it's not what the user wants, and just adds noise.
551   if (fixed_up_input.type() != metrics::OmniboxInputType::QUERY)
552     matches_.push_back(what_you_typed_match);
553 
554   // We'll need the history service to run both passes, so try to obtain it.
555   history::HistoryService* const history_service =
556       client()->GetHistoryService();
557   if (!history_service)
558     return;
559 
560   // Get the default search provider and search terms data now since we have to
561   // retrieve these on the UI thread, and the second pass runs on the history
562   // thread. |template_url_service| can be null when testing.
563   TemplateURLService* template_url_service = client()->GetTemplateURLService();
564   const TemplateURL* default_search_provider = template_url_service ?
565       template_url_service->GetDefaultSearchProvider() : nullptr;
566   const SearchTermsData* search_terms_data =
567       template_url_service ? &template_url_service->search_terms_data()
568                            : nullptr;
569 
570   // Create the data structure for the autocomplete passes.  We'll save this off
571   // onto the |params_| member for later deletion below if we need to run pass
572   // 2.
573   std::unique_ptr<HistoryURLProviderParams> params(new HistoryURLProviderParams(
574       fixed_up_input, input, trim_http, what_you_typed_match,
575       default_search_provider, search_terms_data));
576 
577   // Pass 1: Get the in-memory URL database, and use it to find and promote
578   // the inline autocomplete match, if any.
579   history::URLDatabase* url_db = history_service->InMemoryDatabase();
580   // url_db can be null if it hasn't finished initializing (or failed to
581   // initialize).  In this case all we can do is fall back on the second pass.
582   //
583   // TODO(pkasting): We should just block here until this loads.  Any time
584   // someone unloads the history backend, we'll get inconsistent inline
585   // autocomplete behavior here.
586   if (url_db) {
587     DoAutocomplete(nullptr, url_db, params.get());
588     matches_.clear();
589     PromoteMatchesIfNecessary(*params);
590     // NOTE: We don't reset |params| here since at least the |promote_type|
591     // field on it will be read by the second pass -- see comments in
592     // DoAutocomplete().
593   }
594 
595   // Pass 2: Ask the history service to call us back on the history thread,
596   // where we can read the full on-disk DB.
597   if (search_url_database_ && input.want_asynchronous_matches()) {
598     done_ = false;
599     params_ = params.release();  // This object will be destroyed in
600                                  // QueryComplete() once we're done with it.
601     history_service->ScheduleAutocomplete(
602         base::BindOnce(&HistoryURLProvider::ExecuteWithDB, this, params_));
603   }
604 }
605 
Stop(bool clear_cached_results,bool due_to_user_inactivity)606 void HistoryURLProvider::Stop(bool clear_cached_results,
607                               bool due_to_user_inactivity) {
608   done_ = true;
609 
610   if (params_)
611     params_->cancel_flag.Set();
612 }
613 
EstimateMemoryUsage() const614 size_t HistoryURLProvider::EstimateMemoryUsage() const {
615   size_t res = HistoryProvider::EstimateMemoryUsage();
616 
617   if (params_)
618     res += base::trace_event::EstimateMemoryUsage(*params_);
619   res += base::trace_event::EstimateMemoryUsage(scoring_params_);
620 
621   return res;
622 }
623 
SuggestExactInput(const AutocompleteInput & input,const GURL & destination_url,bool trim_http)624 AutocompleteMatch HistoryURLProvider::SuggestExactInput(
625     const AutocompleteInput& input,
626     const GURL& destination_url,
627     bool trim_http) {
628   // The FormattedStringWithEquivalentMeaning() call below requires callers to
629   // be on the main thread.
630   DCHECK(thread_checker_.CalledOnValidThread());
631 
632   AutocompleteMatch match(this, 0, false,
633                           AutocompleteMatchType::URL_WHAT_YOU_TYPED);
634 
635   if (destination_url.is_valid()) {
636     match.destination_url = destination_url;
637 
638     // If the input explicitly contains "http://", callers must set |trim_http|
639     // to false. Otherwise, |trim_http| may be either true or false.
640     DCHECK(!(trim_http && AutocompleteInput::HasHTTPScheme(input.text())));
641     base::string16 display_string(url_formatter::FormatUrl(
642         destination_url,
643         url_formatter::kFormatUrlOmitDefaults &
644             ~url_formatter::kFormatUrlOmitHTTP,
645         net::UnescapeRule::SPACES, nullptr, nullptr, nullptr));
646     if (trim_http)
647       TrimHttpPrefix(&display_string);
648     match.fill_into_edit =
649         AutocompleteInput::FormattedStringWithEquivalentMeaning(
650             destination_url, display_string, client()->GetSchemeClassifier(),
651             nullptr);
652     // The what-you-typed match is generally only allowed to be default for
653     // URL inputs or when there is no default search provider.  (It's also
654     // allowed to be default for UNKNOWN inputs where the destination is a known
655     // intranet site.  In this case, |allowed_to_be_default_match| is revised in
656     // FixupExactSuggestion().)
657     const bool has_default_search_provider =
658        client()->GetTemplateURLService() &&
659        client()->GetTemplateURLService()->GetDefaultSearchProvider();
660     match.allowed_to_be_default_match =
661         (input.type() == metrics::OmniboxInputType::URL) ||
662         !has_default_search_provider;
663     // NOTE: Don't set match.inline_autocompletion to something non-empty here;
664     // it's surprising and annoying.
665 
666     // Try to highlight "innermost" match location.  If we fix up "w" into
667     // "www.w.com", we want to highlight the fifth character, not the first.
668     // This relies on match.destination_url being the non-prefix-trimmed version
669     // of match.contents.
670     match.contents = display_string;
671 
672     TermMatches termMatches = {{0, 0, input.text().length()}};
673     match.contents_class = ClassifyTermMatches(
674         termMatches, match.contents.size(),
675         ACMatchClassification::MATCH | ACMatchClassification::URL,
676         ACMatchClassification::URL);
677   }
678 
679   return match;
680 }
681 
ExecuteWithDB(HistoryURLProviderParams * params,history::HistoryBackend * backend,history::URLDatabase * db)682 void HistoryURLProvider::ExecuteWithDB(HistoryURLProviderParams* params,
683                                        history::HistoryBackend* backend,
684                                        history::URLDatabase* db) {
685   // We may get called with a null database if it couldn't be properly
686   // initialized.
687   if (!db) {
688     params->failed = true;
689   } else if (!params->cancel_flag.IsSet()) {
690     base::TimeTicks beginning_time = base::TimeTicks::Now();
691 
692     DoAutocomplete(backend, db, params);
693 
694     UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime",
695                         base::TimeTicks::Now() - beginning_time);
696   }
697 
698   // Return the results (if any) to the originating sequence.
699   params->origin_task_runner->PostTask(
700       FROM_HERE,
701       base::BindOnce(&HistoryURLProvider::QueryComplete, this, params));
702 }
703 
~HistoryURLProvider()704 HistoryURLProvider::~HistoryURLProvider() {
705   // Note: This object can get leaked on shutdown if there are pending
706   // requests on the database (which hold a reference to us). Normally, these
707   // messages get flushed for each thread. We do a round trip from main, to
708   // history, back to main while holding a reference. If the main thread
709   // completes before the history thread, the message to delegate back to the
710   // main thread will not run and the reference will leak. Therefore, don't do
711   // anything on destruction.
712 }
713 
714 // static
CalculateRelevance(MatchType match_type,int match_number)715 int HistoryURLProvider::CalculateRelevance(MatchType match_type,
716                                            int match_number) {
717   switch (match_type) {
718     case INLINE_AUTOCOMPLETE:
719       return kScoreForBestInlineableResult;
720 
721     case UNVISITED_INTRANET:
722       return kScoreForUnvisitedIntranetResult;
723 
724     case WHAT_YOU_TYPED:
725       return kScoreForWhatYouTypedResult;
726 
727     default:  // NORMAL
728       return kBaseScoreForNonInlineableResult + match_number;
729   }
730 }
731 
732 // static
ClassifyDescription(const base::string16 & input_text,const base::string16 & description)733 ACMatchClassifications HistoryURLProvider::ClassifyDescription(
734     const base::string16& input_text,
735     const base::string16& description) {
736   TermMatches term_matches = FindTermMatches(input_text, description);
737   return ClassifyTermMatches(term_matches, description.size(),
738                              ACMatchClassification::MATCH,
739                              ACMatchClassification::NONE);
740 }
741 
DoAutocomplete(history::HistoryBackend * backend,history::URLDatabase * db,HistoryURLProviderParams * params)742 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
743                                         history::URLDatabase* db,
744                                         HistoryURLProviderParams* params) {
745   // Get the matching URLs from the DB.
746   params->matches.clear();
747   history::URLRows url_matches;
748 
749   if (search_url_database_) {
750     const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes();
751     const bool hide_visits_from_cct =
752         base::FeatureList::IsEnabled(omnibox::kHideVisitsFromCct);
753     for (auto i(prefixes.begin()); i != prefixes.end(); ++i) {
754       if (params->cancel_flag.IsSet())
755         return;  // Canceled in the middle of a query, give up.
756 
757       // We only need provider_max_matches_ results in the end, but before we
758       // get there we need to promote lower-quality matches that are prefixes of
759       // higher- quality matches, and remove lower-quality redirects.  So we ask
760       // for more results than we need, of every prefix type, in hopes this will
761       // give us far more than enough to work with.  CullRedirects() will then
762       // reduce the list to the best provider_max_matches_ results.
763       std::string prefixed_input =
764           base::UTF16ToUTF8(i->prefix + params->input.text());
765       db->AutocompleteForPrefix(prefixed_input, provider_max_matches_ * 2,
766                                 !backend, &url_matches);
767       for (history::URLRows::const_iterator j(url_matches.begin());
768            j != url_matches.end(); ++j) {
769         if (hide_visits_from_cct && backend) {
770           history::VisitVector visits;
771           if (db->GetVisitsForUrl2(j->id(), &visits) &&
772               URLIndexPrivateData::ShouldExcludeBecauseOfCctVisits(visits))
773             continue;
774         }
775         const GURL& row_url = j->url();
776         const URLPrefix* best_prefix = URLPrefix::BestURLPrefix(
777             base::UTF8ToUTF16(row_url.spec()), base::string16());
778         DCHECK(best_prefix);
779         history::HistoryMatch match;
780         match.url_info = *j;
781         match.input_location = i->prefix.length();
782         match.innermost_match =
783             i->num_components >= best_prefix->num_components;
784 
785         AutocompleteMatch::GetMatchComponents(
786             row_url, {{match.input_location, prefixed_input.length()}},
787             &match.match_in_scheme, &match.match_in_subdomain);
788 
789         params->matches.push_back(std::move(match));
790       }
791     }
792 
793     // Create sorted list of suggestions.
794     CullPoorMatches(params);
795     SortAndDedupMatches(&params->matches);
796   }
797 
798   // Try to create a shorter suggestion from the best match.
799   // We consider the what you typed match eligible for display when it's
800   // navigable and there's a reasonable chance the user intended to do
801   // something other than search.  We use a variety of heuristics to determine
802   // this, e.g. whether the user explicitly typed a scheme, or if omnibox
803   // searching has been disabled by policy. In the cases where we've parsed as
804   // UNKNOWN, we'll still show an accidental search infobar if need be.
805   VisitClassifier classifier(this, params->input, db);
806   params->have_what_you_typed_match =
807       (params->input.type() != metrics::OmniboxInputType::QUERY) &&
808       ((params->input.type() != metrics::OmniboxInputType::UNKNOWN) ||
809        (classifier.type() == VisitClassifier::UNVISITED_INTRANET) ||
810        !params->trim_http ||
811        (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0) ||
812        !params->default_search_provider);
813   const bool have_shorter_suggestion_suitable_for_inline_autocomplete =
814       PromoteOrCreateShorterSuggestion(db, params);
815 
816   // Check whether what the user typed appears in history.
817   const bool can_check_history_for_exact_match =
818       // Checking what_you_typed_match.destination_url.is_valid() tells us
819       // whether SuggestExactInput() succeeded in constructing a valid match.
820       params->what_you_typed_match.destination_url.is_valid() &&
821       // Additionally, in the case where the user has typed "foo.com" and
822       // visited (but not typed) "foo/", and the input is "foo", the first pass
823       // will fall into the FRONT_HISTORY_MATCH case for "foo.com" but the
824       // second pass can suggest the exact input as a better URL.  Since we need
825       // both passes to agree, and since during the first pass there's no way to
826       // know about "foo/", ensure that if the promote type was set to
827       // FRONT_HISTORY_MATCH during the first pass, the second pass will not
828       // consider the exact suggestion to be in history and therefore will not
829       // suggest the exact input as a better match.  (Note that during the first
830       // pass, this conditional will always succeed since |promote_type| is
831       // initialized to NEITHER.)
832       (params->promote_type != HistoryURLProviderParams::FRONT_HISTORY_MATCH);
833   params->exact_suggestion_is_in_history = can_check_history_for_exact_match &&
834       FixupExactSuggestion(db, classifier, params);
835 
836   // If we succeeded in fixing up the exact match based on the user's history,
837   // we should treat it as the best match regardless of input type.  If not,
838   // then we check whether there's an inline autocompletion we can create from
839   // this input, so we can promote that as the best match.
840   if (params->exact_suggestion_is_in_history) {
841     params->promote_type = HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH;
842   } else if (!params->matches.empty() &&
843              (have_shorter_suggestion_suitable_for_inline_autocomplete ||
844               CanPromoteMatchForInlineAutocomplete(params->matches[0]))) {
845     // Note that we promote this inline-autocompleted match even when
846     // params->prevent_inline_autocomplete is true.  This is safe because in
847     // this case the match will be marked as "not allowed to be default", and
848     // a non-inlined match that is "allowed to be default" will be reordered
849     // above it by the controller/AutocompleteResult.  We ensure there is such
850     // a match in two ways:
851     //   * If params->have_what_you_typed_match is true, we force the
852     //     what-you-typed match to be added in this case.  See comments in
853     //     PromoteMatchesIfNecessary().
854     //   * Otherwise, we should have some sort of QUERY or UNKNOWN input that
855     //     the SearchProvider will provide a defaultable what-you-typed match
856     //     for.
857     params->promote_type = HistoryURLProviderParams::FRONT_HISTORY_MATCH;
858   } else {
859     // Failed to promote any URLs.  Use the What You Typed match, if we have it.
860     params->promote_type = params->have_what_you_typed_match ?
861         HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH :
862         HistoryURLProviderParams::NEITHER;
863   }
864 
865   const size_t max_results =
866       provider_max_matches_ + (params->exact_suggestion_is_in_history ? 1 : 0);
867   if (backend) {
868     // Remove redirects and trim list to size.  We want to provide up to
869     // provider_max_matches_ results plus the What You Typed result, if it was
870     // added to params->matches above.
871     CullRedirects(backend, &params->matches, max_results);
872   } else if (params->matches.size() > max_results) {
873     // Simply trim the list to size.
874     params->matches.resize(max_results);
875   }
876 }
877 
PromoteMatchesIfNecessary(const HistoryURLProviderParams & params)878 void HistoryURLProvider::PromoteMatchesIfNecessary(
879     const HistoryURLProviderParams& params) {
880   if (params.promote_type == HistoryURLProviderParams::NEITHER)
881     return;
882   if (params.promote_type == HistoryURLProviderParams::FRONT_HISTORY_MATCH) {
883     matches_.push_back(
884         HistoryMatchToACMatch(params, 0,
885                               CalculateRelevance(INLINE_AUTOCOMPLETE, 0)));
886   }
887   // There are two cases where we need to add the what-you-typed-match:
888   //   * If params.promote_type is WHAT_YOU_TYPED_MATCH, we're being explicitly
889   //     directed to.
890   //   * If params.have_what_you_typed_match is true, then params.promote_type
891   //     can't be NEITHER (see code near the end of DoAutocomplete()), so if
892   //     it's not WHAT_YOU_TYPED_MATCH, it must be FRONT_HISTORY_MATCH, and
893   //     we'll have promoted the history match above.  If
894   //     params.prevent_inline_autocomplete is also true, then this match
895   //     will be marked "not allowed to be default", and we need to add the
896   //     what-you-typed match to ensure there's a legal default match for the
897   //     controller/AutocompleteResult to promote.  (If
898   //     params.have_what_you_typed_match is false, the SearchProvider should
899   //     take care of adding this defaultable match.)
900   if ((params.promote_type == HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH) ||
901       (!matches_.back().allowed_to_be_default_match &&
902        params.have_what_you_typed_match)) {
903     matches_.push_back(params.what_you_typed_match);
904   }
905 }
906 
QueryComplete(HistoryURLProviderParams * params_gets_deleted)907 void HistoryURLProvider::QueryComplete(
908     HistoryURLProviderParams* params_gets_deleted) {
909   TRACE_EVENT0("omnibox", "HistoryURLProvider::QueryComplete");
910   // Ensure |params_gets_deleted| gets deleted on exit.
911   std::unique_ptr<HistoryURLProviderParams> params(params_gets_deleted);
912 
913   // If the user hasn't already started another query, clear our member pointer
914   // so we can't write into deleted memory.
915   if (params_ == params_gets_deleted)
916     params_ = nullptr;
917 
918   // Don't send responses for queries that have been canceled.
919   if (params->cancel_flag.IsSet())
920     return;  // Already set done_ when we canceled, no need to set it again.
921 
922   // Don't modify |matches_| if the query failed, since it might have a default
923   // match in it, whereas |params->matches| will be empty.
924   if (!params->failed) {
925     matches_.clear();
926     PromoteMatchesIfNecessary(*params);
927 
928     // Determine relevance of highest scoring match, if any.
929     int relevance = matches_.empty() ?
930         CalculateRelevance(NORMAL,
931                            static_cast<int>(params->matches.size() - 1)) :
932         matches_[0].relevance;
933 
934     // Convert the history matches to autocomplete matches.  If we promoted the
935     // first match, skip over it.
936     const size_t first_match =
937         (params->exact_suggestion_is_in_history ||
938          (params->promote_type ==
939              HistoryURLProviderParams::FRONT_HISTORY_MATCH)) ? 1 : 0;
940     for (size_t i = first_match; i < params->matches.size(); ++i) {
941       // All matches score one less than the previous match.
942       --relevance;
943       // The experimental scoring must not change the top result's score.
944       if (!matches_.empty()) {
945         relevance = CalculateRelevanceScoreUsingScoringParams(
946             params->matches[i], relevance, scoring_params_);
947       }
948       matches_.push_back(HistoryMatchToACMatch(*params, i, relevance));
949     }
950   }
951 
952   done_ = true;
953   listener_->OnProviderUpdate(true);
954 }
955 
FixupExactSuggestion(history::URLDatabase * db,const VisitClassifier & classifier,HistoryURLProviderParams * params) const956 bool HistoryURLProvider::FixupExactSuggestion(
957     history::URLDatabase* db,
958     const VisitClassifier& classifier,
959     HistoryURLProviderParams* params) const {
960   MatchType type = INLINE_AUTOCOMPLETE;
961 
962   switch (classifier.type()) {
963     case VisitClassifier::INVALID:
964       return false;
965     case VisitClassifier::UNVISITED_INTRANET:
966       params->what_you_typed_match.destination_url = classifier.url_row().url();
967       type = UNVISITED_INTRANET;
968       break;
969     default:
970       DCHECK_EQ(VisitClassifier::VISITED, classifier.type());
971       // We have data for this match, use it.
972       params->what_you_typed_match.deletable = true;
973       auto title = classifier.url_row().title();
974       if (OmniboxFieldTrial::RichAutocompletionShowTitles())
975         params->what_you_typed_match.fill_into_edit_additional_text = title;
976       params->what_you_typed_match.description = title;
977       params->what_you_typed_match.destination_url = classifier.url_row().url();
978       RecordAdditionalInfoFromUrlRow(classifier.url_row(),
979                                      &params->what_you_typed_match);
980       params->what_you_typed_match.description_class = ClassifyDescription(
981           params->input.text(), params->what_you_typed_match.description);
982       if (!classifier.url_row().typed_count()) {
983         // If we reach here, we must be in the second pass, and we must not have
984         // this row's data available during the first pass.  That means we
985         // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to
986         // maintain the ordering between passes consistent, we need to score it
987         // the same way here.
988         const GURL as_known_intranet_url =
989             AsKnownIntranetURL(db, params->input);
990         type = as_known_intranet_url.is_valid() ? UNVISITED_INTRANET
991                                                 : WHAT_YOU_TYPED;
992       }
993       break;
994   }
995 
996   const GURL& url = params->what_you_typed_match.destination_url;
997   const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
998   // If the what-you-typed result looks like a single word (which can be
999   // interpreted as an intranet address) followed by a pound sign ("#"), leave
1000   // the score for the url-what-you-typed result as is and also don't mark it
1001   // as allowed to be the default match.  It will likely be outscored by a
1002   // search query from the SearchProvider or, if not, the search query default
1003   // match will in any case--which is allowed to be the default match--will be
1004   // reordered to be first.  This test fixes cases such as "c#" and "c# foo"
1005   // where the user has visited an intranet site "c".  We want the search-what-
1006   // you-typed score to beat the URL-what-you-typed score in this case.  Most
1007   // of the below test tries to make sure that this code does not trigger if
1008   // the user did anything to indicate the desired match is a URL.  For
1009   // instance, "c/# foo" will not pass the test because that will be classified
1010   // as input type URL.  The parsed.CountCharactersBefore() in the test looks
1011   // for the presence of a reference fragment in the URL by checking whether
1012   // the position differs included the delimiter (pound sign) versus not
1013   // including the delimiter.  (One cannot simply check url.ref() because it
1014   // will not distinguish between the input "c" and the input "c#", both of
1015   // which will have empty reference fragments.)
1016   if ((type == UNVISITED_INTRANET) &&
1017       (params->input.type() != metrics::OmniboxInputType::URL) &&
1018       url.username().empty() && url.password().empty() && url.port().empty() &&
1019       (url.path_piece() == "/") && url.query().empty() &&
1020       (parsed.CountCharactersBefore(url::Parsed::REF, true) !=
1021        parsed.CountCharactersBefore(url::Parsed::REF, false))) {
1022     return false;
1023   }
1024 
1025   params->what_you_typed_match.allowed_to_be_default_match = true;
1026   params->what_you_typed_match.relevance = CalculateRelevance(type, 0);
1027 
1028   // If there are any other matches, then don't promote this match here, in
1029   // hopes the caller will be able to inline autocomplete a better suggestion.
1030   // DoAutocomplete() will fall back on this match if inline autocompletion
1031   // fails.  This matches how we react to never-visited URL inputs in the non-
1032   // intranet case.
1033   if (type == UNVISITED_INTRANET && !params->matches.empty())
1034     return false;
1035 
1036   // Put it on the front of the HistoryMatches for redirect culling.
1037   CreateOrPromoteMatch(classifier.url_row(), history::HistoryMatch(),
1038                        &params->matches, true, true);
1039   return true;
1040 }
1041 
AsKnownIntranetURL(history::URLDatabase * db,const AutocompleteInput & input) const1042 GURL HistoryURLProvider::AsKnownIntranetURL(
1043     history::URLDatabase* db,
1044     const AutocompleteInput& input) const {
1045   // Normally passing the first two conditions below ought to guarantee the
1046   // third condition, but because FixupUserInput() can run and modify the
1047   // input's text and parts between Parse() and here, it seems better to be
1048   // paranoid and check.
1049   if ((input.type() != metrics::OmniboxInputType::UNKNOWN) ||
1050       !base::LowerCaseEqualsASCII(input.scheme(), url::kHttpScheme) ||
1051       !input.parts().host.is_nonempty())
1052     return GURL();
1053 
1054   const std::string host(base::UTF16ToUTF8(
1055       input.text().substr(input.parts().host.begin, input.parts().host.len)));
1056 
1057   // Check if the host has registry domain.
1058   if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
1059           host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
1060           net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
1061     return GURL();
1062 
1063   const GURL& url = input.canonicalized_url();
1064   // Check if the host of the canonical URL can be found in the database.
1065   std::string scheme_in_db;
1066   if (db->IsTypedHost(host, &scheme_in_db)) {
1067     GURL::Replacements replace_scheme;
1068     replace_scheme.SetSchemeStr(scheme_in_db);
1069     return url.ReplaceComponents(replace_scheme);
1070   }
1071 
1072   // Check if appending "www." to the canonicalized URL generates a URL found in
1073   // the database.
1074   const std::string alternative_host = "www." + host;
1075   if (!base::StartsWith(host, "www.", base::CompareCase::INSENSITIVE_ASCII) &&
1076       db->IsTypedHost(alternative_host, &scheme_in_db)) {
1077     GURL::Replacements replace_scheme_and_host;
1078     replace_scheme_and_host.SetHostStr(alternative_host);
1079     replace_scheme_and_host.SetSchemeStr(scheme_in_db);
1080     return url.ReplaceComponents(replace_scheme_and_host);
1081   }
1082 
1083   return GURL();
1084 }
1085 
PromoteOrCreateShorterSuggestion(history::URLDatabase * db,HistoryURLProviderParams * params)1086 bool HistoryURLProvider::PromoteOrCreateShorterSuggestion(
1087     history::URLDatabase* db,
1088     HistoryURLProviderParams* params) {
1089   if (params->matches.empty())
1090     return false;  // No matches, nothing to do.
1091 
1092   // Determine the base URL from which to search, and whether that URL could
1093   // itself be added as a match.  We can add the base iff it's not "effectively
1094   // the same" as any "what you typed" match.
1095   const history::HistoryMatch& match = params->matches[0];
1096   GURL search_base = ConvertToHostOnly(match, params->input.text());
1097   bool can_add_search_base_to_matches = !params->have_what_you_typed_match;
1098   if (search_base.is_empty()) {
1099     // Search from what the user typed when we couldn't reduce the best match
1100     // to a host.  Careful: use a substring of |match| here, rather than the
1101     // first match in |params|, because they might have different prefixes.  If
1102     // the user typed "google.com", params->what_you_typed_match will hold
1103     // "http://google.com/", but |match| might begin with
1104     // "http://www.google.com/".
1105     // TODO: this should be cleaned up, and is probably incorrect for IDN.
1106     std::string new_match = match.url_info.url().possibly_invalid_spec().
1107         substr(0, match.input_location + params->input.text().length());
1108     search_base = GURL(new_match);
1109     if (search_base.is_empty())
1110       return false;  // Can't construct a URL from which to start a search.
1111   } else if (!can_add_search_base_to_matches) {
1112     can_add_search_base_to_matches =
1113         (search_base != params->what_you_typed_match.destination_url);
1114   }
1115   if (search_base == match.url_info.url())
1116     return false;  // Couldn't shorten |match|, so no URLs to search over.
1117 
1118   // Search the DB for short URLs between our base and |match|.
1119   history::URLRow info(search_base);
1120   bool promote = true;
1121   // A short URL is only worth suggesting if it's been visited at least a third
1122   // as often as the longer URL.
1123   const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
1124   // For stability between the in-memory and on-disk autocomplete passes, when
1125   // the long URL has been typed before, only suggest shorter URLs that have
1126   // also been typed.  Otherwise, the on-disk pass could suggest a shorter URL
1127   // (which hasn't been typed) that the in-memory pass doesn't know about,
1128   // thereby making the top match, and thus the behavior of inline
1129   // autocomplete, unstable.
1130   const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
1131   if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
1132           match.url_info.url().possibly_invalid_spec(), min_visit_count,
1133           min_typed_count, can_add_search_base_to_matches, &info)) {
1134     if (!can_add_search_base_to_matches)
1135       return false;  // Couldn't find anything and can't add the search base.
1136 
1137     // Try to get info on the search base itself.  Promote it to the top if the
1138     // original best match isn't good enough to autocomplete.
1139     db->GetRowForURL(search_base, &info);
1140     promote = match.url_info.typed_count() <= 1;
1141   }
1142 
1143   // Promote or add the desired URL to the list of matches.
1144   const bool ensure_can_inline =
1145       promote && CanPromoteMatchForInlineAutocomplete(match);
1146   return CreateOrPromoteMatch(info, match, &params->matches, true, promote) &&
1147          ensure_can_inline;
1148 }
1149 
CullPoorMatches(HistoryURLProviderParams * params) const1150 void HistoryURLProvider::CullPoorMatches(
1151     HistoryURLProviderParams* params) const {
1152   const base::Time& threshold(history::AutocompleteAgeThreshold());
1153   for (history::HistoryMatches::iterator i(params->matches.begin());
1154        i != params->matches.end(); ) {
1155     if (RowQualifiesAsSignificant(i->url_info, threshold) &&
1156         (!params->default_search_provider ||
1157          !params->default_search_provider->IsSearchURL(
1158              i->url_info.url(), *params->search_terms_data))) {
1159       ++i;
1160     } else {
1161       i = params->matches.erase(i);
1162     }
1163   }
1164 }
1165 
CullRedirects(history::HistoryBackend * backend,history::HistoryMatches * matches,size_t max_results) const1166 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
1167                                        history::HistoryMatches* matches,
1168                                        size_t max_results) const {
1169   for (size_t source = 0;
1170        (source < matches->size()) && (source < max_results); ) {
1171     const GURL& url = (*matches)[source].url_info.url();
1172     // TODO(brettw) this should go away when everything uses GURL.
1173     history::RedirectList redirects = backend->QueryRedirectsFrom(url);
1174     if (!redirects.empty()) {
1175       // Remove all but the first occurrence of any of these redirects in the
1176       // search results. We also must add the URL we queried for, since it may
1177       // not be the first match and we'd want to remove it.
1178       //
1179       // For example, when A redirects to B and our matches are [A, X, B],
1180       // we'll get B as the redirects from, and we want to remove the second
1181       // item of that pair, removing B. If A redirects to B and our matches are
1182       // [B, X, A], we'll want to remove A instead.
1183       redirects.push_back(url);
1184       source = RemoveSubsequentMatchesOf(matches, source, redirects);
1185     } else {
1186       // Advance to next item.
1187       source++;
1188     }
1189   }
1190 
1191   if (matches->size() > max_results)
1192     matches->resize(max_results);
1193 }
1194 
RemoveSubsequentMatchesOf(history::HistoryMatches * matches,size_t source_index,const std::vector<GURL> & remove) const1195 size_t HistoryURLProvider::RemoveSubsequentMatchesOf(
1196     history::HistoryMatches* matches,
1197     size_t source_index,
1198     const std::vector<GURL>& remove) const {
1199   size_t next_index = source_index + 1;  // return value = item after source
1200 
1201   // Find the first occurrence of any URL in the redirect chain. We want to
1202   // keep this one since it is rated the highest.
1203   history::HistoryMatches::iterator first(std::find_first_of(
1204       matches->begin(), matches->end(), remove.begin(), remove.end(),
1205       history::HistoryMatch::EqualsGURL));
1206   DCHECK(first != matches->end()) << "We should have always found at least the "
1207       "original URL.";
1208 
1209   // Find any following occurrences of any URL in the redirect chain, these
1210   // should be deleted.
1211   for (history::HistoryMatches::iterator next(std::find_first_of(first + 1,
1212            matches->end(), remove.begin(), remove.end(),
1213            history::HistoryMatch::EqualsGURL));
1214        next != matches->end(); next = std::find_first_of(next, matches->end(),
1215            remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) {
1216     // Remove this item. When we remove an item before the source index, we
1217     // need to shift it to the right and remember that so we can return it.
1218     next = matches->erase(next);
1219     if (static_cast<size_t>(next - matches->begin()) < next_index)
1220       --next_index;
1221   }
1222   return next_index;
1223 }
1224 
HistoryMatchToACMatch(const HistoryURLProviderParams & params,size_t match_number,int relevance)1225 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
1226     const HistoryURLProviderParams& params,
1227     size_t match_number,
1228     int relevance) {
1229   // The FormattedStringWithEquivalentMeaning() call below requires callers to
1230   // be on the main thread.
1231   DCHECK(thread_checker_.CalledOnValidThread());
1232 
1233   const history::HistoryMatch& history_match = params.matches[match_number];
1234   const history::URLRow& info = history_match.url_info;
1235   AutocompleteMatch match(this, relevance,
1236       !!info.visit_count(), AutocompleteMatchType::HISTORY_URL);
1237   match.typed_count = info.typed_count();
1238   match.destination_url = info.url();
1239   DCHECK(match.destination_url.is_valid());
1240   size_t inline_autocomplete_offset =
1241       history_match.input_location + params.input.text().length();
1242 
1243   auto fill_into_edit_format_types = url_formatter::kFormatUrlOmitDefaults;
1244   if (!params.trim_http || history_match.match_in_scheme)
1245     fill_into_edit_format_types &= ~url_formatter::kFormatUrlOmitHTTP;
1246   match.fill_into_edit =
1247       AutocompleteInput::FormattedStringWithEquivalentMeaning(
1248           info.url(),
1249           url_formatter::FormatUrl(info.url(), fill_into_edit_format_types,
1250                                    net::UnescapeRule::SPACES, nullptr, nullptr,
1251                                    &inline_autocomplete_offset),
1252           client()->GetSchemeClassifier(), &inline_autocomplete_offset);
1253 
1254   const auto format_types = AutocompleteMatch::GetFormatTypes(
1255       params.input.parts().scheme.len > 0 || !params.trim_http ||
1256           history_match.match_in_scheme,
1257       history_match.match_in_subdomain);
1258   match.contents = url_formatter::FormatUrl(info.url(), format_types,
1259                                             net::UnescapeRule::SPACES, nullptr,
1260                                             nullptr, nullptr);
1261   auto term_matches = FindTermMatches(params.input.text(), match.contents);
1262   match.contents_class = ClassifyTermMatches(
1263       term_matches, match.contents.size(),
1264       ACMatchClassification::URL | ACMatchClassification::MATCH,
1265       ACMatchClassification::URL);
1266 
1267   match.description = info.title();
1268   match.description_class =
1269       ClassifyDescription(params.input.text(), match.description);
1270 
1271   // |inline_autocomplete_offset| was guaranteed not to be npos before the call
1272   // to FormatUrl().  If it is npos now, that means the represented location no
1273   // longer exists as such in the formatted string, e.g. if the offset pointed
1274   // into the middle of a punycode sequence fixed up to Unicode.  In this case,
1275   // there can be no inline autocompletion, and the match must not be allowed to
1276   // be default.
1277   if (match.TryRichAutocompletion(match.contents, match.description,
1278                                   params.input_before_fixup)) {
1279     // If rich autocompletion applies, we skip trying the alternatives below.
1280   } else if (inline_autocomplete_offset != base::string16::npos) {
1281     DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length());
1282     match.inline_autocompletion =
1283         match.fill_into_edit.substr(inline_autocomplete_offset);
1284     match.SetAllowedToBeDefault(params.input_before_fixup);
1285   }
1286 
1287   RecordAdditionalInfoFromUrlRow(info, &match);
1288   return match;
1289 }
1290