1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/omnibox/browser/history_url_provider.h"
6
7 #include <algorithm>
8 #include <memory>
9 #include <utility>
10
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/feature_list.h"
14 #include "base/location.h"
15 #include "base/metrics/histogram_macros.h"
16 #include "base/single_thread_task_runner.h"
17 #include "base/strings/string_util.h"
18 #include "base/strings/utf_string_conversions.h"
19 #include "base/threading/sequenced_task_runner_handle.h"
20 #include "base/time/time.h"
21 #include "base/trace_event/memory_usage_estimator.h"
22 #include "base/trace_event/trace_event.h"
23 #include "components/bookmarks/browser/bookmark_utils.h"
24 #include "components/history/core/browser/history_backend.h"
25 #include "components/history/core/browser/history_database.h"
26 #include "components/history/core/browser/history_service.h"
27 #include "components/history/core/browser/history_types.h"
28 #include "components/omnibox/browser/autocomplete_match.h"
29 #include "components/omnibox/browser/autocomplete_match_classification.h"
30 #include "components/omnibox/browser/autocomplete_provider.h"
31 #include "components/omnibox/browser/autocomplete_provider_listener.h"
32 #include "components/omnibox/browser/autocomplete_result.h"
33 #include "components/omnibox/browser/history_provider.h"
34 #include "components/omnibox/browser/in_memory_url_index_types.h"
35 #include "components/omnibox/browser/omnibox_field_trial.h"
36 #include "components/omnibox/browser/url_index_private_data.h"
37 #include "components/omnibox/browser/url_prefix.h"
38 #include "components/omnibox/common/omnibox_features.h"
39 #include "components/prefs/pref_service.h"
40 #include "components/search_engines/omnibox_focus_type.h"
41 #include "components/search_engines/search_terms_data.h"
42 #include "components/search_engines/template_url_service.h"
43 #include "components/url_formatter/url_fixer.h"
44 #include "components/url_formatter/url_formatter.h"
45 #include "net/base/registry_controlled_domains/registry_controlled_domain.h"
46 #include "third_party/metrics_proto/omnibox_input_type.pb.h"
47 #include "url/gurl.h"
48 #include "url/third_party/mozilla/url_parse.h"
49 #include "url/url_util.h"
50
51 namespace {
52
53 // Acts like the > operator for URLInfo classes.
CompareHistoryMatch(const history::HistoryMatch & a,const history::HistoryMatch & b)54 bool CompareHistoryMatch(const history::HistoryMatch& a,
55 const history::HistoryMatch& b) {
56 // A URL that has been typed at all is better than one that has never been
57 // typed. (Note "!"s on each side)
58 if (!a.url_info.typed_count() != !b.url_info.typed_count())
59 return a.url_info.typed_count() > b.url_info.typed_count();
60
61 // Innermost matches (matches after any scheme or "www.") are better than
62 // non-innermost matches.
63 if (a.innermost_match != b.innermost_match)
64 return a.innermost_match;
65
66 // URLs that have been typed more often are better.
67 if (a.url_info.typed_count() != b.url_info.typed_count())
68 return a.url_info.typed_count() > b.url_info.typed_count();
69
70 // For URLs that have each been typed once, a host (alone) is better than a
71 // page inside.
72 if ((a.url_info.typed_count() == 1) && (a.IsHostOnly() != b.IsHostOnly()))
73 return a.IsHostOnly();
74
75 // URLs that have been visited more often are better.
76 if (a.url_info.visit_count() != b.url_info.visit_count())
77 return a.url_info.visit_count() > b.url_info.visit_count();
78
79 // URLs that have been visited more recently are better.
80 if (a.url_info.last_visit() != b.url_info.last_visit())
81 return a.url_info.last_visit() > b.url_info.last_visit();
82
83 // Use alphabetical order on the url spec as a tie-breaker.
84 return a.url_info.url().spec() > b.url_info.url().spec();
85 }
86
87 // Sorts and dedups the given list of matches.
SortAndDedupMatches(history::HistoryMatches * matches)88 void SortAndDedupMatches(history::HistoryMatches* matches) {
89 // Sort by quality, best first.
90 std::sort(matches->begin(), matches->end(), &CompareHistoryMatch);
91
92 // Remove duplicate matches (caused by the search string appearing in one of
93 // the prefixes as well as after it). Consider the following scenario:
94 //
95 // User has visited "http://http.com" once and "http://htaccess.com" twice.
96 // User types "http". The autocomplete search with prefix "http://" returns
97 // the first host, while the search with prefix "" returns both hosts. Now
98 // we sort them into rank order:
99 // http://http.com (innermost_match)
100 // http://htaccess.com (!innermost_match, url_info.visit_count == 2)
101 // http://http.com (!innermost_match, url_info.visit_count == 1)
102 //
103 // The above scenario tells us we can't use std::unique(), since our
104 // duplicates are not always sequential. It also tells us we should remove
105 // the lower-quality duplicate(s), since otherwise the returned results won't
106 // be ordered correctly. This is easy to do: we just always remove the later
107 // element of a duplicate pair.
108 // Be careful! Because the vector contents may change as we remove elements,
109 // we use an index instead of an iterator in the outer loop, and don't
110 // precalculate the ending position.
111 for (size_t i = 0; i < matches->size(); ++i) {
112 for (history::HistoryMatches::iterator j(matches->begin() + i + 1);
113 j != matches->end(); ) {
114 if ((*matches)[i].url_info.url() == j->url_info.url())
115 j = matches->erase(j);
116 else
117 ++j;
118 }
119 }
120 }
121
122 // Calculates a new relevance score applying half-life time decaying to |count|
123 // using |time_since_last_visit| and |score_buckets|. This function will never
124 // return a score higher than |undecayed_relevance|; in other words, it can only
125 // demote the old score.
CalculateRelevanceUsingScoreBuckets(const HUPScoringParams::ScoreBuckets & score_buckets,const base::TimeDelta & time_since_last_visit,int undecayed_relevance,int undecayed_count)126 double CalculateRelevanceUsingScoreBuckets(
127 const HUPScoringParams::ScoreBuckets& score_buckets,
128 const base::TimeDelta& time_since_last_visit,
129 int undecayed_relevance,
130 int undecayed_count) {
131 // Back off if above relevance cap.
132 if ((score_buckets.relevance_cap() != -1) &&
133 (undecayed_relevance >= score_buckets.relevance_cap()))
134 return undecayed_relevance;
135
136 // Time based decay using half-life time.
137 double decayed_count = undecayed_count;
138 double decay_factor = score_buckets.HalfLifeTimeDecay(time_since_last_visit);
139 if (decayed_count > 0)
140 decayed_count *= decay_factor;
141
142 const HUPScoringParams::ScoreBuckets::CountMaxRelevance* score_bucket =
143 nullptr;
144 const double factor = (score_buckets.use_decay_factor() ?
145 decay_factor : decayed_count);
146 for (size_t i = 0; i < score_buckets.buckets().size(); ++i) {
147 score_bucket = &score_buckets.buckets()[i];
148 if (factor >= score_bucket->first)
149 break;
150 }
151
152 return (score_bucket && (undecayed_relevance > score_bucket->second)) ?
153 score_bucket->second : undecayed_relevance;
154 }
155
156 // Returns a new relevance score for the given |match| based on the
157 // |old_relevance| score and |scoring_params|. The new relevance score is
158 // guaranteed to be less than or equal to |old_relevance|. In other words, this
159 // function can only demote a score, never boost it. Returns |old_relevance| if
160 // experimental scoring is disabled.
CalculateRelevanceScoreUsingScoringParams(const history::HistoryMatch & match,int old_relevance,const HUPScoringParams & scoring_params)161 int CalculateRelevanceScoreUsingScoringParams(
162 const history::HistoryMatch& match,
163 int old_relevance,
164 const HUPScoringParams& scoring_params) {
165 const base::TimeDelta time_since_last_visit =
166 base::Time::Now() - match.url_info.last_visit();
167
168 int relevance = CalculateRelevanceUsingScoreBuckets(
169 scoring_params.typed_count_buckets, time_since_last_visit, old_relevance,
170 match.url_info.typed_count());
171
172 // Additional demotion (on top of typed_count demotion) of URLs that were
173 // never typed.
174 if (match.url_info.typed_count() == 0) {
175 relevance = CalculateRelevanceUsingScoreBuckets(
176 scoring_params.visited_count_buckets, time_since_last_visit, relevance,
177 match.url_info.visit_count());
178 }
179
180 DCHECK_LE(relevance, old_relevance);
181 return relevance;
182 }
183
184 // Extracts typed_count, visit_count, and last_visited time from the URLRow and
185 // puts them in the additional info field of the |match| for display in
186 // about:omnibox.
RecordAdditionalInfoFromUrlRow(const history::URLRow & info,AutocompleteMatch * match)187 void RecordAdditionalInfoFromUrlRow(const history::URLRow& info,
188 AutocompleteMatch* match) {
189 match->RecordAdditionalInfo("typed count", info.typed_count());
190 match->RecordAdditionalInfo("visit count", info.visit_count());
191 match->RecordAdditionalInfo("last visit", info.last_visit());
192 }
193
194 // If |create_if_necessary| is true, ensures that |matches| contains an entry
195 // for |info|, creating a new such entry if necessary (using |match_template|
196 // to get all the other match data).
197 //
198 // If |promote| is true, this also ensures the entry is the first element in
199 // |matches|, moving or adding it to the front as appropriate. When |promote|
200 // is false, existing matches are left in place, and newly added matches are
201 // placed at the back.
202 //
203 // It's OK to call this function with both |create_if_necessary| and |promote|
204 // false, in which case we'll do nothing.
205 //
206 // Returns whether the match exists regardless if it was promoted/created.
CreateOrPromoteMatch(const history::URLRow & info,const history::HistoryMatch & match_template,history::HistoryMatches * matches,bool create_if_necessary,bool promote)207 bool CreateOrPromoteMatch(const history::URLRow& info,
208 const history::HistoryMatch& match_template,
209 history::HistoryMatches* matches,
210 bool create_if_necessary,
211 bool promote) {
212 // |matches| may already have an entry for this.
213 for (history::HistoryMatches::iterator i(matches->begin());
214 i != matches->end(); ++i) {
215 if (i->url_info.url() == info.url()) {
216 // Rotate it to the front if the caller wishes.
217 if (promote)
218 std::rotate(matches->begin(), i, i + 1);
219 return true;
220 }
221 }
222
223 if (!create_if_necessary)
224 return false;
225
226 // No entry, so create one using |match_template| as a basis.
227 history::HistoryMatch match = match_template;
228 match.url_info = info;
229 if (promote)
230 matches->push_front(match);
231 else
232 matches->push_back(match);
233
234 return true;
235 }
236
237 // Returns whether |match| is suitable for inline autocompletion.
CanPromoteMatchForInlineAutocomplete(const history::HistoryMatch & match)238 bool CanPromoteMatchForInlineAutocomplete(const history::HistoryMatch& match) {
239 // We can promote this match if it's been typed at least n times, where n == 1
240 // for "simple" (host-only) URLs and n == 2 for others. We set a higher bar
241 // for these long URLs because it's less likely that users will want to visit
242 // them again. Even though we don't increment the typed_count for pasted-in
243 // URLs, if the user manually edits the URL or types some long thing in by
244 // hand, we wouldn't want to immediately start autocompleting it.
245 return match.url_info.typed_count() &&
246 ((match.url_info.typed_count() > 1) || match.IsHostOnly());
247 }
248
249 // Given the user's |input| and a |match| created from it, reduce the match's
250 // URL to just a host. If this host still matches the user input, return it.
251 // Returns the empty string on failure.
ConvertToHostOnly(const history::HistoryMatch & match,const base::string16 & input)252 GURL ConvertToHostOnly(const history::HistoryMatch& match,
253 const base::string16& input) {
254 // See if we should try to do host-only suggestions for this URL. Nonstandard
255 // schemes means there's no authority section, so suggesting the host name
256 // is useless. File URLs are standard, but host suggestion is not useful for
257 // them either.
258 const GURL& url = match.url_info.url();
259 if (!url.is_valid() || !url.IsStandard() || url.SchemeIsFile())
260 return GURL();
261
262 // Transform to a host-only match. Bail if the host no longer matches the
263 // user input (e.g. because the user typed more than just a host).
264 GURL host = url.GetWithEmptyPath();
265 if ((host.spec().length() < (match.input_location + input.length())))
266 return GURL(); // User typing is longer than this host suggestion.
267
268 const base::string16 spec = base::UTF8ToUTF16(host.spec());
269 if (spec.compare(match.input_location, input.length(), input))
270 return GURL(); // User typing is no longer a prefix.
271
272 return host;
273 }
274
275 } // namespace
276
277 // -----------------------------------------------------------------
278 // SearchTermsDataSnapshot
279
280 // Implementation of SearchTermsData that takes a snapshot of another
281 // SearchTermsData by copying all the responses to the different getters into
282 // member strings, then returning those strings when its own getters are called.
283 // This will typically be constructed on the UI thread from
284 // UIThreadSearchTermsData but is subsequently safe to use on any thread.
285 class SearchTermsDataSnapshot : public SearchTermsData {
286 public:
287 explicit SearchTermsDataSnapshot(const SearchTermsData* search_terms_data);
288 ~SearchTermsDataSnapshot() override;
289 SearchTermsDataSnapshot(const SearchTermsDataSnapshot&) = delete;
290 SearchTermsDataSnapshot& operator=(const SearchTermsDataSnapshot&) = delete;
291
292 std::string GoogleBaseURLValue() const override;
293 std::string GetApplicationLocale() const override;
294 base::string16 GetRlzParameterValue(bool from_app_list) const override;
295 std::string GetSearchClient() const override;
296 std::string GoogleImageSearchSource() const override;
297
298 // Estimates dynamic memory usage.
299 // See base/trace_event/memory_usage_estimator.h for more info.
300 size_t EstimateMemoryUsage() const override;
301
302 private:
303 std::string google_base_url_value_;
304 std::string application_locale_;
305 base::string16 rlz_parameter_value_;
306 std::string search_client_;
307 std::string google_image_search_source_;
308 };
309
SearchTermsDataSnapshot(const SearchTermsData * search_terms_data)310 SearchTermsDataSnapshot::SearchTermsDataSnapshot(
311 const SearchTermsData* search_terms_data) {
312 if (search_terms_data) {
313 google_base_url_value_ = search_terms_data->GoogleBaseURLValue();
314 application_locale_ = search_terms_data->GetApplicationLocale();
315 rlz_parameter_value_ = search_terms_data->GetRlzParameterValue(false);
316 search_client_ = search_terms_data->GetSearchClient();
317 google_image_search_source_ = search_terms_data->GoogleImageSearchSource();
318 }
319 }
320
~SearchTermsDataSnapshot()321 SearchTermsDataSnapshot::~SearchTermsDataSnapshot() {
322 }
323
GoogleBaseURLValue() const324 std::string SearchTermsDataSnapshot::GoogleBaseURLValue() const {
325 return google_base_url_value_;
326 }
327
GetApplicationLocale() const328 std::string SearchTermsDataSnapshot::GetApplicationLocale() const {
329 return application_locale_;
330 }
331
GetRlzParameterValue(bool from_app_list) const332 base::string16 SearchTermsDataSnapshot::GetRlzParameterValue(
333 bool from_app_list) const {
334 return rlz_parameter_value_;
335 }
336
GetSearchClient() const337 std::string SearchTermsDataSnapshot::GetSearchClient() const {
338 return search_client_;
339 }
340
GoogleImageSearchSource() const341 std::string SearchTermsDataSnapshot::GoogleImageSearchSource() const {
342 return google_image_search_source_;
343 }
344
EstimateMemoryUsage() const345 size_t SearchTermsDataSnapshot::EstimateMemoryUsage() const {
346 size_t res = 0;
347
348 res += base::trace_event::EstimateMemoryUsage(google_base_url_value_);
349 res += base::trace_event::EstimateMemoryUsage(application_locale_);
350 res += base::trace_event::EstimateMemoryUsage(rlz_parameter_value_);
351 res += base::trace_event::EstimateMemoryUsage(search_client_);
352 res += base::trace_event::EstimateMemoryUsage(google_image_search_source_);
353
354 return res;
355 }
356
357 // -----------------------------------------------------------------
358 // HistoryURLProvider
359
360 // These ugly magic numbers will go away once we switch all scoring
361 // behavior (including URL-what-you-typed) to HistoryQuick provider.
362 const int HistoryURLProvider::kScoreForBestInlineableResult = 1413;
363 const int HistoryURLProvider::kScoreForUnvisitedIntranetResult = 1403;
364 const int HistoryURLProvider::kScoreForWhatYouTypedResult = 1203;
365 const int HistoryURLProvider::kBaseScoreForNonInlineableResult = 900;
366
367 // VisitClassifier is used to classify the type of visit to a particular url.
368 class HistoryURLProvider::VisitClassifier {
369 public:
370 enum Type {
371 INVALID, // Navigations to the URL are not allowed.
372 UNVISITED_INTRANET, // A navigable URL for which we have no visit data but
373 // which is known to refer to a visited intranet host.
374 VISITED, // The site has been previously visited.
375 };
376
377 VisitClassifier(HistoryURLProvider* provider,
378 const AutocompleteInput& input,
379 history::URLDatabase* db);
380
381 VisitClassifier(const VisitClassifier&) = delete;
382 VisitClassifier& operator=(const VisitClassifier&) = delete;
383
384 // Returns the type of visit for the specified input.
type() const385 Type type() const { return type_; }
386
387 // Returns the URLRow for the visit.
388 // If the type of the visit is UNVISITED_INTRANET, the return value of this
389 // function does not have any visit data; only the URL field is set.
url_row() const390 const history::URLRow& url_row() const { return url_row_; }
391
392 private:
393 HistoryURLProvider* provider_;
394 history::URLDatabase* db_;
395 Type type_;
396 history::URLRow url_row_;
397 };
398
VisitClassifier(HistoryURLProvider * provider,const AutocompleteInput & input,history::URLDatabase * db)399 HistoryURLProvider::VisitClassifier::VisitClassifier(
400 HistoryURLProvider* provider,
401 const AutocompleteInput& input,
402 history::URLDatabase* db)
403 : provider_(provider),
404 db_(db),
405 type_(INVALID) {
406 // Detect email addresses. These cases will look like "http://user@site/",
407 // and because the history backend strips auth creds, we'll get a bogus exact
408 // match below if the user has visited "site".
409 if ((input.type() == metrics::OmniboxInputType::UNKNOWN) &&
410 input.parts().username.is_nonempty() &&
411 !input.parts().password.is_nonempty() &&
412 !input.parts().path.is_nonempty())
413 return;
414
415 // If the input can be canonicalized to a valid URL, look up all
416 // prefix+input combinations in the URL database to determine if the input
417 // corresponds to any visited URL.
418 if (!input.canonicalized_url().is_valid())
419 return;
420
421 // Iterate over all prefixes in ascending number of components (i.e. from the
422 // empty prefix to those that have most components).
423 const std::string& desired_tld = input.desired_tld();
424 const URLPrefixes& url_prefixes = URLPrefix::GetURLPrefixes();
425 for (auto prefix_it = url_prefixes.rbegin(); prefix_it != url_prefixes.rend();
426 ++prefix_it) {
427 const GURL url_with_prefix = url_formatter::FixupURL(
428 base::UTF16ToUTF8(prefix_it->prefix + input.text()), desired_tld);
429 if (url_with_prefix.is_valid() &&
430 db_->GetRowForURL(url_with_prefix, &url_row_) && !url_row_.hidden()) {
431 type_ = VISITED;
432 return;
433 }
434 }
435
436 // If the input does not correspond to a visited URL, we check if the
437 // canonical URL has an intranet hostname that the user visited (albeit with a
438 // different port and/or path) before. If this is true, |url_row_| will be
439 // mostly empty: the URL field will be set to an unvisited URL with the same
440 // scheme and host as some visited URL in the db.
441 const GURL as_known_intranet_url = provider_->AsKnownIntranetURL(db_, input);
442 if (as_known_intranet_url.is_valid()) {
443 url_row_ = history::URLRow(as_known_intranet_url);
444 type_ = UNVISITED_INTRANET;
445 }
446 }
447
HistoryURLProviderParams(const AutocompleteInput & input,const AutocompleteInput & input_before_fixup,bool trim_http,const AutocompleteMatch & what_you_typed_match,const TemplateURL * default_search_provider,const SearchTermsData * search_terms_data)448 HistoryURLProviderParams::HistoryURLProviderParams(
449 const AutocompleteInput& input,
450 const AutocompleteInput& input_before_fixup,
451 bool trim_http,
452 const AutocompleteMatch& what_you_typed_match,
453 const TemplateURL* default_search_provider,
454 const SearchTermsData* search_terms_data)
455 : origin_task_runner(base::SequencedTaskRunnerHandle::Get()),
456 input(input),
457 input_before_fixup(input_before_fixup),
458 trim_http(trim_http),
459 what_you_typed_match(what_you_typed_match),
460 failed(false),
461 exact_suggestion_is_in_history(false),
462 promote_type(NEITHER),
463 default_search_provider(
464 default_search_provider
465 ? new TemplateURL(default_search_provider->data())
466 : nullptr),
467 search_terms_data(new SearchTermsDataSnapshot(search_terms_data)) {}
468
~HistoryURLProviderParams()469 HistoryURLProviderParams::~HistoryURLProviderParams() {
470 }
471
EstimateMemoryUsage() const472 size_t HistoryURLProviderParams::EstimateMemoryUsage() const {
473 size_t res = 0;
474
475 res += base::trace_event::EstimateMemoryUsage(input);
476 res += base::trace_event::EstimateMemoryUsage(what_you_typed_match);
477 res += base::trace_event::EstimateMemoryUsage(matches);
478 res += base::trace_event::EstimateMemoryUsage(default_search_provider);
479 res += base::trace_event::EstimateMemoryUsage(search_terms_data);
480
481 return res;
482 }
483
HistoryURLProvider(AutocompleteProviderClient * client,AutocompleteProviderListener * listener)484 HistoryURLProvider::HistoryURLProvider(AutocompleteProviderClient* client,
485 AutocompleteProviderListener* listener)
486 : HistoryProvider(AutocompleteProvider::TYPE_HISTORY_URL, client),
487 listener_(listener),
488 params_(nullptr),
489 search_url_database_(OmniboxFieldTrial::HUPSearchDatabase()) {
490 // Initialize the default HUP scoring params.
491 OmniboxFieldTrial::GetDefaultHUPScoringParams(&scoring_params_);
492 // Initialize HUP scoring params based on the current experiment.
493 OmniboxFieldTrial::GetExperimentalHUPScoringParams(&scoring_params_);
494 }
495
Start(const AutocompleteInput & input,bool minimal_changes)496 void HistoryURLProvider::Start(const AutocompleteInput& input,
497 bool minimal_changes) {
498 TRACE_EVENT0("omnibox", "HistoryURLProvider::Start");
499 // NOTE: We could try hard to do less work in the |minimal_changes| case
500 // here; some clever caching would let us reuse the raw matches from the
501 // history DB without re-querying. However, we'd still have to go back to
502 // the history thread to mark these up properly, and if pass 2 is currently
503 // running, we'd need to wait for it to return to the main thread before
504 // doing this (we can't just write new data for it to read due to thread
505 // safety issues). At that point it's just as fast, and easier, to simply
506 // re-run the query from scratch and ignore |minimal_changes|.
507
508 // Cancel any in-progress query.
509 Stop(false, false);
510
511 matches_.clear();
512
513 if (input.focus_type() != OmniboxFocusType::DEFAULT ||
514 (input.type() == metrics::OmniboxInputType::EMPTY))
515 return;
516
517 // Do some fixup on the user input before matching against it, so we provide
518 // good results for local file paths, input with spaces, etc.
519 const FixupReturn fixup_return(FixupUserInput(input));
520 if (!fixup_return.first)
521 return;
522 url::Parsed parts;
523 url_formatter::SegmentURL(fixup_return.second, &parts);
524 AutocompleteInput fixed_up_input(input);
525 fixed_up_input.UpdateText(fixup_return.second, base::string16::npos, parts);
526
527 // Create a match for what the user typed.
528 const bool trim_http = !AutocompleteInput::HasHTTPScheme(input.text());
529 AutocompleteMatch what_you_typed_match(SuggestExactInput(
530 fixed_up_input, fixed_up_input.canonicalized_url(), trim_http));
531
532 // If the input fix-up above added characters, show them as an
533 // autocompletion, unless directed not to.
534 if (!input.prevent_inline_autocomplete() &&
535 fixed_up_input.text().size() > input.text().size() &&
536 base::StartsWith(fixed_up_input.text(), input.text(),
537 base::CompareCase::SENSITIVE)) {
538 what_you_typed_match.fill_into_edit = fixed_up_input.text();
539 what_you_typed_match.inline_autocompletion =
540 fixed_up_input.text().substr(input.text().size());
541 what_you_typed_match.contents_class.push_back(
542 {input.text().length(), ACMatchClassification::URL});
543 }
544
545 what_you_typed_match.relevance = CalculateRelevance(WHAT_YOU_TYPED, 0);
546
547 // Add the what-you-typed match as a fallback in case we can't get the history
548 // service or URL DB; otherwise, we'll replace this match lower down. Don't
549 // do this for queries, though -- while we can sometimes mark up a match for
550 // them, it's not what the user wants, and just adds noise.
551 if (fixed_up_input.type() != metrics::OmniboxInputType::QUERY)
552 matches_.push_back(what_you_typed_match);
553
554 // We'll need the history service to run both passes, so try to obtain it.
555 history::HistoryService* const history_service =
556 client()->GetHistoryService();
557 if (!history_service)
558 return;
559
560 // Get the default search provider and search terms data now since we have to
561 // retrieve these on the UI thread, and the second pass runs on the history
562 // thread. |template_url_service| can be null when testing.
563 TemplateURLService* template_url_service = client()->GetTemplateURLService();
564 const TemplateURL* default_search_provider = template_url_service ?
565 template_url_service->GetDefaultSearchProvider() : nullptr;
566 const SearchTermsData* search_terms_data =
567 template_url_service ? &template_url_service->search_terms_data()
568 : nullptr;
569
570 // Create the data structure for the autocomplete passes. We'll save this off
571 // onto the |params_| member for later deletion below if we need to run pass
572 // 2.
573 std::unique_ptr<HistoryURLProviderParams> params(new HistoryURLProviderParams(
574 fixed_up_input, input, trim_http, what_you_typed_match,
575 default_search_provider, search_terms_data));
576
577 // Pass 1: Get the in-memory URL database, and use it to find and promote
578 // the inline autocomplete match, if any.
579 history::URLDatabase* url_db = history_service->InMemoryDatabase();
580 // url_db can be null if it hasn't finished initializing (or failed to
581 // initialize). In this case all we can do is fall back on the second pass.
582 //
583 // TODO(pkasting): We should just block here until this loads. Any time
584 // someone unloads the history backend, we'll get inconsistent inline
585 // autocomplete behavior here.
586 if (url_db) {
587 DoAutocomplete(nullptr, url_db, params.get());
588 matches_.clear();
589 PromoteMatchesIfNecessary(*params);
590 // NOTE: We don't reset |params| here since at least the |promote_type|
591 // field on it will be read by the second pass -- see comments in
592 // DoAutocomplete().
593 }
594
595 // Pass 2: Ask the history service to call us back on the history thread,
596 // where we can read the full on-disk DB.
597 if (search_url_database_ && input.want_asynchronous_matches()) {
598 done_ = false;
599 params_ = params.release(); // This object will be destroyed in
600 // QueryComplete() once we're done with it.
601 history_service->ScheduleAutocomplete(
602 base::BindOnce(&HistoryURLProvider::ExecuteWithDB, this, params_));
603 }
604 }
605
Stop(bool clear_cached_results,bool due_to_user_inactivity)606 void HistoryURLProvider::Stop(bool clear_cached_results,
607 bool due_to_user_inactivity) {
608 done_ = true;
609
610 if (params_)
611 params_->cancel_flag.Set();
612 }
613
EstimateMemoryUsage() const614 size_t HistoryURLProvider::EstimateMemoryUsage() const {
615 size_t res = HistoryProvider::EstimateMemoryUsage();
616
617 if (params_)
618 res += base::trace_event::EstimateMemoryUsage(*params_);
619 res += base::trace_event::EstimateMemoryUsage(scoring_params_);
620
621 return res;
622 }
623
SuggestExactInput(const AutocompleteInput & input,const GURL & destination_url,bool trim_http)624 AutocompleteMatch HistoryURLProvider::SuggestExactInput(
625 const AutocompleteInput& input,
626 const GURL& destination_url,
627 bool trim_http) {
628 // The FormattedStringWithEquivalentMeaning() call below requires callers to
629 // be on the main thread.
630 DCHECK(thread_checker_.CalledOnValidThread());
631
632 AutocompleteMatch match(this, 0, false,
633 AutocompleteMatchType::URL_WHAT_YOU_TYPED);
634
635 if (destination_url.is_valid()) {
636 match.destination_url = destination_url;
637
638 // If the input explicitly contains "http://", callers must set |trim_http|
639 // to false. Otherwise, |trim_http| may be either true or false.
640 DCHECK(!(trim_http && AutocompleteInput::HasHTTPScheme(input.text())));
641 base::string16 display_string(url_formatter::FormatUrl(
642 destination_url,
643 url_formatter::kFormatUrlOmitDefaults &
644 ~url_formatter::kFormatUrlOmitHTTP,
645 net::UnescapeRule::SPACES, nullptr, nullptr, nullptr));
646 if (trim_http)
647 TrimHttpPrefix(&display_string);
648 match.fill_into_edit =
649 AutocompleteInput::FormattedStringWithEquivalentMeaning(
650 destination_url, display_string, client()->GetSchemeClassifier(),
651 nullptr);
652 // The what-you-typed match is generally only allowed to be default for
653 // URL inputs or when there is no default search provider. (It's also
654 // allowed to be default for UNKNOWN inputs where the destination is a known
655 // intranet site. In this case, |allowed_to_be_default_match| is revised in
656 // FixupExactSuggestion().)
657 const bool has_default_search_provider =
658 client()->GetTemplateURLService() &&
659 client()->GetTemplateURLService()->GetDefaultSearchProvider();
660 match.allowed_to_be_default_match =
661 (input.type() == metrics::OmniboxInputType::URL) ||
662 !has_default_search_provider;
663 // NOTE: Don't set match.inline_autocompletion to something non-empty here;
664 // it's surprising and annoying.
665
666 // Try to highlight "innermost" match location. If we fix up "w" into
667 // "www.w.com", we want to highlight the fifth character, not the first.
668 // This relies on match.destination_url being the non-prefix-trimmed version
669 // of match.contents.
670 match.contents = display_string;
671
672 TermMatches termMatches = {{0, 0, input.text().length()}};
673 match.contents_class = ClassifyTermMatches(
674 termMatches, match.contents.size(),
675 ACMatchClassification::MATCH | ACMatchClassification::URL,
676 ACMatchClassification::URL);
677 }
678
679 return match;
680 }
681
ExecuteWithDB(HistoryURLProviderParams * params,history::HistoryBackend * backend,history::URLDatabase * db)682 void HistoryURLProvider::ExecuteWithDB(HistoryURLProviderParams* params,
683 history::HistoryBackend* backend,
684 history::URLDatabase* db) {
685 // We may get called with a null database if it couldn't be properly
686 // initialized.
687 if (!db) {
688 params->failed = true;
689 } else if (!params->cancel_flag.IsSet()) {
690 base::TimeTicks beginning_time = base::TimeTicks::Now();
691
692 DoAutocomplete(backend, db, params);
693
694 UMA_HISTOGRAM_TIMES("Autocomplete.HistoryAsyncQueryTime",
695 base::TimeTicks::Now() - beginning_time);
696 }
697
698 // Return the results (if any) to the originating sequence.
699 params->origin_task_runner->PostTask(
700 FROM_HERE,
701 base::BindOnce(&HistoryURLProvider::QueryComplete, this, params));
702 }
703
~HistoryURLProvider()704 HistoryURLProvider::~HistoryURLProvider() {
705 // Note: This object can get leaked on shutdown if there are pending
706 // requests on the database (which hold a reference to us). Normally, these
707 // messages get flushed for each thread. We do a round trip from main, to
708 // history, back to main while holding a reference. If the main thread
709 // completes before the history thread, the message to delegate back to the
710 // main thread will not run and the reference will leak. Therefore, don't do
711 // anything on destruction.
712 }
713
714 // static
CalculateRelevance(MatchType match_type,int match_number)715 int HistoryURLProvider::CalculateRelevance(MatchType match_type,
716 int match_number) {
717 switch (match_type) {
718 case INLINE_AUTOCOMPLETE:
719 return kScoreForBestInlineableResult;
720
721 case UNVISITED_INTRANET:
722 return kScoreForUnvisitedIntranetResult;
723
724 case WHAT_YOU_TYPED:
725 return kScoreForWhatYouTypedResult;
726
727 default: // NORMAL
728 return kBaseScoreForNonInlineableResult + match_number;
729 }
730 }
731
732 // static
ClassifyDescription(const base::string16 & input_text,const base::string16 & description)733 ACMatchClassifications HistoryURLProvider::ClassifyDescription(
734 const base::string16& input_text,
735 const base::string16& description) {
736 TermMatches term_matches = FindTermMatches(input_text, description);
737 return ClassifyTermMatches(term_matches, description.size(),
738 ACMatchClassification::MATCH,
739 ACMatchClassification::NONE);
740 }
741
DoAutocomplete(history::HistoryBackend * backend,history::URLDatabase * db,HistoryURLProviderParams * params)742 void HistoryURLProvider::DoAutocomplete(history::HistoryBackend* backend,
743 history::URLDatabase* db,
744 HistoryURLProviderParams* params) {
745 // Get the matching URLs from the DB.
746 params->matches.clear();
747 history::URLRows url_matches;
748
749 if (search_url_database_) {
750 const URLPrefixes& prefixes = URLPrefix::GetURLPrefixes();
751 const bool hide_visits_from_cct =
752 base::FeatureList::IsEnabled(omnibox::kHideVisitsFromCct);
753 for (auto i(prefixes.begin()); i != prefixes.end(); ++i) {
754 if (params->cancel_flag.IsSet())
755 return; // Canceled in the middle of a query, give up.
756
757 // We only need provider_max_matches_ results in the end, but before we
758 // get there we need to promote lower-quality matches that are prefixes of
759 // higher- quality matches, and remove lower-quality redirects. So we ask
760 // for more results than we need, of every prefix type, in hopes this will
761 // give us far more than enough to work with. CullRedirects() will then
762 // reduce the list to the best provider_max_matches_ results.
763 std::string prefixed_input =
764 base::UTF16ToUTF8(i->prefix + params->input.text());
765 db->AutocompleteForPrefix(prefixed_input, provider_max_matches_ * 2,
766 !backend, &url_matches);
767 for (history::URLRows::const_iterator j(url_matches.begin());
768 j != url_matches.end(); ++j) {
769 if (hide_visits_from_cct && backend) {
770 history::VisitVector visits;
771 if (db->GetVisitsForUrl2(j->id(), &visits) &&
772 URLIndexPrivateData::ShouldExcludeBecauseOfCctVisits(visits))
773 continue;
774 }
775 const GURL& row_url = j->url();
776 const URLPrefix* best_prefix = URLPrefix::BestURLPrefix(
777 base::UTF8ToUTF16(row_url.spec()), base::string16());
778 DCHECK(best_prefix);
779 history::HistoryMatch match;
780 match.url_info = *j;
781 match.input_location = i->prefix.length();
782 match.innermost_match =
783 i->num_components >= best_prefix->num_components;
784
785 AutocompleteMatch::GetMatchComponents(
786 row_url, {{match.input_location, prefixed_input.length()}},
787 &match.match_in_scheme, &match.match_in_subdomain);
788
789 params->matches.push_back(std::move(match));
790 }
791 }
792
793 // Create sorted list of suggestions.
794 CullPoorMatches(params);
795 SortAndDedupMatches(¶ms->matches);
796 }
797
798 // Try to create a shorter suggestion from the best match.
799 // We consider the what you typed match eligible for display when it's
800 // navigable and there's a reasonable chance the user intended to do
801 // something other than search. We use a variety of heuristics to determine
802 // this, e.g. whether the user explicitly typed a scheme, or if omnibox
803 // searching has been disabled by policy. In the cases where we've parsed as
804 // UNKNOWN, we'll still show an accidental search infobar if need be.
805 VisitClassifier classifier(this, params->input, db);
806 params->have_what_you_typed_match =
807 (params->input.type() != metrics::OmniboxInputType::QUERY) &&
808 ((params->input.type() != metrics::OmniboxInputType::UNKNOWN) ||
809 (classifier.type() == VisitClassifier::UNVISITED_INTRANET) ||
810 !params->trim_http ||
811 (AutocompleteInput::NumNonHostComponents(params->input.parts()) > 0) ||
812 !params->default_search_provider);
813 const bool have_shorter_suggestion_suitable_for_inline_autocomplete =
814 PromoteOrCreateShorterSuggestion(db, params);
815
816 // Check whether what the user typed appears in history.
817 const bool can_check_history_for_exact_match =
818 // Checking what_you_typed_match.destination_url.is_valid() tells us
819 // whether SuggestExactInput() succeeded in constructing a valid match.
820 params->what_you_typed_match.destination_url.is_valid() &&
821 // Additionally, in the case where the user has typed "foo.com" and
822 // visited (but not typed) "foo/", and the input is "foo", the first pass
823 // will fall into the FRONT_HISTORY_MATCH case for "foo.com" but the
824 // second pass can suggest the exact input as a better URL. Since we need
825 // both passes to agree, and since during the first pass there's no way to
826 // know about "foo/", ensure that if the promote type was set to
827 // FRONT_HISTORY_MATCH during the first pass, the second pass will not
828 // consider the exact suggestion to be in history and therefore will not
829 // suggest the exact input as a better match. (Note that during the first
830 // pass, this conditional will always succeed since |promote_type| is
831 // initialized to NEITHER.)
832 (params->promote_type != HistoryURLProviderParams::FRONT_HISTORY_MATCH);
833 params->exact_suggestion_is_in_history = can_check_history_for_exact_match &&
834 FixupExactSuggestion(db, classifier, params);
835
836 // If we succeeded in fixing up the exact match based on the user's history,
837 // we should treat it as the best match regardless of input type. If not,
838 // then we check whether there's an inline autocompletion we can create from
839 // this input, so we can promote that as the best match.
840 if (params->exact_suggestion_is_in_history) {
841 params->promote_type = HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH;
842 } else if (!params->matches.empty() &&
843 (have_shorter_suggestion_suitable_for_inline_autocomplete ||
844 CanPromoteMatchForInlineAutocomplete(params->matches[0]))) {
845 // Note that we promote this inline-autocompleted match even when
846 // params->prevent_inline_autocomplete is true. This is safe because in
847 // this case the match will be marked as "not allowed to be default", and
848 // a non-inlined match that is "allowed to be default" will be reordered
849 // above it by the controller/AutocompleteResult. We ensure there is such
850 // a match in two ways:
851 // * If params->have_what_you_typed_match is true, we force the
852 // what-you-typed match to be added in this case. See comments in
853 // PromoteMatchesIfNecessary().
854 // * Otherwise, we should have some sort of QUERY or UNKNOWN input that
855 // the SearchProvider will provide a defaultable what-you-typed match
856 // for.
857 params->promote_type = HistoryURLProviderParams::FRONT_HISTORY_MATCH;
858 } else {
859 // Failed to promote any URLs. Use the What You Typed match, if we have it.
860 params->promote_type = params->have_what_you_typed_match ?
861 HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH :
862 HistoryURLProviderParams::NEITHER;
863 }
864
865 const size_t max_results =
866 provider_max_matches_ + (params->exact_suggestion_is_in_history ? 1 : 0);
867 if (backend) {
868 // Remove redirects and trim list to size. We want to provide up to
869 // provider_max_matches_ results plus the What You Typed result, if it was
870 // added to params->matches above.
871 CullRedirects(backend, ¶ms->matches, max_results);
872 } else if (params->matches.size() > max_results) {
873 // Simply trim the list to size.
874 params->matches.resize(max_results);
875 }
876 }
877
PromoteMatchesIfNecessary(const HistoryURLProviderParams & params)878 void HistoryURLProvider::PromoteMatchesIfNecessary(
879 const HistoryURLProviderParams& params) {
880 if (params.promote_type == HistoryURLProviderParams::NEITHER)
881 return;
882 if (params.promote_type == HistoryURLProviderParams::FRONT_HISTORY_MATCH) {
883 matches_.push_back(
884 HistoryMatchToACMatch(params, 0,
885 CalculateRelevance(INLINE_AUTOCOMPLETE, 0)));
886 }
887 // There are two cases where we need to add the what-you-typed-match:
888 // * If params.promote_type is WHAT_YOU_TYPED_MATCH, we're being explicitly
889 // directed to.
890 // * If params.have_what_you_typed_match is true, then params.promote_type
891 // can't be NEITHER (see code near the end of DoAutocomplete()), so if
892 // it's not WHAT_YOU_TYPED_MATCH, it must be FRONT_HISTORY_MATCH, and
893 // we'll have promoted the history match above. If
894 // params.prevent_inline_autocomplete is also true, then this match
895 // will be marked "not allowed to be default", and we need to add the
896 // what-you-typed match to ensure there's a legal default match for the
897 // controller/AutocompleteResult to promote. (If
898 // params.have_what_you_typed_match is false, the SearchProvider should
899 // take care of adding this defaultable match.)
900 if ((params.promote_type == HistoryURLProviderParams::WHAT_YOU_TYPED_MATCH) ||
901 (!matches_.back().allowed_to_be_default_match &&
902 params.have_what_you_typed_match)) {
903 matches_.push_back(params.what_you_typed_match);
904 }
905 }
906
QueryComplete(HistoryURLProviderParams * params_gets_deleted)907 void HistoryURLProvider::QueryComplete(
908 HistoryURLProviderParams* params_gets_deleted) {
909 TRACE_EVENT0("omnibox", "HistoryURLProvider::QueryComplete");
910 // Ensure |params_gets_deleted| gets deleted on exit.
911 std::unique_ptr<HistoryURLProviderParams> params(params_gets_deleted);
912
913 // If the user hasn't already started another query, clear our member pointer
914 // so we can't write into deleted memory.
915 if (params_ == params_gets_deleted)
916 params_ = nullptr;
917
918 // Don't send responses for queries that have been canceled.
919 if (params->cancel_flag.IsSet())
920 return; // Already set done_ when we canceled, no need to set it again.
921
922 // Don't modify |matches_| if the query failed, since it might have a default
923 // match in it, whereas |params->matches| will be empty.
924 if (!params->failed) {
925 matches_.clear();
926 PromoteMatchesIfNecessary(*params);
927
928 // Determine relevance of highest scoring match, if any.
929 int relevance = matches_.empty() ?
930 CalculateRelevance(NORMAL,
931 static_cast<int>(params->matches.size() - 1)) :
932 matches_[0].relevance;
933
934 // Convert the history matches to autocomplete matches. If we promoted the
935 // first match, skip over it.
936 const size_t first_match =
937 (params->exact_suggestion_is_in_history ||
938 (params->promote_type ==
939 HistoryURLProviderParams::FRONT_HISTORY_MATCH)) ? 1 : 0;
940 for (size_t i = first_match; i < params->matches.size(); ++i) {
941 // All matches score one less than the previous match.
942 --relevance;
943 // The experimental scoring must not change the top result's score.
944 if (!matches_.empty()) {
945 relevance = CalculateRelevanceScoreUsingScoringParams(
946 params->matches[i], relevance, scoring_params_);
947 }
948 matches_.push_back(HistoryMatchToACMatch(*params, i, relevance));
949 }
950 }
951
952 done_ = true;
953 listener_->OnProviderUpdate(true);
954 }
955
FixupExactSuggestion(history::URLDatabase * db,const VisitClassifier & classifier,HistoryURLProviderParams * params) const956 bool HistoryURLProvider::FixupExactSuggestion(
957 history::URLDatabase* db,
958 const VisitClassifier& classifier,
959 HistoryURLProviderParams* params) const {
960 MatchType type = INLINE_AUTOCOMPLETE;
961
962 switch (classifier.type()) {
963 case VisitClassifier::INVALID:
964 return false;
965 case VisitClassifier::UNVISITED_INTRANET:
966 params->what_you_typed_match.destination_url = classifier.url_row().url();
967 type = UNVISITED_INTRANET;
968 break;
969 default:
970 DCHECK_EQ(VisitClassifier::VISITED, classifier.type());
971 // We have data for this match, use it.
972 params->what_you_typed_match.deletable = true;
973 auto title = classifier.url_row().title();
974 if (OmniboxFieldTrial::RichAutocompletionShowTitles())
975 params->what_you_typed_match.fill_into_edit_additional_text = title;
976 params->what_you_typed_match.description = title;
977 params->what_you_typed_match.destination_url = classifier.url_row().url();
978 RecordAdditionalInfoFromUrlRow(classifier.url_row(),
979 ¶ms->what_you_typed_match);
980 params->what_you_typed_match.description_class = ClassifyDescription(
981 params->input.text(), params->what_you_typed_match.description);
982 if (!classifier.url_row().typed_count()) {
983 // If we reach here, we must be in the second pass, and we must not have
984 // this row's data available during the first pass. That means we
985 // either scored it as WHAT_YOU_TYPED or UNVISITED_INTRANET, and to
986 // maintain the ordering between passes consistent, we need to score it
987 // the same way here.
988 const GURL as_known_intranet_url =
989 AsKnownIntranetURL(db, params->input);
990 type = as_known_intranet_url.is_valid() ? UNVISITED_INTRANET
991 : WHAT_YOU_TYPED;
992 }
993 break;
994 }
995
996 const GURL& url = params->what_you_typed_match.destination_url;
997 const url::Parsed& parsed = url.parsed_for_possibly_invalid_spec();
998 // If the what-you-typed result looks like a single word (which can be
999 // interpreted as an intranet address) followed by a pound sign ("#"), leave
1000 // the score for the url-what-you-typed result as is and also don't mark it
1001 // as allowed to be the default match. It will likely be outscored by a
1002 // search query from the SearchProvider or, if not, the search query default
1003 // match will in any case--which is allowed to be the default match--will be
1004 // reordered to be first. This test fixes cases such as "c#" and "c# foo"
1005 // where the user has visited an intranet site "c". We want the search-what-
1006 // you-typed score to beat the URL-what-you-typed score in this case. Most
1007 // of the below test tries to make sure that this code does not trigger if
1008 // the user did anything to indicate the desired match is a URL. For
1009 // instance, "c/# foo" will not pass the test because that will be classified
1010 // as input type URL. The parsed.CountCharactersBefore() in the test looks
1011 // for the presence of a reference fragment in the URL by checking whether
1012 // the position differs included the delimiter (pound sign) versus not
1013 // including the delimiter. (One cannot simply check url.ref() because it
1014 // will not distinguish between the input "c" and the input "c#", both of
1015 // which will have empty reference fragments.)
1016 if ((type == UNVISITED_INTRANET) &&
1017 (params->input.type() != metrics::OmniboxInputType::URL) &&
1018 url.username().empty() && url.password().empty() && url.port().empty() &&
1019 (url.path_piece() == "/") && url.query().empty() &&
1020 (parsed.CountCharactersBefore(url::Parsed::REF, true) !=
1021 parsed.CountCharactersBefore(url::Parsed::REF, false))) {
1022 return false;
1023 }
1024
1025 params->what_you_typed_match.allowed_to_be_default_match = true;
1026 params->what_you_typed_match.relevance = CalculateRelevance(type, 0);
1027
1028 // If there are any other matches, then don't promote this match here, in
1029 // hopes the caller will be able to inline autocomplete a better suggestion.
1030 // DoAutocomplete() will fall back on this match if inline autocompletion
1031 // fails. This matches how we react to never-visited URL inputs in the non-
1032 // intranet case.
1033 if (type == UNVISITED_INTRANET && !params->matches.empty())
1034 return false;
1035
1036 // Put it on the front of the HistoryMatches for redirect culling.
1037 CreateOrPromoteMatch(classifier.url_row(), history::HistoryMatch(),
1038 ¶ms->matches, true, true);
1039 return true;
1040 }
1041
AsKnownIntranetURL(history::URLDatabase * db,const AutocompleteInput & input) const1042 GURL HistoryURLProvider::AsKnownIntranetURL(
1043 history::URLDatabase* db,
1044 const AutocompleteInput& input) const {
1045 // Normally passing the first two conditions below ought to guarantee the
1046 // third condition, but because FixupUserInput() can run and modify the
1047 // input's text and parts between Parse() and here, it seems better to be
1048 // paranoid and check.
1049 if ((input.type() != metrics::OmniboxInputType::UNKNOWN) ||
1050 !base::LowerCaseEqualsASCII(input.scheme(), url::kHttpScheme) ||
1051 !input.parts().host.is_nonempty())
1052 return GURL();
1053
1054 const std::string host(base::UTF16ToUTF8(
1055 input.text().substr(input.parts().host.begin, input.parts().host.len)));
1056
1057 // Check if the host has registry domain.
1058 if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
1059 host, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
1060 net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
1061 return GURL();
1062
1063 const GURL& url = input.canonicalized_url();
1064 // Check if the host of the canonical URL can be found in the database.
1065 std::string scheme_in_db;
1066 if (db->IsTypedHost(host, &scheme_in_db)) {
1067 GURL::Replacements replace_scheme;
1068 replace_scheme.SetSchemeStr(scheme_in_db);
1069 return url.ReplaceComponents(replace_scheme);
1070 }
1071
1072 // Check if appending "www." to the canonicalized URL generates a URL found in
1073 // the database.
1074 const std::string alternative_host = "www." + host;
1075 if (!base::StartsWith(host, "www.", base::CompareCase::INSENSITIVE_ASCII) &&
1076 db->IsTypedHost(alternative_host, &scheme_in_db)) {
1077 GURL::Replacements replace_scheme_and_host;
1078 replace_scheme_and_host.SetHostStr(alternative_host);
1079 replace_scheme_and_host.SetSchemeStr(scheme_in_db);
1080 return url.ReplaceComponents(replace_scheme_and_host);
1081 }
1082
1083 return GURL();
1084 }
1085
PromoteOrCreateShorterSuggestion(history::URLDatabase * db,HistoryURLProviderParams * params)1086 bool HistoryURLProvider::PromoteOrCreateShorterSuggestion(
1087 history::URLDatabase* db,
1088 HistoryURLProviderParams* params) {
1089 if (params->matches.empty())
1090 return false; // No matches, nothing to do.
1091
1092 // Determine the base URL from which to search, and whether that URL could
1093 // itself be added as a match. We can add the base iff it's not "effectively
1094 // the same" as any "what you typed" match.
1095 const history::HistoryMatch& match = params->matches[0];
1096 GURL search_base = ConvertToHostOnly(match, params->input.text());
1097 bool can_add_search_base_to_matches = !params->have_what_you_typed_match;
1098 if (search_base.is_empty()) {
1099 // Search from what the user typed when we couldn't reduce the best match
1100 // to a host. Careful: use a substring of |match| here, rather than the
1101 // first match in |params|, because they might have different prefixes. If
1102 // the user typed "google.com", params->what_you_typed_match will hold
1103 // "http://google.com/", but |match| might begin with
1104 // "http://www.google.com/".
1105 // TODO: this should be cleaned up, and is probably incorrect for IDN.
1106 std::string new_match = match.url_info.url().possibly_invalid_spec().
1107 substr(0, match.input_location + params->input.text().length());
1108 search_base = GURL(new_match);
1109 if (search_base.is_empty())
1110 return false; // Can't construct a URL from which to start a search.
1111 } else if (!can_add_search_base_to_matches) {
1112 can_add_search_base_to_matches =
1113 (search_base != params->what_you_typed_match.destination_url);
1114 }
1115 if (search_base == match.url_info.url())
1116 return false; // Couldn't shorten |match|, so no URLs to search over.
1117
1118 // Search the DB for short URLs between our base and |match|.
1119 history::URLRow info(search_base);
1120 bool promote = true;
1121 // A short URL is only worth suggesting if it's been visited at least a third
1122 // as often as the longer URL.
1123 const int min_visit_count = ((match.url_info.visit_count() - 1) / 3) + 1;
1124 // For stability between the in-memory and on-disk autocomplete passes, when
1125 // the long URL has been typed before, only suggest shorter URLs that have
1126 // also been typed. Otherwise, the on-disk pass could suggest a shorter URL
1127 // (which hasn't been typed) that the in-memory pass doesn't know about,
1128 // thereby making the top match, and thus the behavior of inline
1129 // autocomplete, unstable.
1130 const int min_typed_count = match.url_info.typed_count() ? 1 : 0;
1131 if (!db->FindShortestURLFromBase(search_base.possibly_invalid_spec(),
1132 match.url_info.url().possibly_invalid_spec(), min_visit_count,
1133 min_typed_count, can_add_search_base_to_matches, &info)) {
1134 if (!can_add_search_base_to_matches)
1135 return false; // Couldn't find anything and can't add the search base.
1136
1137 // Try to get info on the search base itself. Promote it to the top if the
1138 // original best match isn't good enough to autocomplete.
1139 db->GetRowForURL(search_base, &info);
1140 promote = match.url_info.typed_count() <= 1;
1141 }
1142
1143 // Promote or add the desired URL to the list of matches.
1144 const bool ensure_can_inline =
1145 promote && CanPromoteMatchForInlineAutocomplete(match);
1146 return CreateOrPromoteMatch(info, match, ¶ms->matches, true, promote) &&
1147 ensure_can_inline;
1148 }
1149
CullPoorMatches(HistoryURLProviderParams * params) const1150 void HistoryURLProvider::CullPoorMatches(
1151 HistoryURLProviderParams* params) const {
1152 const base::Time& threshold(history::AutocompleteAgeThreshold());
1153 for (history::HistoryMatches::iterator i(params->matches.begin());
1154 i != params->matches.end(); ) {
1155 if (RowQualifiesAsSignificant(i->url_info, threshold) &&
1156 (!params->default_search_provider ||
1157 !params->default_search_provider->IsSearchURL(
1158 i->url_info.url(), *params->search_terms_data))) {
1159 ++i;
1160 } else {
1161 i = params->matches.erase(i);
1162 }
1163 }
1164 }
1165
CullRedirects(history::HistoryBackend * backend,history::HistoryMatches * matches,size_t max_results) const1166 void HistoryURLProvider::CullRedirects(history::HistoryBackend* backend,
1167 history::HistoryMatches* matches,
1168 size_t max_results) const {
1169 for (size_t source = 0;
1170 (source < matches->size()) && (source < max_results); ) {
1171 const GURL& url = (*matches)[source].url_info.url();
1172 // TODO(brettw) this should go away when everything uses GURL.
1173 history::RedirectList redirects = backend->QueryRedirectsFrom(url);
1174 if (!redirects.empty()) {
1175 // Remove all but the first occurrence of any of these redirects in the
1176 // search results. We also must add the URL we queried for, since it may
1177 // not be the first match and we'd want to remove it.
1178 //
1179 // For example, when A redirects to B and our matches are [A, X, B],
1180 // we'll get B as the redirects from, and we want to remove the second
1181 // item of that pair, removing B. If A redirects to B and our matches are
1182 // [B, X, A], we'll want to remove A instead.
1183 redirects.push_back(url);
1184 source = RemoveSubsequentMatchesOf(matches, source, redirects);
1185 } else {
1186 // Advance to next item.
1187 source++;
1188 }
1189 }
1190
1191 if (matches->size() > max_results)
1192 matches->resize(max_results);
1193 }
1194
RemoveSubsequentMatchesOf(history::HistoryMatches * matches,size_t source_index,const std::vector<GURL> & remove) const1195 size_t HistoryURLProvider::RemoveSubsequentMatchesOf(
1196 history::HistoryMatches* matches,
1197 size_t source_index,
1198 const std::vector<GURL>& remove) const {
1199 size_t next_index = source_index + 1; // return value = item after source
1200
1201 // Find the first occurrence of any URL in the redirect chain. We want to
1202 // keep this one since it is rated the highest.
1203 history::HistoryMatches::iterator first(std::find_first_of(
1204 matches->begin(), matches->end(), remove.begin(), remove.end(),
1205 history::HistoryMatch::EqualsGURL));
1206 DCHECK(first != matches->end()) << "We should have always found at least the "
1207 "original URL.";
1208
1209 // Find any following occurrences of any URL in the redirect chain, these
1210 // should be deleted.
1211 for (history::HistoryMatches::iterator next(std::find_first_of(first + 1,
1212 matches->end(), remove.begin(), remove.end(),
1213 history::HistoryMatch::EqualsGURL));
1214 next != matches->end(); next = std::find_first_of(next, matches->end(),
1215 remove.begin(), remove.end(), history::HistoryMatch::EqualsGURL)) {
1216 // Remove this item. When we remove an item before the source index, we
1217 // need to shift it to the right and remember that so we can return it.
1218 next = matches->erase(next);
1219 if (static_cast<size_t>(next - matches->begin()) < next_index)
1220 --next_index;
1221 }
1222 return next_index;
1223 }
1224
HistoryMatchToACMatch(const HistoryURLProviderParams & params,size_t match_number,int relevance)1225 AutocompleteMatch HistoryURLProvider::HistoryMatchToACMatch(
1226 const HistoryURLProviderParams& params,
1227 size_t match_number,
1228 int relevance) {
1229 // The FormattedStringWithEquivalentMeaning() call below requires callers to
1230 // be on the main thread.
1231 DCHECK(thread_checker_.CalledOnValidThread());
1232
1233 const history::HistoryMatch& history_match = params.matches[match_number];
1234 const history::URLRow& info = history_match.url_info;
1235 AutocompleteMatch match(this, relevance,
1236 !!info.visit_count(), AutocompleteMatchType::HISTORY_URL);
1237 match.typed_count = info.typed_count();
1238 match.destination_url = info.url();
1239 DCHECK(match.destination_url.is_valid());
1240 size_t inline_autocomplete_offset =
1241 history_match.input_location + params.input.text().length();
1242
1243 auto fill_into_edit_format_types = url_formatter::kFormatUrlOmitDefaults;
1244 if (!params.trim_http || history_match.match_in_scheme)
1245 fill_into_edit_format_types &= ~url_formatter::kFormatUrlOmitHTTP;
1246 match.fill_into_edit =
1247 AutocompleteInput::FormattedStringWithEquivalentMeaning(
1248 info.url(),
1249 url_formatter::FormatUrl(info.url(), fill_into_edit_format_types,
1250 net::UnescapeRule::SPACES, nullptr, nullptr,
1251 &inline_autocomplete_offset),
1252 client()->GetSchemeClassifier(), &inline_autocomplete_offset);
1253
1254 const auto format_types = AutocompleteMatch::GetFormatTypes(
1255 params.input.parts().scheme.len > 0 || !params.trim_http ||
1256 history_match.match_in_scheme,
1257 history_match.match_in_subdomain);
1258 match.contents = url_formatter::FormatUrl(info.url(), format_types,
1259 net::UnescapeRule::SPACES, nullptr,
1260 nullptr, nullptr);
1261 auto term_matches = FindTermMatches(params.input.text(), match.contents);
1262 match.contents_class = ClassifyTermMatches(
1263 term_matches, match.contents.size(),
1264 ACMatchClassification::URL | ACMatchClassification::MATCH,
1265 ACMatchClassification::URL);
1266
1267 match.description = info.title();
1268 match.description_class =
1269 ClassifyDescription(params.input.text(), match.description);
1270
1271 // |inline_autocomplete_offset| was guaranteed not to be npos before the call
1272 // to FormatUrl(). If it is npos now, that means the represented location no
1273 // longer exists as such in the formatted string, e.g. if the offset pointed
1274 // into the middle of a punycode sequence fixed up to Unicode. In this case,
1275 // there can be no inline autocompletion, and the match must not be allowed to
1276 // be default.
1277 if (match.TryRichAutocompletion(match.contents, match.description,
1278 params.input_before_fixup)) {
1279 // If rich autocompletion applies, we skip trying the alternatives below.
1280 } else if (inline_autocomplete_offset != base::string16::npos) {
1281 DCHECK(inline_autocomplete_offset <= match.fill_into_edit.length());
1282 match.inline_autocompletion =
1283 match.fill_into_edit.substr(inline_autocomplete_offset);
1284 match.SetAllowedToBeDefault(params.input_before_fixup);
1285 }
1286
1287 RecordAdditionalInfoFromUrlRow(info, &match);
1288 return match;
1289 }
1290