1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/omnibox/browser/autocomplete_provider.h"
6 
7 #include <algorithm>
8 #include <set>
9 #include <string>
10 
11 #include "base/feature_list.h"
12 #include "base/i18n/case_conversion.h"
13 #include "base/logging.h"
14 #include "base/no_destructor.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/trace_event/memory_usage_estimator.h"
19 #include "components/bookmarks/browser/bookmark_utils.h"
20 #include "components/omnibox/browser/autocomplete_i18n.h"
21 #include "components/omnibox/browser/autocomplete_input.h"
22 #include "components/omnibox/browser/autocomplete_match.h"
23 #include "components/omnibox/browser/autocomplete_match_classification.h"
24 #include "components/omnibox/browser/history_provider.h"
25 #include "components/omnibox/browser/omnibox_field_trial.h"
26 #include "components/omnibox/browser/scored_history_match.h"
27 #include "components/omnibox/common/omnibox_features.h"
28 #include "components/url_formatter/url_fixer.h"
29 #include "url/gurl.h"
30 
AutocompleteProvider(Type type)31 AutocompleteProvider::AutocompleteProvider(Type type)
32     : provider_max_matches_(OmniboxFieldTrial::GetProviderMaxMatches(type)),
33       done_(true),
34       type_(type) {}
35 
36 // static
TypeToString(Type type)37 const char* AutocompleteProvider::TypeToString(Type type) {
38   switch (type) {
39     case TYPE_BOOKMARK:
40       return "Bookmark";
41     case TYPE_BUILTIN:
42       return "Builtin";
43     case TYPE_CLIPBOARD:
44       return "Clipboard";
45     case TYPE_DOCUMENT:
46       return "Document";
47     case TYPE_HISTORY_QUICK:
48       return "HistoryQuick";
49     case TYPE_HISTORY_URL:
50       return "HistoryURL";
51     case TYPE_KEYWORD:
52       return "Keyword";
53     case TYPE_ON_DEVICE_HEAD:
54       return "OnDeviceHead";
55     case TYPE_SEARCH:
56       return "Search";
57     case TYPE_SHORTCUTS:
58       return "Shortcuts";
59     case TYPE_ZERO_SUGGEST:
60       return "ZeroSuggest";
61     case TYPE_ZERO_SUGGEST_LOCAL_HISTORY:
62       return "LocalHistoryZeroSuggest";
63     case TYPE_QUERY_TILE:
64       return "QueryTile";
65     case TYPE_MOST_VISITED_SITES:
66       return "MostVisitedSites";
67     case TYPE_VERBATIM_MATCH:
68       return "VerbatimMatch";
69     default:
70       NOTREACHED() << "Unhandled AutocompleteProvider::Type " << type;
71       return "Unknown";
72   }
73 }
74 
Stop(bool clear_cached_results,bool due_to_user_inactivity)75 void AutocompleteProvider::Stop(bool clear_cached_results,
76                                 bool due_to_user_inactivity) {
77   done_ = true;
78 }
79 
GetName() const80 const char* AutocompleteProvider::GetName() const {
81   return TypeToString(type_);
82 }
83 
84 // static
ClassifyAllMatchesInString(const base::string16 & find_text,const base::string16 & text,const bool text_is_search_query,const ACMatchClassifications & original_class)85 ACMatchClassifications AutocompleteProvider::ClassifyAllMatchesInString(
86     const base::string16& find_text,
87     const base::string16& text,
88     const bool text_is_search_query,
89     const ACMatchClassifications& original_class) {
90   // TODO (manukh) Move this function to autocomplete_match_classification
91   DCHECK(!find_text.empty());
92 
93   if (text.empty())
94     return original_class;
95 
96   TermMatches term_matches = FindTermMatches(find_text, text);
97 
98   ACMatchClassifications classifications;
99   if (text_is_search_query) {
100     classifications = ClassifyTermMatches(term_matches, text.size(),
101                                           ACMatchClassification::NONE,
102                                           ACMatchClassification::MATCH);
103   } else
104     classifications = ClassifyTermMatches(term_matches, text.size(),
105                                           ACMatchClassification::MATCH,
106                                           ACMatchClassification::NONE);
107 
108   return AutocompleteMatch::MergeClassifications(original_class,
109                                                  classifications);
110 }
111 
112 metrics::OmniboxEventProto_ProviderType AutocompleteProvider::
AsOmniboxEventProviderType() const113     AsOmniboxEventProviderType() const {
114   switch (type_) {
115     case TYPE_BOOKMARK:
116       return metrics::OmniboxEventProto::BOOKMARK;
117     case TYPE_BUILTIN:
118       return metrics::OmniboxEventProto::BUILTIN;
119     case TYPE_CLIPBOARD:
120       return metrics::OmniboxEventProto::CLIPBOARD;
121     case TYPE_DOCUMENT:
122       return metrics::OmniboxEventProto::DOCUMENT;
123     case TYPE_HISTORY_QUICK:
124       return metrics::OmniboxEventProto::HISTORY_QUICK;
125     case TYPE_HISTORY_URL:
126       return metrics::OmniboxEventProto::HISTORY_URL;
127     case TYPE_KEYWORD:
128       return metrics::OmniboxEventProto::KEYWORD;
129     case TYPE_ON_DEVICE_HEAD:
130       return metrics::OmniboxEventProto::ON_DEVICE_HEAD;
131     case TYPE_SEARCH:
132       return metrics::OmniboxEventProto::SEARCH;
133     case TYPE_SHORTCUTS:
134       return metrics::OmniboxEventProto::SHORTCUTS;
135     case TYPE_ZERO_SUGGEST:
136       return metrics::OmniboxEventProto::ZERO_SUGGEST;
137     case TYPE_ZERO_SUGGEST_LOCAL_HISTORY:
138       return metrics::OmniboxEventProto::ZERO_SUGGEST_LOCAL_HISTORY;
139     case TYPE_QUERY_TILE:
140       return metrics::OmniboxEventProto::QUERY_TILE;
141     case TYPE_MOST_VISITED_SITES:
142       return metrics::OmniboxEventProto::ZERO_SUGGEST;
143     case TYPE_VERBATIM_MATCH:
144       return metrics::OmniboxEventProto::ZERO_SUGGEST;
145     default:
146       NOTREACHED() << "Unhandled AutocompleteProvider::Type " << type_;
147       return metrics::OmniboxEventProto::UNKNOWN_PROVIDER;
148   }
149 }
150 
DeleteMatch(const AutocompleteMatch & match)151 void AutocompleteProvider::DeleteMatch(const AutocompleteMatch& match) {
152   DLOG(WARNING) << "The AutocompleteProvider '" << GetName()
153                 << "' has not implemented DeleteMatch.";
154 }
155 
AddProviderInfo(ProvidersInfo * provider_info) const156 void AutocompleteProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
157 }
158 
ResetSession()159 void AutocompleteProvider::ResetSession() {
160 }
161 
EstimateMemoryUsage() const162 size_t AutocompleteProvider::EstimateMemoryUsage() const {
163   return base::trace_event::EstimateMemoryUsage(matches_);
164 }
165 
~AutocompleteProvider()166 AutocompleteProvider::~AutocompleteProvider() {
167   Stop(false, false);
168 }
169 
170 // static
FixupUserInput(const AutocompleteInput & input)171 AutocompleteProvider::FixupReturn AutocompleteProvider::FixupUserInput(
172     const AutocompleteInput& input) {
173   const base::string16& input_text = input.text();
174   const FixupReturn failed(false, input_text);
175 
176   // Fixup and canonicalize user input.
177   const GURL canonical_gurl(
178       url_formatter::FixupURL(base::UTF16ToUTF8(input_text), std::string()));
179   std::string canonical_gurl_str(canonical_gurl.possibly_invalid_spec());
180   if (canonical_gurl_str.empty()) {
181     // This probably won't happen, but there are no guarantees.
182     return failed;
183   }
184 
185   // If the user types a number, GURL will convert it to a dotted quad.
186   // However, if the parser did not mark this as a URL, then the user probably
187   // didn't intend this interpretation.  Since this can break history matching
188   // for hostname beginning with numbers (e.g. input of "17173" will be matched
189   // against "0.0.67.21" instead of the original "17173", failing to find
190   // "17173.com"), swap the original hostname in for the fixed-up one.
191   if ((input.type() != metrics::OmniboxInputType::URL) &&
192       canonical_gurl.HostIsIPAddress()) {
193     std::string original_hostname =
194         base::UTF16ToUTF8(input_text.substr(input.parts().host.begin,
195                                             input.parts().host.len));
196     const url::Parsed& parts =
197         canonical_gurl.parsed_for_possibly_invalid_spec();
198     // parts.host must not be empty when HostIsIPAddress() is true.
199     DCHECK(parts.host.is_nonempty());
200     canonical_gurl_str.replace(parts.host.begin, parts.host.len,
201                                original_hostname);
202   }
203   base::string16 output(base::UTF8ToUTF16(canonical_gurl_str));
204   // Don't prepend a scheme when the user didn't have one.  Since the fixer
205   // upper only prepends the "http" scheme, that's all we need to check for.
206   if (!AutocompleteInput::HasHTTPScheme(input_text))
207     TrimHttpPrefix(&output);
208 
209   // Make the number of trailing slashes on the output exactly match the input.
210   // Examples of why not doing this would matter:
211   // * The user types "a" and has this fixed up to "a/".  Now no other sites
212   //   beginning with "a" will match.
213   // * The user types "file:" and has this fixed up to "file://".  Now inline
214   //   autocomplete will append too few slashes, resulting in e.g. "file:/b..."
215   //   instead of "file:///b..."
216   // * The user types "http:/" and has this fixed up to "http:".  Now inline
217   //   autocomplete will append too many slashes, resulting in e.g.
218   //   "http:///c..." instead of "http://c...".
219   // NOTE: We do this after calling TrimHttpPrefix() since that can strip
220   // trailing slashes (if the scheme is the only thing in the input).  It's not
221   // clear that the result of fixup really matters in this case, but there's no
222   // harm in making sure.
223   const size_t last_input_nonslash =
224       input_text.find_last_not_of(base::ASCIIToUTF16("/\\"));
225   size_t num_input_slashes =
226       (last_input_nonslash == base::string16::npos)
227           ? input_text.length()
228           : (input_text.length() - 1 - last_input_nonslash);
229   // If we appended text, user slashes are irrelevant.
230   if (output.length() > input_text.length() &&
231       base::StartsWith(output, input_text, base::CompareCase::SENSITIVE))
232     num_input_slashes = 0;
233   const size_t last_output_nonslash =
234       output.find_last_not_of(base::ASCIIToUTF16("/\\"));
235   const size_t num_output_slashes =
236       (last_output_nonslash == base::string16::npos) ?
237       output.length() : (output.length() - 1 - last_output_nonslash);
238   if (num_output_slashes < num_input_slashes)
239     output.append(num_input_slashes - num_output_slashes, '/');
240   else if (num_output_slashes > num_input_slashes)
241     output.erase(output.length() - num_output_slashes + num_input_slashes);
242   if (output.empty())
243     return failed;
244 
245   return FixupReturn(true, output);
246 }
247 
248 // static
TrimHttpPrefix(base::string16 * url)249 size_t AutocompleteProvider::TrimHttpPrefix(base::string16* url) {
250   // Find any "http:".
251   if (!AutocompleteInput::HasHTTPScheme(*url))
252     return 0;
253   size_t scheme_pos =
254       url->find(base::ASCIIToUTF16(url::kHttpScheme) + base::char16(':'));
255   DCHECK_NE(base::string16::npos, scheme_pos);
256 
257   // Erase scheme plus up to two slashes.
258   size_t prefix_end = scheme_pos + strlen(url::kHttpScheme) + 1;
259   const size_t after_slashes = std::min(url->length(), prefix_end + 2);
260   while ((prefix_end < after_slashes) && ((*url)[prefix_end] == '/'))
261     ++prefix_end;
262   url->erase(scheme_pos, prefix_end - scheme_pos);
263   return (scheme_pos == 0) ? prefix_end : 0;
264 }
265 
266 // static
InExplicitExperimentalKeywordMode(const AutocompleteInput & input,const base::string16 & keyword)267 bool AutocompleteProvider::InExplicitExperimentalKeywordMode(
268     const AutocompleteInput& input,
269     const base::string16& keyword) {
270   return OmniboxFieldTrial::IsExperimentalKeywordModeEnabled() &&
271          input.prefer_keyword() &&
272          base::StartsWith(input.text(), keyword,
273                           base::CompareCase::SENSITIVE) &&
274          IsExplicitlyInKeywordMode(input, keyword);
275 }
276 
277 // static
IsExplicitlyInKeywordMode(const AutocompleteInput & input,const base::string16 & keyword)278 bool AutocompleteProvider::IsExplicitlyInKeywordMode(
279     const AutocompleteInput& input,
280     const base::string16& keyword) {
281   // It is important to this method that we determine if the user entered
282   // keyword mode intentionally, as we use this routine to e.g. filter
283   // all but keyword results. Currently we assume that the user entered
284   // keyword mode intentionally with all entry methods except with a
285   // space (and disregard entry method during a backspace). However, if the
286   // user has typed a char past the space, we again assume keyword mode.
287   return (((input.keyword_mode_entry_method() !=
288                 metrics::OmniboxEventProto::SPACE_AT_END &&
289             input.keyword_mode_entry_method() !=
290                 metrics::OmniboxEventProto::SPACE_IN_MIDDLE) &&
291            !input.prevent_inline_autocomplete()) ||
292           input.text().size() > keyword.size() + 1);
293 }
294