1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/omnibox/browser/autocomplete_provider.h"
6
7 #include <algorithm>
8 #include <set>
9 #include <string>
10
11 #include "base/feature_list.h"
12 #include "base/i18n/case_conversion.h"
13 #include "base/logging.h"
14 #include "base/no_destructor.h"
15 #include "base/strings/string_split.h"
16 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
18 #include "base/trace_event/memory_usage_estimator.h"
19 #include "components/bookmarks/browser/bookmark_utils.h"
20 #include "components/omnibox/browser/autocomplete_i18n.h"
21 #include "components/omnibox/browser/autocomplete_input.h"
22 #include "components/omnibox/browser/autocomplete_match.h"
23 #include "components/omnibox/browser/autocomplete_match_classification.h"
24 #include "components/omnibox/browser/history_provider.h"
25 #include "components/omnibox/browser/omnibox_field_trial.h"
26 #include "components/omnibox/browser/scored_history_match.h"
27 #include "components/omnibox/common/omnibox_features.h"
28 #include "components/url_formatter/url_fixer.h"
29 #include "url/gurl.h"
30
AutocompleteProvider(Type type)31 AutocompleteProvider::AutocompleteProvider(Type type)
32 : provider_max_matches_(OmniboxFieldTrial::GetProviderMaxMatches(type)),
33 done_(true),
34 type_(type) {}
35
36 // static
TypeToString(Type type)37 const char* AutocompleteProvider::TypeToString(Type type) {
38 switch (type) {
39 case TYPE_BOOKMARK:
40 return "Bookmark";
41 case TYPE_BUILTIN:
42 return "Builtin";
43 case TYPE_CLIPBOARD:
44 return "Clipboard";
45 case TYPE_DOCUMENT:
46 return "Document";
47 case TYPE_HISTORY_QUICK:
48 return "HistoryQuick";
49 case TYPE_HISTORY_URL:
50 return "HistoryURL";
51 case TYPE_KEYWORD:
52 return "Keyword";
53 case TYPE_ON_DEVICE_HEAD:
54 return "OnDeviceHead";
55 case TYPE_SEARCH:
56 return "Search";
57 case TYPE_SHORTCUTS:
58 return "Shortcuts";
59 case TYPE_ZERO_SUGGEST:
60 return "ZeroSuggest";
61 case TYPE_ZERO_SUGGEST_LOCAL_HISTORY:
62 return "LocalHistoryZeroSuggest";
63 case TYPE_QUERY_TILE:
64 return "QueryTile";
65 case TYPE_MOST_VISITED_SITES:
66 return "MostVisitedSites";
67 case TYPE_VERBATIM_MATCH:
68 return "VerbatimMatch";
69 default:
70 NOTREACHED() << "Unhandled AutocompleteProvider::Type " << type;
71 return "Unknown";
72 }
73 }
74
Stop(bool clear_cached_results,bool due_to_user_inactivity)75 void AutocompleteProvider::Stop(bool clear_cached_results,
76 bool due_to_user_inactivity) {
77 done_ = true;
78 }
79
GetName() const80 const char* AutocompleteProvider::GetName() const {
81 return TypeToString(type_);
82 }
83
84 // static
ClassifyAllMatchesInString(const base::string16 & find_text,const base::string16 & text,const bool text_is_search_query,const ACMatchClassifications & original_class)85 ACMatchClassifications AutocompleteProvider::ClassifyAllMatchesInString(
86 const base::string16& find_text,
87 const base::string16& text,
88 const bool text_is_search_query,
89 const ACMatchClassifications& original_class) {
90 // TODO (manukh) Move this function to autocomplete_match_classification
91 DCHECK(!find_text.empty());
92
93 if (text.empty())
94 return original_class;
95
96 TermMatches term_matches = FindTermMatches(find_text, text);
97
98 ACMatchClassifications classifications;
99 if (text_is_search_query) {
100 classifications = ClassifyTermMatches(term_matches, text.size(),
101 ACMatchClassification::NONE,
102 ACMatchClassification::MATCH);
103 } else
104 classifications = ClassifyTermMatches(term_matches, text.size(),
105 ACMatchClassification::MATCH,
106 ACMatchClassification::NONE);
107
108 return AutocompleteMatch::MergeClassifications(original_class,
109 classifications);
110 }
111
112 metrics::OmniboxEventProto_ProviderType AutocompleteProvider::
AsOmniboxEventProviderType() const113 AsOmniboxEventProviderType() const {
114 switch (type_) {
115 case TYPE_BOOKMARK:
116 return metrics::OmniboxEventProto::BOOKMARK;
117 case TYPE_BUILTIN:
118 return metrics::OmniboxEventProto::BUILTIN;
119 case TYPE_CLIPBOARD:
120 return metrics::OmniboxEventProto::CLIPBOARD;
121 case TYPE_DOCUMENT:
122 return metrics::OmniboxEventProto::DOCUMENT;
123 case TYPE_HISTORY_QUICK:
124 return metrics::OmniboxEventProto::HISTORY_QUICK;
125 case TYPE_HISTORY_URL:
126 return metrics::OmniboxEventProto::HISTORY_URL;
127 case TYPE_KEYWORD:
128 return metrics::OmniboxEventProto::KEYWORD;
129 case TYPE_ON_DEVICE_HEAD:
130 return metrics::OmniboxEventProto::ON_DEVICE_HEAD;
131 case TYPE_SEARCH:
132 return metrics::OmniboxEventProto::SEARCH;
133 case TYPE_SHORTCUTS:
134 return metrics::OmniboxEventProto::SHORTCUTS;
135 case TYPE_ZERO_SUGGEST:
136 return metrics::OmniboxEventProto::ZERO_SUGGEST;
137 case TYPE_ZERO_SUGGEST_LOCAL_HISTORY:
138 return metrics::OmniboxEventProto::ZERO_SUGGEST_LOCAL_HISTORY;
139 case TYPE_QUERY_TILE:
140 return metrics::OmniboxEventProto::QUERY_TILE;
141 case TYPE_MOST_VISITED_SITES:
142 return metrics::OmniboxEventProto::ZERO_SUGGEST;
143 case TYPE_VERBATIM_MATCH:
144 return metrics::OmniboxEventProto::ZERO_SUGGEST;
145 default:
146 NOTREACHED() << "Unhandled AutocompleteProvider::Type " << type_;
147 return metrics::OmniboxEventProto::UNKNOWN_PROVIDER;
148 }
149 }
150
DeleteMatch(const AutocompleteMatch & match)151 void AutocompleteProvider::DeleteMatch(const AutocompleteMatch& match) {
152 DLOG(WARNING) << "The AutocompleteProvider '" << GetName()
153 << "' has not implemented DeleteMatch.";
154 }
155
AddProviderInfo(ProvidersInfo * provider_info) const156 void AutocompleteProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
157 }
158
ResetSession()159 void AutocompleteProvider::ResetSession() {
160 }
161
EstimateMemoryUsage() const162 size_t AutocompleteProvider::EstimateMemoryUsage() const {
163 return base::trace_event::EstimateMemoryUsage(matches_);
164 }
165
~AutocompleteProvider()166 AutocompleteProvider::~AutocompleteProvider() {
167 Stop(false, false);
168 }
169
170 // static
FixupUserInput(const AutocompleteInput & input)171 AutocompleteProvider::FixupReturn AutocompleteProvider::FixupUserInput(
172 const AutocompleteInput& input) {
173 const base::string16& input_text = input.text();
174 const FixupReturn failed(false, input_text);
175
176 // Fixup and canonicalize user input.
177 const GURL canonical_gurl(
178 url_formatter::FixupURL(base::UTF16ToUTF8(input_text), std::string()));
179 std::string canonical_gurl_str(canonical_gurl.possibly_invalid_spec());
180 if (canonical_gurl_str.empty()) {
181 // This probably won't happen, but there are no guarantees.
182 return failed;
183 }
184
185 // If the user types a number, GURL will convert it to a dotted quad.
186 // However, if the parser did not mark this as a URL, then the user probably
187 // didn't intend this interpretation. Since this can break history matching
188 // for hostname beginning with numbers (e.g. input of "17173" will be matched
189 // against "0.0.67.21" instead of the original "17173", failing to find
190 // "17173.com"), swap the original hostname in for the fixed-up one.
191 if ((input.type() != metrics::OmniboxInputType::URL) &&
192 canonical_gurl.HostIsIPAddress()) {
193 std::string original_hostname =
194 base::UTF16ToUTF8(input_text.substr(input.parts().host.begin,
195 input.parts().host.len));
196 const url::Parsed& parts =
197 canonical_gurl.parsed_for_possibly_invalid_spec();
198 // parts.host must not be empty when HostIsIPAddress() is true.
199 DCHECK(parts.host.is_nonempty());
200 canonical_gurl_str.replace(parts.host.begin, parts.host.len,
201 original_hostname);
202 }
203 base::string16 output(base::UTF8ToUTF16(canonical_gurl_str));
204 // Don't prepend a scheme when the user didn't have one. Since the fixer
205 // upper only prepends the "http" scheme, that's all we need to check for.
206 if (!AutocompleteInput::HasHTTPScheme(input_text))
207 TrimHttpPrefix(&output);
208
209 // Make the number of trailing slashes on the output exactly match the input.
210 // Examples of why not doing this would matter:
211 // * The user types "a" and has this fixed up to "a/". Now no other sites
212 // beginning with "a" will match.
213 // * The user types "file:" and has this fixed up to "file://". Now inline
214 // autocomplete will append too few slashes, resulting in e.g. "file:/b..."
215 // instead of "file:///b..."
216 // * The user types "http:/" and has this fixed up to "http:". Now inline
217 // autocomplete will append too many slashes, resulting in e.g.
218 // "http:///c..." instead of "http://c...".
219 // NOTE: We do this after calling TrimHttpPrefix() since that can strip
220 // trailing slashes (if the scheme is the only thing in the input). It's not
221 // clear that the result of fixup really matters in this case, but there's no
222 // harm in making sure.
223 const size_t last_input_nonslash =
224 input_text.find_last_not_of(base::ASCIIToUTF16("/\\"));
225 size_t num_input_slashes =
226 (last_input_nonslash == base::string16::npos)
227 ? input_text.length()
228 : (input_text.length() - 1 - last_input_nonslash);
229 // If we appended text, user slashes are irrelevant.
230 if (output.length() > input_text.length() &&
231 base::StartsWith(output, input_text, base::CompareCase::SENSITIVE))
232 num_input_slashes = 0;
233 const size_t last_output_nonslash =
234 output.find_last_not_of(base::ASCIIToUTF16("/\\"));
235 const size_t num_output_slashes =
236 (last_output_nonslash == base::string16::npos) ?
237 output.length() : (output.length() - 1 - last_output_nonslash);
238 if (num_output_slashes < num_input_slashes)
239 output.append(num_input_slashes - num_output_slashes, '/');
240 else if (num_output_slashes > num_input_slashes)
241 output.erase(output.length() - num_output_slashes + num_input_slashes);
242 if (output.empty())
243 return failed;
244
245 return FixupReturn(true, output);
246 }
247
248 // static
TrimHttpPrefix(base::string16 * url)249 size_t AutocompleteProvider::TrimHttpPrefix(base::string16* url) {
250 // Find any "http:".
251 if (!AutocompleteInput::HasHTTPScheme(*url))
252 return 0;
253 size_t scheme_pos =
254 url->find(base::ASCIIToUTF16(url::kHttpScheme) + base::char16(':'));
255 DCHECK_NE(base::string16::npos, scheme_pos);
256
257 // Erase scheme plus up to two slashes.
258 size_t prefix_end = scheme_pos + strlen(url::kHttpScheme) + 1;
259 const size_t after_slashes = std::min(url->length(), prefix_end + 2);
260 while ((prefix_end < after_slashes) && ((*url)[prefix_end] == '/'))
261 ++prefix_end;
262 url->erase(scheme_pos, prefix_end - scheme_pos);
263 return (scheme_pos == 0) ? prefix_end : 0;
264 }
265
266 // static
InExplicitExperimentalKeywordMode(const AutocompleteInput & input,const base::string16 & keyword)267 bool AutocompleteProvider::InExplicitExperimentalKeywordMode(
268 const AutocompleteInput& input,
269 const base::string16& keyword) {
270 return OmniboxFieldTrial::IsExperimentalKeywordModeEnabled() &&
271 input.prefer_keyword() &&
272 base::StartsWith(input.text(), keyword,
273 base::CompareCase::SENSITIVE) &&
274 IsExplicitlyInKeywordMode(input, keyword);
275 }
276
277 // static
IsExplicitlyInKeywordMode(const AutocompleteInput & input,const base::string16 & keyword)278 bool AutocompleteProvider::IsExplicitlyInKeywordMode(
279 const AutocompleteInput& input,
280 const base::string16& keyword) {
281 // It is important to this method that we determine if the user entered
282 // keyword mode intentionally, as we use this routine to e.g. filter
283 // all but keyword results. Currently we assume that the user entered
284 // keyword mode intentionally with all entry methods except with a
285 // space (and disregard entry method during a backspace). However, if the
286 // user has typed a char past the space, we again assume keyword mode.
287 return (((input.keyword_mode_entry_method() !=
288 metrics::OmniboxEventProto::SPACE_AT_END &&
289 input.keyword_mode_entry_method() !=
290 metrics::OmniboxEventProto::SPACE_IN_MIDDLE) &&
291 !input.prevent_inline_autocomplete()) ||
292 input.text().size() > keyword.size() + 1);
293 }
294