1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "components/omnibox/browser/omnibox_pedal_provider.h"
6
7 #include "base/i18n/case_conversion.h"
8 #include "base/i18n/char_iterator.h"
9 #include "base/json/json_reader.h"
10 #include "base/metrics/field_trial_params.h"
11 #include "base/strings/string_tokenizer.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/omnibox/browser/autocomplete_input.h"
15 #include "components/omnibox/browser/autocomplete_provider_client.h"
16 #include "components/omnibox/browser/omnibox_field_trial.h"
17 #include "components/omnibox/browser/omnibox_pedal.h"
18 #include "components/omnibox/browser/omnibox_pedal_implementations.h"
19 #include "components/omnibox/common/omnibox_features.h"
20 #include "components/omnibox/resources/grit/omnibox_resources.h"
21 #include "ui/base/resource/resource_bundle.h"
22
23 namespace {
24 typedef base::StringTokenizerT<base::string16, base::string16::const_iterator>
25 StringTokenizer16;
26
27 } // namespace
28
OmniboxPedalProvider(AutocompleteProviderClient & client)29 OmniboxPedalProvider::OmniboxPedalProvider(AutocompleteProviderClient& client)
30 : client_(client),
31 pedals_(GetPedalImplementations()),
32 ignore_group_(false, false, 0) {
33 LoadPedalConcepts();
34 }
35
~OmniboxPedalProvider()36 OmniboxPedalProvider::~OmniboxPedalProvider() {}
37
AddProviderInfo(ProvidersInfo * provider_info) const38 void OmniboxPedalProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
39 provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo());
40 metrics::OmniboxEventProto_ProviderInfo& new_entry = provider_info->back();
41 // Note: SEARCH is used here because the suggestions that Pedals attach to are
42 // almost exclusively coming from search suggestions (they could in theory
43 // attach to others if the match content were a concept match, but in practice
44 // only search suggestions have the relevant text). PEDAL is not used because
45 // Pedals are not themselves suggestions produced by an autocomplete provider.
46 // This may change. See http://cl/327103601 for context and discussion.
47 new_entry.set_provider(metrics::OmniboxEventProto::SEARCH);
48 new_entry.set_provider_done(true);
49
50 if (field_trial_triggered_ || field_trial_triggered_in_session_) {
51 std::vector<uint32_t> field_trial_hashes;
52 OmniboxFieldTrial::GetActiveSuggestFieldTrialHashes(&field_trial_hashes);
53 for (uint32_t trial : field_trial_hashes) {
54 if (field_trial_triggered_)
55 new_entry.mutable_field_trial_triggered()->Add(trial);
56 if (field_trial_triggered_in_session_)
57 new_entry.mutable_field_trial_triggered_in_session()->Add(trial);
58 }
59 }
60 }
61
ResetSession()62 void OmniboxPedalProvider::ResetSession() {
63 field_trial_triggered_in_session_ = false;
64 field_trial_triggered_ = false;
65 }
66
FindPedalMatch(const AutocompleteInput & input,const base::string16 & match_text)67 OmniboxPedal* OmniboxPedalProvider::FindPedalMatch(
68 const AutocompleteInput& input,
69 const base::string16& match_text) {
70 OmniboxPedal::Tokens match_tokens = Tokenize(match_text);
71 if (match_tokens.empty()) {
72 return nullptr;
73 }
74
75 // Some users may be in a counterfactual study arm in which the pedal button
76 // is not attached to the suggestion.
77 bool in_pedal_counterfactual_group = base::GetFieldTrialParamByFeatureAsBool(
78 omnibox::kOmniboxPedalSuggestions, "PedalSuggestionsCounterfactualArm",
79 false);
80
81 for (const auto& pedal : pedals_) {
82 if (pedal.second->IsTriggerMatch(match_tokens) &&
83 pedal.second->IsReadyToTrigger(input, client_)) {
84 field_trial_triggered_ = true;
85 field_trial_triggered_in_session_ = true;
86
87 return in_pedal_counterfactual_group ? nullptr : pedal.second.get();
88 }
89 }
90 return nullptr;
91 }
92
Tokenize(const base::string16 & text) const93 OmniboxPedal::Tokens OmniboxPedalProvider::Tokenize(
94 const base::string16& text) const {
95 base::string16 reduced_text = base::i18n::ToLower(text);
96 OmniboxPedal::Tokens match_tokens;
97 match_tokens.reserve(max_tokens_);
98 if (tokenize_characters_.empty()) {
99 // Tokenize on Unicode character boundaries when we have no delimiters.
100 base::i18n::UTF16CharIterator char_iter(reduced_text);
101 int32_t left = 0;
102 while (!char_iter.end()) {
103 char_iter.Advance();
104 int32_t right = char_iter.array_pos();
105 if (right > left) {
106 const auto token = reduced_text.substr(left, right - left);
107 const auto iter = dictionary_.find(token);
108 if (iter == dictionary_.end() || match_tokens.size() >= max_tokens_) {
109 // No Pedal can possibly match because we found a token not
110 // present in the token dictionary, or the text has too many tokens.
111 return OmniboxPedal::Tokens();
112 } else {
113 match_tokens.push_back(iter->second);
114 }
115 left = right;
116 } else {
117 break;
118 }
119 }
120 } else {
121 // Delimiters will neatly divide the string into tokens.
122 StringTokenizer16 tokenizer(reduced_text, tokenize_characters_);
123 while (tokenizer.GetNext()) {
124 const auto iter = dictionary_.find(tokenizer.token());
125 if (iter == dictionary_.end() || match_tokens.size() >= max_tokens_) {
126 // No Pedal can possibly match because we found a token not
127 // present in the token dictionary, or the text has too many tokens.
128 return OmniboxPedal::Tokens();
129 } else {
130 match_tokens.push_back(iter->second);
131 }
132 }
133 }
134
135 ignore_group_.EraseMatchesIn(&match_tokens);
136
137 return match_tokens;
138 }
139
LoadPedalConcepts()140 void OmniboxPedalProvider::LoadPedalConcepts() {
141 // Load concept data then parse to base::Value in order to construct Pedals.
142 std::string uncompressed_data =
143 ui::ResourceBundle::GetSharedInstance().LoadLocalizedResourceString(
144 IDR_OMNIBOX_PEDAL_CONCEPTS);
145 const auto concept_data = base::JSONReader::Read(uncompressed_data);
146
147 DCHECK(concept_data);
148 DCHECK(concept_data->is_dict());
149
150 const int data_version = concept_data->FindKey("data_version")->GetInt();
151 CHECK_EQ(data_version, OMNIBOX_PEDAL_CONCEPTS_DATA_VERSION);
152
153 max_tokens_ = concept_data->FindKey("max_tokens")->GetInt();
154 // It is conceivable that some language may need more here, but the goal is
155 // to sanity check input since it is trusted and used for vector reserve.
156 DCHECK_LT(max_tokens_, size_t{64});
157
158 concept_data->FindKey("tokenize_characters")
159 ->GetAsString(&tokenize_characters_);
160
161 const auto& dictionary = concept_data->FindKey("dictionary")->GetList();
162 dictionary_.reserve(dictionary.size());
163 int id = 0;
164 for (const auto& token_value : dictionary) {
165 base::string16 token;
166 token_value.GetAsString(&token);
167 dictionary_.insert({token, id});
168 ++id;
169 }
170
171 const base::Value* ignore_group_value = concept_data->FindKey("ignore_group");
172 DCHECK_NE(ignore_group_value, nullptr);
173 ignore_group_ = LoadSynonymGroup(*ignore_group_value);
174
175 for (const auto& pedal_value : concept_data->FindKey("pedals")->GetList()) {
176 DCHECK(pedal_value.is_dict());
177 const OmniboxPedalId pedal_id =
178 static_cast<OmniboxPedalId>(pedal_value.FindKey("id")->GetInt());
179 const auto pedal = pedals_.find(pedal_id);
180 if (pedal == pedals_.end()) {
181 CHECK(false) << "OmniboxPedalId " << static_cast<int>(pedal_id)
182 << " not found. Are all data-referenced implementations "
183 "added to provider?";
184 }
185 for (const auto& group_value : pedal_value.FindKey("groups")->GetList()) {
186 pedal->second->AddSynonymGroup(LoadSynonymGroup(group_value));
187 }
188 }
189 }
190
LoadSynonymGroup(const base::Value & group_value) const191 OmniboxPedal::SynonymGroup OmniboxPedalProvider::LoadSynonymGroup(
192 const base::Value& group_value) const {
193 DCHECK(group_value.is_dict());
194 const bool required = group_value.FindKey("required")->GetBool();
195 const bool single = group_value.FindKey("single")->GetBool();
196 const auto& synonyms = group_value.FindKey("synonyms")->GetList();
197 OmniboxPedal::SynonymGroup synonym_group(required, single, synonyms.size());
198 for (const auto& synonyms_value : synonyms) {
199 DCHECK(synonyms_value.is_list());
200 const auto& synonyms_value_list = synonyms_value.GetList();
201 OmniboxPedal::Tokens synonym_all_tokens;
202 synonym_all_tokens.reserve(synonyms_value_list.size());
203 for (const auto& token_index_value : synonyms_value_list) {
204 synonym_all_tokens.push_back(token_index_value.GetInt());
205 }
206 synonym_group.AddSynonym(std::move(synonym_all_tokens));
207 }
208 return synonym_group;
209 }
210