1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "components/omnibox/browser/omnibox_pedal_provider.h"
6 
7 #include "base/i18n/case_conversion.h"
8 #include "base/i18n/char_iterator.h"
9 #include "base/json/json_reader.h"
10 #include "base/metrics/field_trial_params.h"
11 #include "base/strings/string_tokenizer.h"
12 #include "base/strings/string_util.h"
13 #include "base/strings/utf_string_conversions.h"
14 #include "components/omnibox/browser/autocomplete_input.h"
15 #include "components/omnibox/browser/autocomplete_provider_client.h"
16 #include "components/omnibox/browser/omnibox_field_trial.h"
17 #include "components/omnibox/browser/omnibox_pedal.h"
18 #include "components/omnibox/browser/omnibox_pedal_implementations.h"
19 #include "components/omnibox/common/omnibox_features.h"
20 #include "components/omnibox/resources/grit/omnibox_resources.h"
21 #include "ui/base/resource/resource_bundle.h"
22 
23 namespace {
24 typedef base::StringTokenizerT<base::string16, base::string16::const_iterator>
25     StringTokenizer16;
26 
27 }  // namespace
28 
OmniboxPedalProvider(AutocompleteProviderClient & client)29 OmniboxPedalProvider::OmniboxPedalProvider(AutocompleteProviderClient& client)
30     : client_(client),
31       pedals_(GetPedalImplementations()),
32       ignore_group_(false, false, 0) {
33   LoadPedalConcepts();
34 }
35 
~OmniboxPedalProvider()36 OmniboxPedalProvider::~OmniboxPedalProvider() {}
37 
AddProviderInfo(ProvidersInfo * provider_info) const38 void OmniboxPedalProvider::AddProviderInfo(ProvidersInfo* provider_info) const {
39   provider_info->push_back(metrics::OmniboxEventProto_ProviderInfo());
40   metrics::OmniboxEventProto_ProviderInfo& new_entry = provider_info->back();
41   // Note: SEARCH is used here because the suggestions that Pedals attach to are
42   // almost exclusively coming from search suggestions (they could in theory
43   // attach to others if the match content were a concept match, but in practice
44   // only search suggestions have the relevant text). PEDAL is not used because
45   // Pedals are not themselves suggestions produced by an autocomplete provider.
46   // This may change. See http://cl/327103601 for context and discussion.
47   new_entry.set_provider(metrics::OmniboxEventProto::SEARCH);
48   new_entry.set_provider_done(true);
49 
50   if (field_trial_triggered_ || field_trial_triggered_in_session_) {
51     std::vector<uint32_t> field_trial_hashes;
52     OmniboxFieldTrial::GetActiveSuggestFieldTrialHashes(&field_trial_hashes);
53     for (uint32_t trial : field_trial_hashes) {
54       if (field_trial_triggered_)
55         new_entry.mutable_field_trial_triggered()->Add(trial);
56       if (field_trial_triggered_in_session_)
57         new_entry.mutable_field_trial_triggered_in_session()->Add(trial);
58     }
59   }
60 }
61 
ResetSession()62 void OmniboxPedalProvider::ResetSession() {
63   field_trial_triggered_in_session_ = false;
64   field_trial_triggered_ = false;
65 }
66 
FindPedalMatch(const AutocompleteInput & input,const base::string16 & match_text)67 OmniboxPedal* OmniboxPedalProvider::FindPedalMatch(
68     const AutocompleteInput& input,
69     const base::string16& match_text) {
70   OmniboxPedal::Tokens match_tokens = Tokenize(match_text);
71   if (match_tokens.empty()) {
72     return nullptr;
73   }
74 
75   // Some users may be in a counterfactual study arm in which the pedal button
76   // is not attached to the suggestion.
77   bool in_pedal_counterfactual_group = base::GetFieldTrialParamByFeatureAsBool(
78       omnibox::kOmniboxPedalSuggestions, "PedalSuggestionsCounterfactualArm",
79       false);
80 
81   for (const auto& pedal : pedals_) {
82     if (pedal.second->IsTriggerMatch(match_tokens) &&
83         pedal.second->IsReadyToTrigger(input, client_)) {
84       field_trial_triggered_ = true;
85       field_trial_triggered_in_session_ = true;
86 
87       return in_pedal_counterfactual_group ? nullptr : pedal.second.get();
88     }
89   }
90   return nullptr;
91 }
92 
Tokenize(const base::string16 & text) const93 OmniboxPedal::Tokens OmniboxPedalProvider::Tokenize(
94     const base::string16& text) const {
95   base::string16 reduced_text = base::i18n::ToLower(text);
96   OmniboxPedal::Tokens match_tokens;
97   match_tokens.reserve(max_tokens_);
98   if (tokenize_characters_.empty()) {
99     // Tokenize on Unicode character boundaries when we have no delimiters.
100     base::i18n::UTF16CharIterator char_iter(reduced_text);
101     int32_t left = 0;
102     while (!char_iter.end()) {
103       char_iter.Advance();
104       int32_t right = char_iter.array_pos();
105       if (right > left) {
106         const auto token = reduced_text.substr(left, right - left);
107         const auto iter = dictionary_.find(token);
108         if (iter == dictionary_.end() || match_tokens.size() >= max_tokens_) {
109           // No Pedal can possibly match because we found a token not
110           // present in the token dictionary, or the text has too many tokens.
111           return OmniboxPedal::Tokens();
112         } else {
113           match_tokens.push_back(iter->second);
114         }
115         left = right;
116       } else {
117         break;
118       }
119     }
120   } else {
121     // Delimiters will neatly divide the string into tokens.
122     StringTokenizer16 tokenizer(reduced_text, tokenize_characters_);
123     while (tokenizer.GetNext()) {
124       const auto iter = dictionary_.find(tokenizer.token());
125       if (iter == dictionary_.end() || match_tokens.size() >= max_tokens_) {
126         // No Pedal can possibly match because we found a token not
127         // present in the token dictionary, or the text has too many tokens.
128         return OmniboxPedal::Tokens();
129       } else {
130         match_tokens.push_back(iter->second);
131       }
132     }
133   }
134 
135   ignore_group_.EraseMatchesIn(&match_tokens);
136 
137   return match_tokens;
138 }
139 
LoadPedalConcepts()140 void OmniboxPedalProvider::LoadPedalConcepts() {
141   // Load concept data then parse to base::Value in order to construct Pedals.
142   std::string uncompressed_data =
143       ui::ResourceBundle::GetSharedInstance().LoadLocalizedResourceString(
144           IDR_OMNIBOX_PEDAL_CONCEPTS);
145   const auto concept_data = base::JSONReader::Read(uncompressed_data);
146 
147   DCHECK(concept_data);
148   DCHECK(concept_data->is_dict());
149 
150   const int data_version = concept_data->FindKey("data_version")->GetInt();
151   CHECK_EQ(data_version, OMNIBOX_PEDAL_CONCEPTS_DATA_VERSION);
152 
153   max_tokens_ = concept_data->FindKey("max_tokens")->GetInt();
154   // It is conceivable that some language may need more here, but the goal is
155   // to sanity check input since it is trusted and used for vector reserve.
156   DCHECK_LT(max_tokens_, size_t{64});
157 
158   concept_data->FindKey("tokenize_characters")
159       ->GetAsString(&tokenize_characters_);
160 
161   const auto& dictionary = concept_data->FindKey("dictionary")->GetList();
162   dictionary_.reserve(dictionary.size());
163   int id = 0;
164   for (const auto& token_value : dictionary) {
165     base::string16 token;
166     token_value.GetAsString(&token);
167     dictionary_.insert({token, id});
168     ++id;
169   }
170 
171   const base::Value* ignore_group_value = concept_data->FindKey("ignore_group");
172   DCHECK_NE(ignore_group_value, nullptr);
173   ignore_group_ = LoadSynonymGroup(*ignore_group_value);
174 
175   for (const auto& pedal_value : concept_data->FindKey("pedals")->GetList()) {
176     DCHECK(pedal_value.is_dict());
177     const OmniboxPedalId pedal_id =
178         static_cast<OmniboxPedalId>(pedal_value.FindKey("id")->GetInt());
179     const auto pedal = pedals_.find(pedal_id);
180     if (pedal == pedals_.end()) {
181       CHECK(false) << "OmniboxPedalId " << static_cast<int>(pedal_id)
182                    << " not found. Are all data-referenced implementations "
183                       "added to provider?";
184     }
185     for (const auto& group_value : pedal_value.FindKey("groups")->GetList()) {
186       pedal->second->AddSynonymGroup(LoadSynonymGroup(group_value));
187     }
188   }
189 }
190 
LoadSynonymGroup(const base::Value & group_value) const191 OmniboxPedal::SynonymGroup OmniboxPedalProvider::LoadSynonymGroup(
192     const base::Value& group_value) const {
193   DCHECK(group_value.is_dict());
194   const bool required = group_value.FindKey("required")->GetBool();
195   const bool single = group_value.FindKey("single")->GetBool();
196   const auto& synonyms = group_value.FindKey("synonyms")->GetList();
197   OmniboxPedal::SynonymGroup synonym_group(required, single, synonyms.size());
198   for (const auto& synonyms_value : synonyms) {
199     DCHECK(synonyms_value.is_list());
200     const auto& synonyms_value_list = synonyms_value.GetList();
201     OmniboxPedal::Tokens synonym_all_tokens;
202     synonym_all_tokens.reserve(synonyms_value_list.size());
203     for (const auto& token_index_value : synonyms_value_list) {
204       synonym_all_tokens.push_back(token_index_value.GetInt());
205     }
206     synonym_group.AddSynonym(std::move(synonym_all_tokens));
207   }
208   return synonym_group;
209 }
210