1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "extensions/renderer/i18n_hooks_delegate.h"
6 
7 #include <vector>
8 
9 #include "base/logging.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/stl_util.h"
12 #include "base/strings/string_util.h"
13 #include "content/public/renderer/render_frame.h"
14 #include "content/public/renderer/render_thread.h"
15 #include "extensions/common/extension.h"
16 #include "extensions/common/extension_messages.h"
17 #include "extensions/common/message_bundle.h"
18 #include "extensions/renderer/bindings/api_signature.h"
19 #include "extensions/renderer/bindings/js_runner.h"
20 #include "extensions/renderer/get_script_context.h"
21 #include "extensions/renderer/script_context.h"
22 #include "gin/converter.h"
23 #include "gin/data_object_builder.h"
24 #if !defined(TOOLKIT_QT)
25 #include "third_party/cld_3/src/src/nnet_language_identifier.h"
26 #endif // !defined(TOOLKIT_QT)
27 
28 namespace extensions {
29 
30 namespace {
31 
// Fully-qualified names of the i18n API methods that receive custom handling.
constexpr char kGetMessage[] = "i18n.getMessage";
constexpr char kGetUILanguage[] = "i18n.getUILanguage";
constexpr char kDetectLanguage[] = "i18n.detectLanguage";

// Upper bound on the number of languages requested from the detector.
constexpr int kCldNumLangs = 3;

// Smallest input size, in bytes, that CLD3 is trusted with. Predictions for
// shorter inputs are treated as unreliable.
constexpr int kCld3MinimumByteThreshold = 50;
42 
43 struct DetectedLanguage {
DetectedLanguageextensions::__anon1e358f360111::DetectedLanguage44   DetectedLanguage(const std::string& language, int percentage)
45       : language(language), percentage(percentage) {}
46 
47   // Returns a new v8::Local<v8::Value> representing the serialized form of
48   // this DetectedLanguage object.
49   v8::Local<v8::Value> ToV8(v8::Isolate* isolate) const;
50 
51   std::string language;
52   int percentage;
53 };
54 
55 // LanguageDetectionResult object that holds detected langugae reliability and
56 // array of DetectedLanguage
57 struct LanguageDetectionResult {
LanguageDetectionResultextensions::__anon1e358f360111::LanguageDetectionResult58   LanguageDetectionResult() {}
~LanguageDetectionResultextensions::__anon1e358f360111::LanguageDetectionResult59   ~LanguageDetectionResult() {}
60 
61   // Returns a new v8::Local<v8::Value> representing the serialized form of
62   // this Result object.
63   v8::Local<v8::Value> ToV8(v8::Local<v8::Context> context) const;
64 
65   // CLD detected language reliability
66   bool is_reliable = false;
67 
68   // Array of detectedLanguage of size 1-3. The null is returned if
69   // there were no languages detected
70   std::vector<DetectedLanguage> languages;
71 
72  private:
73   DISALLOW_COPY_AND_ASSIGN(LanguageDetectionResult);
74 };
75 
ToV8(v8::Isolate * isolate) const76 v8::Local<v8::Value> DetectedLanguage::ToV8(v8::Isolate* isolate) const {
77   return gin::DataObjectBuilder(isolate)
78       .Set("language", language)
79       .Set("percentage", percentage)
80       .Build();
81 }
82 
ToV8(v8::Local<v8::Context> context) const83 v8::Local<v8::Value> LanguageDetectionResult::ToV8(
84     v8::Local<v8::Context> context) const {
85   v8::Isolate* isolate = context->GetIsolate();
86   DCHECK(isolate->GetCurrentContext() == context);
87 
88   v8::Local<v8::Array> v8_languages = v8::Array::New(isolate, languages.size());
89   for (uint32_t i = 0; i < languages.size(); ++i) {
90     bool success =
91         v8_languages->CreateDataProperty(context, i, languages[i].ToV8(isolate))
92             .ToChecked();
93     DCHECK(success) << "CreateDataProperty() should never fail.";
94   }
95   return gin::DataObjectBuilder(isolate)
96       .Set("isReliable", is_reliable)
97       .Set("languages", v8_languages.As<v8::Value>())
98       .Build();
99 }
100 
101 #if !defined(TOOLKIT_QT)
InitDetectedLanguages(const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> & lang_results,LanguageDetectionResult * result)102 void InitDetectedLanguages(
103     const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>&
104         lang_results,
105     LanguageDetectionResult* result) {
106   std::vector<DetectedLanguage>* detected_languages = &result->languages;
107   DCHECK(detected_languages->empty());
108   bool* is_reliable = &result->is_reliable;
109 
110   // is_reliable is set to "true", so that the reliability can be calculated by
111   // &&'ing the reliability of each predicted language.
112   *is_reliable = true;
113   for (const auto& lang_result : lang_results) {
114     const std::string& language_code = lang_result.language;
115 
116     // If a language is kUnknown, then the remaining ones are also kUnknown.
117     if (language_code == chrome_lang_id::NNetLanguageIdentifier::kUnknown) {
118       break;
119     }
120 
121     // The list of languages supported by CLD3 is saved in kLanguageNames
122     // in the following file:
123     // //src/third_party/cld_3/src/src/task_context_params.cc
124     // Among the entries in this list are transliterated languages
125     // (called xx-Latn) which don't belong to the spec ISO639-1 used by
126     // the previous model, CLD2. Thus, to maintain backwards compatibility,
127     // xx-Latn predictions are ignored for now.
128     if (base::EndsWith(language_code, "-Latn",
129                        base::CompareCase::INSENSITIVE_ASCII)) {
130       continue;
131     }
132 
133     *is_reliable = *is_reliable && lang_result.is_reliable;
134     const int percent = static_cast<int>(100 * lang_result.proportion);
135     detected_languages->emplace_back(language_code, percent);
136   }
137 
138   if (detected_languages->empty())
139     *is_reliable = false;
140 }
141 #endif // !defined(TOOLKIT_QT)
142 
143 // Returns the localized method for the given |message_name| and
144 // substitutions. This can result in a synchronous IPC being sent to the browser
145 // for the first call related to an extension in this process.
GetI18nMessage(const std::string & message_name,const std::string & extension_id,v8::Local<v8::Value> v8_substitutions,v8::Local<v8::Value> v8_options,content::RenderFrame * render_frame,v8::Local<v8::Context> context)146 v8::Local<v8::Value> GetI18nMessage(const std::string& message_name,
147                                     const std::string& extension_id,
148                                     v8::Local<v8::Value> v8_substitutions,
149                                     v8::Local<v8::Value> v8_options,
150                                     content::RenderFrame* render_frame,
151                                     v8::Local<v8::Context> context) {
152   v8::Isolate* isolate = context->GetIsolate();
153   L10nMessagesMap* l10n_messages = nullptr;
154   {
155     ExtensionToL10nMessagesMap& messages_map = *GetExtensionToL10nMessagesMap();
156     auto iter = messages_map.find(extension_id);
157     if (iter != messages_map.end()) {
158       l10n_messages = &iter->second;
159     } else {
160       if (!render_frame)
161         return v8::Undefined(isolate);
162 
163       l10n_messages = &messages_map[extension_id];
164       // A sync call to load message catalogs for current extension.
165       // TODO(devlin): Wait, what?! A synchronous call to the browser to perform
166       // potentially blocking work reading files from disk? That's Bad.
167       {
168         SCOPED_UMA_HISTOGRAM_TIMER("Extensions.SyncGetMessageBundle");
169         render_frame->Send(
170             new ExtensionHostMsg_GetMessageBundle(extension_id, l10n_messages));
171       }
172     }
173   }
174 
175   std::string message =
176       MessageBundle::GetL10nMessage(message_name, *l10n_messages);
177 
178   std::vector<std::string> substitutions;
179   // For now, we just suppress all errors, but that's really not the best.
180   // See https://crbug.com/807769.
181   v8::TryCatch try_catch(isolate);
182   if (v8_substitutions->IsArray()) {
183     // chrome.i18n.getMessage("message_name", ["more", "params"]);
184     v8::Local<v8::Array> placeholders = v8_substitutions.As<v8::Array>();
185     uint32_t count = placeholders->Length();
186     if (count > 9)
187       return v8::Undefined(isolate);
188 
189     for (uint32_t i = 0; i < count; ++i) {
190       v8::Local<v8::Value> placeholder;
191       if (!placeholders->Get(context, i).ToLocal(&placeholder))
192         return v8::Undefined(isolate);
193       // Note: this tries to convert each entry to a JS string, which can fail.
194       // If it does, String::Utf8Value() catches the error and doesn't surface
195       // it to the calling script (though the call may still be observable,
196       // since this goes through an object's toString() method). If it fails,
197       // we just silently ignore the value.
198       v8::String::Utf8Value string_value(isolate, placeholder);
199       if (*string_value)
200         substitutions.push_back(*string_value);
201     }
202   } else if (v8_substitutions->IsString()) {
203     // chrome.i18n.getMessage("message_name", "one param");
204     substitutions.push_back(gin::V8ToString(isolate, v8_substitutions));
205   }
206   // TODO(devlin): We currently just ignore any non-string, non-array values
207   // for substitutions, but the type is documented as 'any'. We should either
208   // enforce type more heavily, or throw an error here.
209 
210   if (v8_options->IsObject()) {
211     v8::Local<v8::Object> options = v8_options.As<v8::Object>();
212     v8::Local<v8::Value> key =
213         v8::String::NewFromUtf8(isolate, "escapeLt").ToLocalChecked();
214     v8::Local<v8::Value> html;
215     if (options->Get(context, key).ToLocal(&html) && html->IsBoolean() &&
216         html.As<v8::Boolean>()->Value()) {
217       base::ReplaceChars(message, "<", "&lt;", &message);
218     }
219   }
220 
221   // NOTE: We call ReplaceStringPlaceholders even if |substitutions| is empty
222   // because we substitute $$ to be $ (in order to display a dollar sign in a
223   // message). See https://crbug.com/127243.
224   message = base::ReplaceStringPlaceholders(message, substitutions, nullptr);
225   return gin::StringToV8(isolate, message);
226 }
227 
228 // Returns the detected language for the sample |text|.
DetectTextLanguage(v8::Local<v8::Context> context,const std::string & text)229 v8::Local<v8::Value> DetectTextLanguage(v8::Local<v8::Context> context,
230                                         const std::string& text) {
231 #if !defined(TOOLKIT_QT)
232   chrome_lang_id::NNetLanguageIdentifier nnet_lang_id(/*min_num_bytes=*/0,
233                                                       /*max_num_bytes=*/512);
234   std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> lang_results =
235       nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs);
236 
237   // is_reliable is set to false if we believe the input is too short to be
238   // accurately identified by the current model.
239   if (text.size() < kCld3MinimumByteThreshold) {
240     for (auto& result : lang_results)
241       result.is_reliable = false;
242   }
243 #endif // !defined(TOOLKIT_QT)
244 
245   LanguageDetectionResult result;
246 
247 #if !defined(TOOLKIT_QT)
248   // Populate LanguageDetectionResult with prediction reliability, languages,
249   // and the corresponding percentages.
250   InitDetectedLanguages(lang_results, &result);
251 #endif // !defined(TOOLKIT_QT)
252   return result.ToV8(context);
253 }
254 
255 }  // namespace
256 
257 using RequestResult = APIBindingHooks::RequestResult;
258 
I18nHooksDelegate()259 I18nHooksDelegate::I18nHooksDelegate() {}
260 I18nHooksDelegate::~I18nHooksDelegate() = default;
261 
HandleRequest(const std::string & method_name,const APISignature * signature,v8::Local<v8::Context> context,std::vector<v8::Local<v8::Value>> * arguments,const APITypeReferenceMap & refs)262 RequestResult I18nHooksDelegate::HandleRequest(
263     const std::string& method_name,
264     const APISignature* signature,
265     v8::Local<v8::Context> context,
266     std::vector<v8::Local<v8::Value>>* arguments,
267     const APITypeReferenceMap& refs) {
268   using Handler = RequestResult (I18nHooksDelegate::*)(
269       ScriptContext*, const std::vector<v8::Local<v8::Value>>&);
270   static const struct {
271     Handler handler;
272     base::StringPiece method;
273   } kHandlers[] = {
274       {&I18nHooksDelegate::HandleGetMessage, kGetMessage},
275       {&I18nHooksDelegate::HandleGetUILanguage, kGetUILanguage},
276       {&I18nHooksDelegate::HandleDetectLanguage, kDetectLanguage},
277   };
278 
279   ScriptContext* script_context = GetScriptContextFromV8ContextChecked(context);
280 
281   Handler handler = nullptr;
282   for (const auto& handler_entry : kHandlers) {
283     if (handler_entry.method == method_name) {
284       handler = handler_entry.handler;
285       break;
286     }
287   }
288 
289   if (!handler)
290     return RequestResult(RequestResult::NOT_HANDLED);
291 
292   APISignature::V8ParseResult parse_result =
293       signature->ParseArgumentsToV8(context, *arguments, refs);
294   if (!parse_result.succeeded()) {
295     RequestResult result(RequestResult::INVALID_INVOCATION);
296     result.error = std::move(*parse_result.error);
297     return result;
298   }
299 
300   return (this->*handler)(script_context, *parse_result.arguments);
301 }
302 
HandleGetMessage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)303 RequestResult I18nHooksDelegate::HandleGetMessage(
304     ScriptContext* script_context,
305     const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
306   DCHECK(script_context->extension());
307   DCHECK(parsed_arguments[0]->IsString());
308   v8::Local<v8::Value> message = GetI18nMessage(
309       gin::V8ToString(script_context->isolate(), parsed_arguments[0]),
310       script_context->extension()->id(), parsed_arguments[1],
311       parsed_arguments[2], script_context->GetRenderFrame(),
312       script_context->v8_context());
313 
314   RequestResult result(RequestResult::HANDLED);
315   result.return_value = message;
316   return result;
317 }
318 
HandleGetUILanguage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)319 RequestResult I18nHooksDelegate::HandleGetUILanguage(
320     ScriptContext* script_context,
321     const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
322   RequestResult result(RequestResult::HANDLED);
323   result.return_value = gin::StringToSymbol(
324       script_context->isolate(), content::RenderThread::Get()->GetLocale());
325   return result;
326 }
327 
HandleDetectLanguage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)328 RequestResult I18nHooksDelegate::HandleDetectLanguage(
329     ScriptContext* script_context,
330     const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
331   DCHECK(parsed_arguments[0]->IsString());
332   DCHECK(parsed_arguments[1]->IsFunction());
333 
334   v8::Local<v8::Context> v8_context = script_context->v8_context();
335 
336   v8::Local<v8::Value> detected_languages = DetectTextLanguage(
337       v8_context,
338       gin::V8ToString(script_context->isolate(), parsed_arguments[0]));
339 
340   // NOTE(devlin): The JS bindings make this callback asynchronous through a
341   // setTimeout, but it shouldn't be necessary.
342   v8::Local<v8::Value> callback_args[] = {detected_languages};
343   JSRunner::Get(v8_context)
344       ->RunJSFunction(parsed_arguments[1].As<v8::Function>(),
345                       script_context->v8_context(), base::size(callback_args),
346                       callback_args);
347 
348   return RequestResult(RequestResult::HANDLED);
349 }
350 
351 }  // namespace extensions
352