1 // Copyright 2018 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "extensions/renderer/i18n_hooks_delegate.h"
6
7 #include <vector>
8
9 #include "base/logging.h"
10 #include "base/metrics/histogram_macros.h"
11 #include "base/stl_util.h"
12 #include "base/strings/string_util.h"
13 #include "content/public/renderer/render_frame.h"
14 #include "content/public/renderer/render_thread.h"
15 #include "extensions/common/extension.h"
16 #include "extensions/common/extension_messages.h"
17 #include "extensions/common/message_bundle.h"
18 #include "extensions/renderer/bindings/api_signature.h"
19 #include "extensions/renderer/bindings/js_runner.h"
20 #include "extensions/renderer/get_script_context.h"
21 #include "extensions/renderer/script_context.h"
22 #include "gin/converter.h"
23 #include "gin/data_object_builder.h"
24 #if !defined(TOOLKIT_QT)
25 #include "third_party/cld_3/src/src/nnet_language_identifier.h"
26 #endif // !defined(TOOLKIT_QT)
27
28 namespace extensions {
29
30 namespace {
31
// Fully qualified names of the i18n API methods.
constexpr char kGetMessage[] = "i18n.getMessage";
constexpr char kGetUILanguage[] = "i18n.getUILanguage";
constexpr char kDetectLanguage[] = "i18n.detectLanguage";

// Upper bound on the number of languages requested from a single detection.
constexpr int kCldNumLangs = 3;

// Minimum input size, in bytes, for a CLD3 prediction to be trusted. Any
// prediction made on a shorter input is considered unreliable.
constexpr int kCld3MinimumByteThreshold = 50;
42
43 struct DetectedLanguage {
DetectedLanguageextensions::__anon1e358f360111::DetectedLanguage44 DetectedLanguage(const std::string& language, int percentage)
45 : language(language), percentage(percentage) {}
46
47 // Returns a new v8::Local<v8::Value> representing the serialized form of
48 // this DetectedLanguage object.
49 v8::Local<v8::Value> ToV8(v8::Isolate* isolate) const;
50
51 std::string language;
52 int percentage;
53 };
54
55 // LanguageDetectionResult object that holds detected langugae reliability and
56 // array of DetectedLanguage
57 struct LanguageDetectionResult {
LanguageDetectionResultextensions::__anon1e358f360111::LanguageDetectionResult58 LanguageDetectionResult() {}
~LanguageDetectionResultextensions::__anon1e358f360111::LanguageDetectionResult59 ~LanguageDetectionResult() {}
60
61 // Returns a new v8::Local<v8::Value> representing the serialized form of
62 // this Result object.
63 v8::Local<v8::Value> ToV8(v8::Local<v8::Context> context) const;
64
65 // CLD detected language reliability
66 bool is_reliable = false;
67
68 // Array of detectedLanguage of size 1-3. The null is returned if
69 // there were no languages detected
70 std::vector<DetectedLanguage> languages;
71
72 private:
73 DISALLOW_COPY_AND_ASSIGN(LanguageDetectionResult);
74 };
75
ToV8(v8::Isolate * isolate) const76 v8::Local<v8::Value> DetectedLanguage::ToV8(v8::Isolate* isolate) const {
77 return gin::DataObjectBuilder(isolate)
78 .Set("language", language)
79 .Set("percentage", percentage)
80 .Build();
81 }
82
ToV8(v8::Local<v8::Context> context) const83 v8::Local<v8::Value> LanguageDetectionResult::ToV8(
84 v8::Local<v8::Context> context) const {
85 v8::Isolate* isolate = context->GetIsolate();
86 DCHECK(isolate->GetCurrentContext() == context);
87
88 v8::Local<v8::Array> v8_languages = v8::Array::New(isolate, languages.size());
89 for (uint32_t i = 0; i < languages.size(); ++i) {
90 bool success =
91 v8_languages->CreateDataProperty(context, i, languages[i].ToV8(isolate))
92 .ToChecked();
93 DCHECK(success) << "CreateDataProperty() should never fail.";
94 }
95 return gin::DataObjectBuilder(isolate)
96 .Set("isReliable", is_reliable)
97 .Set("languages", v8_languages.As<v8::Value>())
98 .Build();
99 }
100
101 #if !defined(TOOLKIT_QT)
InitDetectedLanguages(const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> & lang_results,LanguageDetectionResult * result)102 void InitDetectedLanguages(
103 const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>&
104 lang_results,
105 LanguageDetectionResult* result) {
106 std::vector<DetectedLanguage>* detected_languages = &result->languages;
107 DCHECK(detected_languages->empty());
108 bool* is_reliable = &result->is_reliable;
109
110 // is_reliable is set to "true", so that the reliability can be calculated by
111 // &&'ing the reliability of each predicted language.
112 *is_reliable = true;
113 for (const auto& lang_result : lang_results) {
114 const std::string& language_code = lang_result.language;
115
116 // If a language is kUnknown, then the remaining ones are also kUnknown.
117 if (language_code == chrome_lang_id::NNetLanguageIdentifier::kUnknown) {
118 break;
119 }
120
121 // The list of languages supported by CLD3 is saved in kLanguageNames
122 // in the following file:
123 // //src/third_party/cld_3/src/src/task_context_params.cc
124 // Among the entries in this list are transliterated languages
125 // (called xx-Latn) which don't belong to the spec ISO639-1 used by
126 // the previous model, CLD2. Thus, to maintain backwards compatibility,
127 // xx-Latn predictions are ignored for now.
128 if (base::EndsWith(language_code, "-Latn",
129 base::CompareCase::INSENSITIVE_ASCII)) {
130 continue;
131 }
132
133 *is_reliable = *is_reliable && lang_result.is_reliable;
134 const int percent = static_cast<int>(100 * lang_result.proportion);
135 detected_languages->emplace_back(language_code, percent);
136 }
137
138 if (detected_languages->empty())
139 *is_reliable = false;
140 }
141 #endif // !defined(TOOLKIT_QT)
142
143 // Returns the localized method for the given |message_name| and
144 // substitutions. This can result in a synchronous IPC being sent to the browser
145 // for the first call related to an extension in this process.
GetI18nMessage(const std::string & message_name,const std::string & extension_id,v8::Local<v8::Value> v8_substitutions,v8::Local<v8::Value> v8_options,content::RenderFrame * render_frame,v8::Local<v8::Context> context)146 v8::Local<v8::Value> GetI18nMessage(const std::string& message_name,
147 const std::string& extension_id,
148 v8::Local<v8::Value> v8_substitutions,
149 v8::Local<v8::Value> v8_options,
150 content::RenderFrame* render_frame,
151 v8::Local<v8::Context> context) {
152 v8::Isolate* isolate = context->GetIsolate();
153 L10nMessagesMap* l10n_messages = nullptr;
154 {
155 ExtensionToL10nMessagesMap& messages_map = *GetExtensionToL10nMessagesMap();
156 auto iter = messages_map.find(extension_id);
157 if (iter != messages_map.end()) {
158 l10n_messages = &iter->second;
159 } else {
160 if (!render_frame)
161 return v8::Undefined(isolate);
162
163 l10n_messages = &messages_map[extension_id];
164 // A sync call to load message catalogs for current extension.
165 // TODO(devlin): Wait, what?! A synchronous call to the browser to perform
166 // potentially blocking work reading files from disk? That's Bad.
167 {
168 SCOPED_UMA_HISTOGRAM_TIMER("Extensions.SyncGetMessageBundle");
169 render_frame->Send(
170 new ExtensionHostMsg_GetMessageBundle(extension_id, l10n_messages));
171 }
172 }
173 }
174
175 std::string message =
176 MessageBundle::GetL10nMessage(message_name, *l10n_messages);
177
178 std::vector<std::string> substitutions;
179 // For now, we just suppress all errors, but that's really not the best.
180 // See https://crbug.com/807769.
181 v8::TryCatch try_catch(isolate);
182 if (v8_substitutions->IsArray()) {
183 // chrome.i18n.getMessage("message_name", ["more", "params"]);
184 v8::Local<v8::Array> placeholders = v8_substitutions.As<v8::Array>();
185 uint32_t count = placeholders->Length();
186 if (count > 9)
187 return v8::Undefined(isolate);
188
189 for (uint32_t i = 0; i < count; ++i) {
190 v8::Local<v8::Value> placeholder;
191 if (!placeholders->Get(context, i).ToLocal(&placeholder))
192 return v8::Undefined(isolate);
193 // Note: this tries to convert each entry to a JS string, which can fail.
194 // If it does, String::Utf8Value() catches the error and doesn't surface
195 // it to the calling script (though the call may still be observable,
196 // since this goes through an object's toString() method). If it fails,
197 // we just silently ignore the value.
198 v8::String::Utf8Value string_value(isolate, placeholder);
199 if (*string_value)
200 substitutions.push_back(*string_value);
201 }
202 } else if (v8_substitutions->IsString()) {
203 // chrome.i18n.getMessage("message_name", "one param");
204 substitutions.push_back(gin::V8ToString(isolate, v8_substitutions));
205 }
206 // TODO(devlin): We currently just ignore any non-string, non-array values
207 // for substitutions, but the type is documented as 'any'. We should either
208 // enforce type more heavily, or throw an error here.
209
210 if (v8_options->IsObject()) {
211 v8::Local<v8::Object> options = v8_options.As<v8::Object>();
212 v8::Local<v8::Value> key =
213 v8::String::NewFromUtf8(isolate, "escapeLt").ToLocalChecked();
214 v8::Local<v8::Value> html;
215 if (options->Get(context, key).ToLocal(&html) && html->IsBoolean() &&
216 html.As<v8::Boolean>()->Value()) {
217 base::ReplaceChars(message, "<", "<", &message);
218 }
219 }
220
221 // NOTE: We call ReplaceStringPlaceholders even if |substitutions| is empty
222 // because we substitute $$ to be $ (in order to display a dollar sign in a
223 // message). See https://crbug.com/127243.
224 message = base::ReplaceStringPlaceholders(message, substitutions, nullptr);
225 return gin::StringToV8(isolate, message);
226 }
227
228 // Returns the detected language for the sample |text|.
DetectTextLanguage(v8::Local<v8::Context> context,const std::string & text)229 v8::Local<v8::Value> DetectTextLanguage(v8::Local<v8::Context> context,
230 const std::string& text) {
231 #if !defined(TOOLKIT_QT)
232 chrome_lang_id::NNetLanguageIdentifier nnet_lang_id(/*min_num_bytes=*/0,
233 /*max_num_bytes=*/512);
234 std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> lang_results =
235 nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs);
236
237 // is_reliable is set to false if we believe the input is too short to be
238 // accurately identified by the current model.
239 if (text.size() < kCld3MinimumByteThreshold) {
240 for (auto& result : lang_results)
241 result.is_reliable = false;
242 }
243 #endif // !defined(TOOLKIT_QT)
244
245 LanguageDetectionResult result;
246
247 #if !defined(TOOLKIT_QT)
248 // Populate LanguageDetectionResult with prediction reliability, languages,
249 // and the corresponding percentages.
250 InitDetectedLanguages(lang_results, &result);
251 #endif // !defined(TOOLKIT_QT)
252 return result.ToV8(context);
253 }
254
255 } // namespace
256
257 using RequestResult = APIBindingHooks::RequestResult;
258
I18nHooksDelegate()259 I18nHooksDelegate::I18nHooksDelegate() {}
260 I18nHooksDelegate::~I18nHooksDelegate() = default;
261
HandleRequest(const std::string & method_name,const APISignature * signature,v8::Local<v8::Context> context,std::vector<v8::Local<v8::Value>> * arguments,const APITypeReferenceMap & refs)262 RequestResult I18nHooksDelegate::HandleRequest(
263 const std::string& method_name,
264 const APISignature* signature,
265 v8::Local<v8::Context> context,
266 std::vector<v8::Local<v8::Value>>* arguments,
267 const APITypeReferenceMap& refs) {
268 using Handler = RequestResult (I18nHooksDelegate::*)(
269 ScriptContext*, const std::vector<v8::Local<v8::Value>>&);
270 static const struct {
271 Handler handler;
272 base::StringPiece method;
273 } kHandlers[] = {
274 {&I18nHooksDelegate::HandleGetMessage, kGetMessage},
275 {&I18nHooksDelegate::HandleGetUILanguage, kGetUILanguage},
276 {&I18nHooksDelegate::HandleDetectLanguage, kDetectLanguage},
277 };
278
279 ScriptContext* script_context = GetScriptContextFromV8ContextChecked(context);
280
281 Handler handler = nullptr;
282 for (const auto& handler_entry : kHandlers) {
283 if (handler_entry.method == method_name) {
284 handler = handler_entry.handler;
285 break;
286 }
287 }
288
289 if (!handler)
290 return RequestResult(RequestResult::NOT_HANDLED);
291
292 APISignature::V8ParseResult parse_result =
293 signature->ParseArgumentsToV8(context, *arguments, refs);
294 if (!parse_result.succeeded()) {
295 RequestResult result(RequestResult::INVALID_INVOCATION);
296 result.error = std::move(*parse_result.error);
297 return result;
298 }
299
300 return (this->*handler)(script_context, *parse_result.arguments);
301 }
302
HandleGetMessage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)303 RequestResult I18nHooksDelegate::HandleGetMessage(
304 ScriptContext* script_context,
305 const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
306 DCHECK(script_context->extension());
307 DCHECK(parsed_arguments[0]->IsString());
308 v8::Local<v8::Value> message = GetI18nMessage(
309 gin::V8ToString(script_context->isolate(), parsed_arguments[0]),
310 script_context->extension()->id(), parsed_arguments[1],
311 parsed_arguments[2], script_context->GetRenderFrame(),
312 script_context->v8_context());
313
314 RequestResult result(RequestResult::HANDLED);
315 result.return_value = message;
316 return result;
317 }
318
HandleGetUILanguage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)319 RequestResult I18nHooksDelegate::HandleGetUILanguage(
320 ScriptContext* script_context,
321 const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
322 RequestResult result(RequestResult::HANDLED);
323 result.return_value = gin::StringToSymbol(
324 script_context->isolate(), content::RenderThread::Get()->GetLocale());
325 return result;
326 }
327
HandleDetectLanguage(ScriptContext * script_context,const std::vector<v8::Local<v8::Value>> & parsed_arguments)328 RequestResult I18nHooksDelegate::HandleDetectLanguage(
329 ScriptContext* script_context,
330 const std::vector<v8::Local<v8::Value>>& parsed_arguments) {
331 DCHECK(parsed_arguments[0]->IsString());
332 DCHECK(parsed_arguments[1]->IsFunction());
333
334 v8::Local<v8::Context> v8_context = script_context->v8_context();
335
336 v8::Local<v8::Value> detected_languages = DetectTextLanguage(
337 v8_context,
338 gin::V8ToString(script_context->isolate(), parsed_arguments[0]));
339
340 // NOTE(devlin): The JS bindings make this callback asynchronous through a
341 // setTimeout, but it shouldn't be necessary.
342 v8::Local<v8::Value> callback_args[] = {detected_languages};
343 JSRunner::Get(v8_context)
344 ->RunJSFunction(parsed_arguments[1].As<v8::Function>(),
345 script_context->v8_context(), base::size(callback_args),
346 callback_args);
347
348 return RequestResult(RequestResult::HANDLED);
349 }
350
351 } // namespace extensions
352