1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
6 
7 #include <stddef.h>
8 #include <string>
9 #include <utility>
10 
11 #include "base/json/json_writer.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/extensions/component_loader.h"
16 #include "chrome/browser/extensions/extension_service.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
19 #include "chrome/browser/speech/extension_api/tts_extension_api_constants.h"
20 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
21 #include "chrome/common/extensions/extension_constants.h"
22 #include "content/public/browser/render_frame_host.h"
23 #include "content/public/browser/render_process_host.h"
24 #include "content/public/browser/tts_controller.h"
25 #include "content/public/browser/web_contents.h"
26 #include "extensions/browser/event_router.h"
27 #include "extensions/browser/extension_host.h"
28 #include "extensions/browser/extension_registry.h"
29 #include "extensions/browser/extension_system.h"
30 #include "extensions/browser/process_manager.h"
31 #include "extensions/common/extension.h"
32 #include "extensions/common/extension_set.h"
33 #include "net/base/network_change_notifier.h"
34 #include "third_party/blink/public/mojom/devtools/console_message.mojom.h"
35 #include "ui/base/l10n/l10n_util.h"
36 
37 using extensions::EventRouter;
38 using extensions::Extension;
39 using extensions::ExtensionSystem;
40 
41 namespace constants = tts_extension_api_constants;
42 
// Names of the ttsEngine extension events that this file dispatches to a TTS
// engine extension (see Speak(), Stop(), Pause() and Resume() below) and
// checks listeners for.
namespace tts_engine_events {
const char kOnSpeak[] = "ttsEngine.onSpeak";
const char kOnStop[] = "ttsEngine.onStop";
const char kOnPause[] = "ttsEngine.onPause";
const char kOnResume[] = "ttsEngine.onResume";
}  // namespace tts_engine_events
49 
50 namespace {
51 
// Key of the per-extension preference that stores the list of TTS voices a
// TTS engine extension makes available. It is written when the extension
// updates its voices and read back when the extension's voices are looked up.
const char kPrefTtsVoices[] = "tts_voices";
55 
WarnIfMissingPauseOrResumeListener(Profile * profile,EventRouter * event_router,std::string extension_id)56 void WarnIfMissingPauseOrResumeListener(Profile* profile,
57                                         EventRouter* event_router,
58                                         std::string extension_id) {
59   bool has_onpause = event_router->ExtensionHasEventListener(
60       extension_id, tts_engine_events::kOnPause);
61   bool has_onresume = event_router->ExtensionHasEventListener(
62       extension_id, tts_engine_events::kOnResume);
63   if (has_onpause == has_onresume)
64     return;
65 
66   extensions::ExtensionHost* host =
67       extensions::ProcessManager::Get(profile)->GetBackgroundHostForExtension(
68           extension_id);
69   host->host_contents()->GetMainFrame()->AddMessageToConsole(
70       blink::mojom::ConsoleMessageLevel::kWarning,
71       constants::kErrorMissingPauseOrResume);
72 }
73 
74 std::unique_ptr<std::vector<extensions::TtsVoice>>
ValidateAndConvertToTtsVoiceVector(const extensions::Extension * extension,const base::ListValue & voices_data,bool return_after_first_error,const char ** error)75 ValidateAndConvertToTtsVoiceVector(const extensions::Extension* extension,
76                                    const base::ListValue& voices_data,
77                                    bool return_after_first_error,
78                                    const char** error) {
79   auto tts_voices = std::make_unique<std::vector<extensions::TtsVoice>>();
80   for (size_t i = 0; i < voices_data.GetSize(); i++) {
81     extensions::TtsVoice voice;
82     const base::DictionaryValue* voice_data = nullptr;
83     voices_data.GetDictionary(i, &voice_data);
84 
85     // Note partial validation of these attributes occurs based on tts engine's
86     // json schema (e.g. for data type matching). The missing checks follow
87     // similar checks in manifest parsing.
88     if (voice_data->HasKey(constants::kVoiceNameKey))
89       voice_data->GetString(constants::kVoiceNameKey, &voice.voice_name);
90     if (voice_data->HasKey(constants::kLangKey)) {
91       voice_data->GetString(constants::kLangKey, &voice.lang);
92       if (!l10n_util::IsValidLocaleSyntax(voice.lang)) {
93         *error = constants::kErrorInvalidLang;
94         if (return_after_first_error) {
95           tts_voices->clear();
96           return tts_voices;
97         }
98         continue;
99       }
100     }
101     if (voice_data->HasKey(constants::kRemoteKey))
102       voice_data->GetBoolean(constants::kRemoteKey, &voice.remote);
103     if (voice_data->HasKey(constants::kExtensionIdKey)) {
104       // Allow this for clients who might have used |chrome.tts.getVoices| to
105       // update existing voices. However, trying to update the voice of another
106       // extension should trigger an error.
107       std::string extension_id;
108       voice_data->GetString(constants::kExtensionIdKey, &extension_id);
109       if (extension->id() != extension_id) {
110         *error = constants::kErrorExtensionIdMismatch;
111         if (return_after_first_error) {
112           tts_voices->clear();
113           return tts_voices;
114         }
115         continue;
116       }
117     }
118     const base::ListValue* event_types = nullptr;
119     if (voice_data->HasKey(constants::kEventTypesKey))
120       voice_data->GetList(constants::kEventTypesKey, &event_types);
121 
122     if (event_types) {
123       for (size_t j = 0; j < event_types->GetSize(); j++) {
124         std::string event_type;
125         event_types->GetString(j, &event_type);
126         voice.event_types.insert(event_type);
127       }
128     }
129 
130     tts_voices->push_back(voice);
131   }
132   return tts_voices;
133 }
134 
135 // Get the voices for an extension, checking the preferences first
136 // (in case the extension has ever called UpdateVoices in the past),
137 // and the manifest second.
GetVoicesInternal(content::BrowserContext * context,const extensions::Extension * extension)138 std::unique_ptr<std::vector<extensions::TtsVoice>> GetVoicesInternal(
139     content::BrowserContext* context,
140     const extensions::Extension* extension) {
141   // First try to get the saved set of voices from extension prefs.
142   auto* extension_prefs = extensions::ExtensionPrefs::Get(context);
143   const base::ListValue* voices_data = nullptr;
144   if (extension_prefs->ReadPrefAsList(extension->id(), kPrefTtsVoices,
145                                       &voices_data)) {
146     const char* error = nullptr;
147     return ValidateAndConvertToTtsVoiceVector(
148         extension, *voices_data,
149         /* return_after_first_error = */ false, &error);
150   }
151 
152   // Fall back on the extension manifest.
153   auto* manifest_voices = extensions::TtsVoices::GetTtsVoices(extension);
154   if (manifest_voices)
155     return std::make_unique<std::vector<extensions::TtsVoice>>(
156         *manifest_voices);
157   return std::make_unique<std::vector<extensions::TtsVoice>>();
158 }
159 
160 }  // namespace
161 
GetInstance()162 TtsExtensionEngine* TtsExtensionEngine::GetInstance() {
163   return base::Singleton<TtsExtensionEngine>::get();
164 }
165 
GetVoices(content::BrowserContext * browser_context,std::vector<content::VoiceData> * out_voices)166 void TtsExtensionEngine::GetVoices(
167     content::BrowserContext* browser_context,
168     std::vector<content::VoiceData>* out_voices) {
169   Profile* profile = Profile::FromBrowserContext(browser_context);
170   EventRouter* event_router = EventRouter::Get(profile);
171   DCHECK(event_router);
172 
173   bool is_offline = (net::NetworkChangeNotifier::GetConnectionType() ==
174                      net::NetworkChangeNotifier::CONNECTION_NONE);
175 
176   const extensions::ExtensionSet& extensions =
177       extensions::ExtensionRegistry::Get(profile)->enabled_extensions();
178   extensions::ExtensionSet::const_iterator iter;
179   for (iter = extensions.begin(); iter != extensions.end(); ++iter) {
180     const Extension* extension = iter->get();
181 
182     if (!event_router->ExtensionHasEventListener(extension->id(),
183                                                  tts_engine_events::kOnSpeak) ||
184         !event_router->ExtensionHasEventListener(extension->id(),
185                                                  tts_engine_events::kOnStop)) {
186       continue;
187     }
188 
189     auto tts_voices = GetVoicesInternal(profile, extension);
190     if (!tts_voices)
191       continue;
192 
193     for (size_t i = 0; i < tts_voices->size(); ++i) {
194       const extensions::TtsVoice& voice = tts_voices->at(i);
195 
196       // Don't return remote voices when the system is offline.
197       if (voice.remote && is_offline)
198         continue;
199 
200       out_voices->push_back(content::VoiceData());
201       content::VoiceData& result_voice = out_voices->back();
202 
203       result_voice.native = false;
204       result_voice.name = voice.voice_name;
205       result_voice.lang = voice.lang;
206       result_voice.remote = voice.remote;
207       result_voice.engine_id = extension->id();
208 
209       for (auto iter = voice.event_types.begin();
210            iter != voice.event_types.end(); ++iter) {
211         result_voice.events.insert(TtsEventTypeFromString(*iter));
212       }
213 
214       // If the extension sends end events, the controller will handle
215       // queueing and send interrupted and cancelled events.
216       if (voice.event_types.find(constants::kEventTypeEnd) !=
217           voice.event_types.end()) {
218         result_voice.events.insert(content::TTS_EVENT_CANCELLED);
219         result_voice.events.insert(content::TTS_EVENT_INTERRUPTED);
220       }
221     }
222   }
223 }
224 
Speak(content::TtsUtterance * utterance,const content::VoiceData & voice)225 void TtsExtensionEngine::Speak(content::TtsUtterance* utterance,
226                                const content::VoiceData& voice) {
227   // See if the engine supports the "end" event; if so, we can keep the
228   // utterance around and track it. If not, we're finished with this
229   // utterance now.
230   bool sends_end_event =
231       voice.events.find(content::TTS_EVENT_END) != voice.events.end();
232 
233   std::unique_ptr<base::ListValue> args(new base::ListValue());
234   args->AppendString(utterance->GetText());
235 
236   // Pass through most options to the speech engine, but remove some
237   // that are handled internally.
238   std::unique_ptr<base::DictionaryValue> options(
239       static_cast<base::DictionaryValue*>(utterance->GetOptions()->DeepCopy()));
240   if (options->HasKey(constants::kRequiredEventTypesKey))
241     options->Remove(constants::kRequiredEventTypesKey, NULL);
242   if (options->HasKey(constants::kDesiredEventTypesKey))
243     options->Remove(constants::kDesiredEventTypesKey, NULL);
244   if (sends_end_event && options->HasKey(constants::kEnqueueKey))
245     options->Remove(constants::kEnqueueKey, NULL);
246   if (options->HasKey(constants::kSrcIdKey))
247     options->Remove(constants::kSrcIdKey, NULL);
248   if (options->HasKey(constants::kIsFinalEventKey))
249     options->Remove(constants::kIsFinalEventKey, NULL);
250   if (options->HasKey(constants::kOnEventKey))
251     options->Remove(constants::kOnEventKey, NULL);
252 
253   // Get the volume, pitch, and rate, but only if they weren't already in
254   // the options. TODO(dmazzoni): these shouldn't be redundant.
255   // http://crbug.com/463264
256   if (!options->HasKey(constants::kRateKey)) {
257     options->SetDouble(constants::kRateKey,
258                        utterance->GetContinuousParameters().rate);
259   }
260   if (!options->HasKey(constants::kPitchKey)) {
261     options->SetDouble(constants::kPitchKey,
262                        utterance->GetContinuousParameters().pitch);
263   }
264   if (!options->HasKey(constants::kVolumeKey)) {
265     options->SetDouble(constants::kVolumeKey,
266                        utterance->GetContinuousParameters().volume);
267   }
268 
269   // Add the voice name and language to the options if they're not
270   // already there, since they might have been picked by the TTS controller
271   // rather than directly by the client that requested the speech.
272   if (!options->HasKey(constants::kVoiceNameKey))
273     options->SetString(constants::kVoiceNameKey, voice.name);
274   if (!options->HasKey(constants::kLangKey))
275     options->SetString(constants::kLangKey, voice.lang);
276 
277   args->Append(std::move(options));
278   args->AppendInteger(utterance->GetId());
279 
280   std::string json;
281   base::JSONWriter::Write(*args, &json);
282 
283   Profile* profile =
284       Profile::FromBrowserContext(utterance->GetBrowserContext());
285   auto event = std::make_unique<extensions::Event>(
286       extensions::events::TTS_ENGINE_ON_SPEAK, tts_engine_events::kOnSpeak,
287       std::move(args), profile);
288   EventRouter::Get(profile)->DispatchEventToExtension(utterance->GetEngineId(),
289                                                       std::move(event));
290 }
291 
Stop(content::TtsUtterance * utterance)292 void TtsExtensionEngine::Stop(content::TtsUtterance* utterance) {
293   std::unique_ptr<base::ListValue> args(new base::ListValue());
294   Profile* profile =
295       Profile::FromBrowserContext(utterance->GetBrowserContext());
296   auto event = std::make_unique<extensions::Event>(
297       extensions::events::TTS_ENGINE_ON_STOP, tts_engine_events::kOnStop,
298       std::move(args), profile);
299   EventRouter::Get(profile)->DispatchEventToExtension(utterance->GetEngineId(),
300                                                       std::move(event));
301 }
302 
Pause(content::TtsUtterance * utterance)303 void TtsExtensionEngine::Pause(content::TtsUtterance* utterance) {
304   std::unique_ptr<base::ListValue> args(new base::ListValue());
305   Profile* profile =
306       Profile::FromBrowserContext(utterance->GetBrowserContext());
307   auto event = std::make_unique<extensions::Event>(
308       extensions::events::TTS_ENGINE_ON_PAUSE, tts_engine_events::kOnPause,
309       std::move(args), profile);
310   EventRouter* event_router = EventRouter::Get(profile);
311   std::string id = utterance->GetEngineId();
312   event_router->DispatchEventToExtension(id, std::move(event));
313   WarnIfMissingPauseOrResumeListener(profile, event_router, id);
314 }
315 
Resume(content::TtsUtterance * utterance)316 void TtsExtensionEngine::Resume(content::TtsUtterance* utterance) {
317   std::unique_ptr<base::ListValue> args(new base::ListValue());
318   Profile* profile =
319       Profile::FromBrowserContext(utterance->GetBrowserContext());
320   auto event = std::make_unique<extensions::Event>(
321       extensions::events::TTS_ENGINE_ON_RESUME, tts_engine_events::kOnResume,
322       std::move(args), profile);
323   EventRouter* event_router = EventRouter::Get(profile);
324   std::string id = utterance->GetEngineId();
325   event_router->DispatchEventToExtension(id, std::move(event));
326   WarnIfMissingPauseOrResumeListener(profile, event_router, id);
327 }
328 
LoadBuiltInTtsEngine(content::BrowserContext * browser_context)329 bool TtsExtensionEngine::LoadBuiltInTtsEngine(
330     content::BrowserContext* browser_context) {
331   if (disable_built_in_tts_engine_for_testing_)
332     return false;
333 
334 #if defined(OS_CHROMEOS)
335   Profile* profile = Profile::FromBrowserContext(browser_context);
336 
337   // Load the component extensions into this profile.
338   extensions::ExtensionService* extension_service =
339       extensions::ExtensionSystem::Get(profile)->extension_service();
340   DCHECK(extension_service);
341   extension_service->component_loader()->AddChromeOsSpeechSynthesisExtensions();
342   return true;
343 #else
344   return false;
345 #endif
346 }
347 
348 ExtensionFunction::ResponseAction
Run()349 ExtensionTtsEngineUpdateVoicesFunction::Run() {
350   base::ListValue* voices_data = nullptr;
351   EXTENSION_FUNCTION_VALIDATE(args_->GetList(0, &voices_data));
352 
353   // Validate the voices and return an error if there's a problem.
354   const char* error = nullptr;
355   auto tts_voices = ValidateAndConvertToTtsVoiceVector(
356       extension(), *voices_data,
357       /* return_after_first_error = */ true, &error);
358   if (error)
359     return RespondNow(Error(error));
360 
361   // Save these voices to the extension's prefs if they validated.
362   auto* extension_prefs = extensions::ExtensionPrefs::Get(browser_context());
363   extension_prefs->UpdateExtensionPref(
364       extension()->id(), kPrefTtsVoices,
365       std::make_unique<base::Value>(voices_data->Clone()));
366 
367   // Notify that voices have changed.
368   content::TtsController::GetInstance()->VoicesChanged();
369 
370   return RespondNow(NoArguments());
371 }
372 
373 ExtensionFunction::ResponseAction
Run()374 ExtensionTtsEngineSendTtsEventFunction::Run() {
375   int utterance_id = 0;
376   EXTENSION_FUNCTION_VALIDATE(args_->GetInteger(0, &utterance_id));
377 
378   base::DictionaryValue* event = nullptr;
379   EXTENSION_FUNCTION_VALIDATE(args_->GetDictionary(1, &event));
380 
381   std::string event_type;
382   EXTENSION_FUNCTION_VALIDATE(
383       event->GetString(constants::kEventTypeKey, &event_type));
384 
385   int char_index = 0;
386   if (event->HasKey(constants::kCharIndexKey)) {
387     EXTENSION_FUNCTION_VALIDATE(
388         event->GetInteger(constants::kCharIndexKey, &char_index));
389   }
390 
391   int length = -1;
392   if (event->HasKey(constants::kLengthKey)) {
393     EXTENSION_FUNCTION_VALIDATE(
394         event->GetInteger(constants::kLengthKey, &length));
395   }
396 
397   // Make sure the extension has included this event type in its manifest.
398   bool event_type_allowed = false;
399   Profile* profile = Profile::FromBrowserContext(browser_context());
400   auto tts_voices = GetVoicesInternal(profile, extension());
401   if (!tts_voices)
402     return RespondNow(Error(constants::kErrorUndeclaredEventType));
403 
404   for (size_t i = 0; i < tts_voices->size(); i++) {
405     const extensions::TtsVoice& voice = tts_voices->at(i);
406     if (voice.event_types.find(event_type) != voice.event_types.end()) {
407       event_type_allowed = true;
408       break;
409     }
410   }
411   if (!event_type_allowed)
412     return RespondNow(Error(constants::kErrorUndeclaredEventType));
413 
414   content::TtsController* controller = content::TtsController::GetInstance();
415   if (event_type == constants::kEventTypeStart) {
416     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_START, char_index,
417                            length, std::string());
418   } else if (event_type == constants::kEventTypeEnd) {
419     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_END, char_index,
420                            length, std::string());
421   } else if (event_type == constants::kEventTypeWord) {
422     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_WORD, char_index,
423                            length, std::string());
424   } else if (event_type == constants::kEventTypeSentence) {
425     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_SENTENCE,
426                            char_index, length, std::string());
427   } else if (event_type == constants::kEventTypeMarker) {
428     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_MARKER, char_index,
429                            length, std::string());
430   } else if (event_type == constants::kEventTypeError) {
431     std::string error_message;
432     event->GetString(constants::kErrorMessageKey, &error_message);
433     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_ERROR, char_index,
434                            length, error_message);
435   } else if (event_type == constants::kEventTypePause) {
436     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_PAUSE, char_index,
437                            length, std::string());
438   } else if (event_type == constants::kEventTypeResume) {
439     controller->OnTtsEvent(utterance_id, content::TTS_EVENT_RESUME, char_index,
440                            length, std::string());
441   } else {
442     EXTENSION_FUNCTION_VALIDATE(false);
443   }
444 
445   return RespondNow(NoArguments());
446 }
447