1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
6
7 #include <stddef.h>
8 #include <string>
9 #include <utility>
10
11 #include "base/json/json_writer.h"
12 #include "base/strings/utf_string_conversions.h"
13 #include "base/values.h"
14 #include "build/build_config.h"
15 #include "chrome/browser/extensions/component_loader.h"
16 #include "chrome/browser/extensions/extension_service.h"
17 #include "chrome/browser/profiles/profile.h"
18 #include "chrome/browser/speech/extension_api/tts_extension_api.h"
19 #include "chrome/browser/speech/extension_api/tts_extension_api_constants.h"
20 #include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
21 #include "chrome/common/extensions/extension_constants.h"
22 #include "content/public/browser/render_frame_host.h"
23 #include "content/public/browser/render_process_host.h"
24 #include "content/public/browser/tts_controller.h"
25 #include "content/public/browser/web_contents.h"
26 #include "extensions/browser/event_router.h"
27 #include "extensions/browser/extension_host.h"
28 #include "extensions/browser/extension_registry.h"
29 #include "extensions/browser/extension_system.h"
30 #include "extensions/browser/process_manager.h"
31 #include "extensions/common/extension.h"
32 #include "extensions/common/extension_set.h"
33 #include "net/base/network_change_notifier.h"
34 #include "third_party/blink/public/mojom/devtools/console_message.mojom.h"
35 #include "ui/base/l10n/l10n_util.h"
36
37 using extensions::EventRouter;
38 using extensions::Extension;
39 using extensions::ExtensionSystem;
40
41 namespace constants = tts_extension_api_constants;
42
// Names of the ttsEngine events that this file dispatches to TTS engine
// extensions through the extensions EventRouter.
namespace tts_engine_events {
// Dispatched by TtsExtensionEngine::Speak().
const char kOnSpeak[] = "ttsEngine.onSpeak";
// Dispatched by TtsExtensionEngine::Stop().
const char kOnStop[] = "ttsEngine.onStop";
// Dispatched by TtsExtensionEngine::Pause().
const char kOnPause[] = "ttsEngine.onPause";
// Dispatched by TtsExtensionEngine::Resume().
const char kOnResume[] = "ttsEngine.onResume";
}  // namespace tts_engine_events
49
50 namespace {
51
// Key of the per-extension preference that keeps track of the TTS voices a
// TTS engine extension makes available. It is written by
// ExtensionTtsEngineUpdateVoicesFunction::Run() and read back by
// GetVoicesInternal().
const char kPrefTtsVoices[] = "tts_voices";
55
WarnIfMissingPauseOrResumeListener(Profile * profile,EventRouter * event_router,std::string extension_id)56 void WarnIfMissingPauseOrResumeListener(Profile* profile,
57 EventRouter* event_router,
58 std::string extension_id) {
59 bool has_onpause = event_router->ExtensionHasEventListener(
60 extension_id, tts_engine_events::kOnPause);
61 bool has_onresume = event_router->ExtensionHasEventListener(
62 extension_id, tts_engine_events::kOnResume);
63 if (has_onpause == has_onresume)
64 return;
65
66 extensions::ExtensionHost* host =
67 extensions::ProcessManager::Get(profile)->GetBackgroundHostForExtension(
68 extension_id);
69 host->host_contents()->GetMainFrame()->AddMessageToConsole(
70 blink::mojom::ConsoleMessageLevel::kWarning,
71 constants::kErrorMissingPauseOrResume);
72 }
73
74 std::unique_ptr<std::vector<extensions::TtsVoice>>
ValidateAndConvertToTtsVoiceVector(const extensions::Extension * extension,const base::ListValue & voices_data,bool return_after_first_error,const char ** error)75 ValidateAndConvertToTtsVoiceVector(const extensions::Extension* extension,
76 const base::ListValue& voices_data,
77 bool return_after_first_error,
78 const char** error) {
79 auto tts_voices = std::make_unique<std::vector<extensions::TtsVoice>>();
80 for (size_t i = 0; i < voices_data.GetSize(); i++) {
81 extensions::TtsVoice voice;
82 const base::DictionaryValue* voice_data = nullptr;
83 voices_data.GetDictionary(i, &voice_data);
84
85 // Note partial validation of these attributes occurs based on tts engine's
86 // json schema (e.g. for data type matching). The missing checks follow
87 // similar checks in manifest parsing.
88 if (voice_data->HasKey(constants::kVoiceNameKey))
89 voice_data->GetString(constants::kVoiceNameKey, &voice.voice_name);
90 if (voice_data->HasKey(constants::kLangKey)) {
91 voice_data->GetString(constants::kLangKey, &voice.lang);
92 if (!l10n_util::IsValidLocaleSyntax(voice.lang)) {
93 *error = constants::kErrorInvalidLang;
94 if (return_after_first_error) {
95 tts_voices->clear();
96 return tts_voices;
97 }
98 continue;
99 }
100 }
101 if (voice_data->HasKey(constants::kRemoteKey))
102 voice_data->GetBoolean(constants::kRemoteKey, &voice.remote);
103 if (voice_data->HasKey(constants::kExtensionIdKey)) {
104 // Allow this for clients who might have used |chrome.tts.getVoices| to
105 // update existing voices. However, trying to update the voice of another
106 // extension should trigger an error.
107 std::string extension_id;
108 voice_data->GetString(constants::kExtensionIdKey, &extension_id);
109 if (extension->id() != extension_id) {
110 *error = constants::kErrorExtensionIdMismatch;
111 if (return_after_first_error) {
112 tts_voices->clear();
113 return tts_voices;
114 }
115 continue;
116 }
117 }
118 const base::ListValue* event_types = nullptr;
119 if (voice_data->HasKey(constants::kEventTypesKey))
120 voice_data->GetList(constants::kEventTypesKey, &event_types);
121
122 if (event_types) {
123 for (size_t j = 0; j < event_types->GetSize(); j++) {
124 std::string event_type;
125 event_types->GetString(j, &event_type);
126 voice.event_types.insert(event_type);
127 }
128 }
129
130 tts_voices->push_back(voice);
131 }
132 return tts_voices;
133 }
134
135 // Get the voices for an extension, checking the preferences first
136 // (in case the extension has ever called UpdateVoices in the past),
137 // and the manifest second.
GetVoicesInternal(content::BrowserContext * context,const extensions::Extension * extension)138 std::unique_ptr<std::vector<extensions::TtsVoice>> GetVoicesInternal(
139 content::BrowserContext* context,
140 const extensions::Extension* extension) {
141 // First try to get the saved set of voices from extension prefs.
142 auto* extension_prefs = extensions::ExtensionPrefs::Get(context);
143 const base::ListValue* voices_data = nullptr;
144 if (extension_prefs->ReadPrefAsList(extension->id(), kPrefTtsVoices,
145 &voices_data)) {
146 const char* error = nullptr;
147 return ValidateAndConvertToTtsVoiceVector(
148 extension, *voices_data,
149 /* return_after_first_error = */ false, &error);
150 }
151
152 // Fall back on the extension manifest.
153 auto* manifest_voices = extensions::TtsVoices::GetTtsVoices(extension);
154 if (manifest_voices)
155 return std::make_unique<std::vector<extensions::TtsVoice>>(
156 *manifest_voices);
157 return std::make_unique<std::vector<extensions::TtsVoice>>();
158 }
159
160 } // namespace
161
// Returns the TtsExtensionEngine instance managed by base::Singleton.
TtsExtensionEngine* TtsExtensionEngine::GetInstance() {
  return base::Singleton<TtsExtensionEngine>::get();
}
165
GetVoices(content::BrowserContext * browser_context,std::vector<content::VoiceData> * out_voices)166 void TtsExtensionEngine::GetVoices(
167 content::BrowserContext* browser_context,
168 std::vector<content::VoiceData>* out_voices) {
169 Profile* profile = Profile::FromBrowserContext(browser_context);
170 EventRouter* event_router = EventRouter::Get(profile);
171 DCHECK(event_router);
172
173 bool is_offline = (net::NetworkChangeNotifier::GetConnectionType() ==
174 net::NetworkChangeNotifier::CONNECTION_NONE);
175
176 const extensions::ExtensionSet& extensions =
177 extensions::ExtensionRegistry::Get(profile)->enabled_extensions();
178 extensions::ExtensionSet::const_iterator iter;
179 for (iter = extensions.begin(); iter != extensions.end(); ++iter) {
180 const Extension* extension = iter->get();
181
182 if (!event_router->ExtensionHasEventListener(extension->id(),
183 tts_engine_events::kOnSpeak) ||
184 !event_router->ExtensionHasEventListener(extension->id(),
185 tts_engine_events::kOnStop)) {
186 continue;
187 }
188
189 auto tts_voices = GetVoicesInternal(profile, extension);
190 if (!tts_voices)
191 continue;
192
193 for (size_t i = 0; i < tts_voices->size(); ++i) {
194 const extensions::TtsVoice& voice = tts_voices->at(i);
195
196 // Don't return remote voices when the system is offline.
197 if (voice.remote && is_offline)
198 continue;
199
200 out_voices->push_back(content::VoiceData());
201 content::VoiceData& result_voice = out_voices->back();
202
203 result_voice.native = false;
204 result_voice.name = voice.voice_name;
205 result_voice.lang = voice.lang;
206 result_voice.remote = voice.remote;
207 result_voice.engine_id = extension->id();
208
209 for (auto iter = voice.event_types.begin();
210 iter != voice.event_types.end(); ++iter) {
211 result_voice.events.insert(TtsEventTypeFromString(*iter));
212 }
213
214 // If the extension sends end events, the controller will handle
215 // queueing and send interrupted and cancelled events.
216 if (voice.event_types.find(constants::kEventTypeEnd) !=
217 voice.event_types.end()) {
218 result_voice.events.insert(content::TTS_EVENT_CANCELLED);
219 result_voice.events.insert(content::TTS_EVENT_INTERRUPTED);
220 }
221 }
222 }
223 }
224
Speak(content::TtsUtterance * utterance,const content::VoiceData & voice)225 void TtsExtensionEngine::Speak(content::TtsUtterance* utterance,
226 const content::VoiceData& voice) {
227 // See if the engine supports the "end" event; if so, we can keep the
228 // utterance around and track it. If not, we're finished with this
229 // utterance now.
230 bool sends_end_event =
231 voice.events.find(content::TTS_EVENT_END) != voice.events.end();
232
233 std::unique_ptr<base::ListValue> args(new base::ListValue());
234 args->AppendString(utterance->GetText());
235
236 // Pass through most options to the speech engine, but remove some
237 // that are handled internally.
238 std::unique_ptr<base::DictionaryValue> options(
239 static_cast<base::DictionaryValue*>(utterance->GetOptions()->DeepCopy()));
240 if (options->HasKey(constants::kRequiredEventTypesKey))
241 options->Remove(constants::kRequiredEventTypesKey, NULL);
242 if (options->HasKey(constants::kDesiredEventTypesKey))
243 options->Remove(constants::kDesiredEventTypesKey, NULL);
244 if (sends_end_event && options->HasKey(constants::kEnqueueKey))
245 options->Remove(constants::kEnqueueKey, NULL);
246 if (options->HasKey(constants::kSrcIdKey))
247 options->Remove(constants::kSrcIdKey, NULL);
248 if (options->HasKey(constants::kIsFinalEventKey))
249 options->Remove(constants::kIsFinalEventKey, NULL);
250 if (options->HasKey(constants::kOnEventKey))
251 options->Remove(constants::kOnEventKey, NULL);
252
253 // Get the volume, pitch, and rate, but only if they weren't already in
254 // the options. TODO(dmazzoni): these shouldn't be redundant.
255 // http://crbug.com/463264
256 if (!options->HasKey(constants::kRateKey)) {
257 options->SetDouble(constants::kRateKey,
258 utterance->GetContinuousParameters().rate);
259 }
260 if (!options->HasKey(constants::kPitchKey)) {
261 options->SetDouble(constants::kPitchKey,
262 utterance->GetContinuousParameters().pitch);
263 }
264 if (!options->HasKey(constants::kVolumeKey)) {
265 options->SetDouble(constants::kVolumeKey,
266 utterance->GetContinuousParameters().volume);
267 }
268
269 // Add the voice name and language to the options if they're not
270 // already there, since they might have been picked by the TTS controller
271 // rather than directly by the client that requested the speech.
272 if (!options->HasKey(constants::kVoiceNameKey))
273 options->SetString(constants::kVoiceNameKey, voice.name);
274 if (!options->HasKey(constants::kLangKey))
275 options->SetString(constants::kLangKey, voice.lang);
276
277 args->Append(std::move(options));
278 args->AppendInteger(utterance->GetId());
279
280 std::string json;
281 base::JSONWriter::Write(*args, &json);
282
283 Profile* profile =
284 Profile::FromBrowserContext(utterance->GetBrowserContext());
285 auto event = std::make_unique<extensions::Event>(
286 extensions::events::TTS_ENGINE_ON_SPEAK, tts_engine_events::kOnSpeak,
287 std::move(args), profile);
288 EventRouter::Get(profile)->DispatchEventToExtension(utterance->GetEngineId(),
289 std::move(event));
290 }
291
Stop(content::TtsUtterance * utterance)292 void TtsExtensionEngine::Stop(content::TtsUtterance* utterance) {
293 std::unique_ptr<base::ListValue> args(new base::ListValue());
294 Profile* profile =
295 Profile::FromBrowserContext(utterance->GetBrowserContext());
296 auto event = std::make_unique<extensions::Event>(
297 extensions::events::TTS_ENGINE_ON_STOP, tts_engine_events::kOnStop,
298 std::move(args), profile);
299 EventRouter::Get(profile)->DispatchEventToExtension(utterance->GetEngineId(),
300 std::move(event));
301 }
302
Pause(content::TtsUtterance * utterance)303 void TtsExtensionEngine::Pause(content::TtsUtterance* utterance) {
304 std::unique_ptr<base::ListValue> args(new base::ListValue());
305 Profile* profile =
306 Profile::FromBrowserContext(utterance->GetBrowserContext());
307 auto event = std::make_unique<extensions::Event>(
308 extensions::events::TTS_ENGINE_ON_PAUSE, tts_engine_events::kOnPause,
309 std::move(args), profile);
310 EventRouter* event_router = EventRouter::Get(profile);
311 std::string id = utterance->GetEngineId();
312 event_router->DispatchEventToExtension(id, std::move(event));
313 WarnIfMissingPauseOrResumeListener(profile, event_router, id);
314 }
315
Resume(content::TtsUtterance * utterance)316 void TtsExtensionEngine::Resume(content::TtsUtterance* utterance) {
317 std::unique_ptr<base::ListValue> args(new base::ListValue());
318 Profile* profile =
319 Profile::FromBrowserContext(utterance->GetBrowserContext());
320 auto event = std::make_unique<extensions::Event>(
321 extensions::events::TTS_ENGINE_ON_RESUME, tts_engine_events::kOnResume,
322 std::move(args), profile);
323 EventRouter* event_router = EventRouter::Get(profile);
324 std::string id = utterance->GetEngineId();
325 event_router->DispatchEventToExtension(id, std::move(event));
326 WarnIfMissingPauseOrResumeListener(profile, event_router, id);
327 }
328
LoadBuiltInTtsEngine(content::BrowserContext * browser_context)329 bool TtsExtensionEngine::LoadBuiltInTtsEngine(
330 content::BrowserContext* browser_context) {
331 if (disable_built_in_tts_engine_for_testing_)
332 return false;
333
334 #if defined(OS_CHROMEOS)
335 Profile* profile = Profile::FromBrowserContext(browser_context);
336
337 // Load the component extensions into this profile.
338 extensions::ExtensionService* extension_service =
339 extensions::ExtensionSystem::Get(profile)->extension_service();
340 DCHECK(extension_service);
341 extension_service->component_loader()->AddChromeOsSpeechSynthesisExtensions();
342 return true;
343 #else
344 return false;
345 #endif
346 }
347
348 ExtensionFunction::ResponseAction
Run()349 ExtensionTtsEngineUpdateVoicesFunction::Run() {
350 base::ListValue* voices_data = nullptr;
351 EXTENSION_FUNCTION_VALIDATE(args_->GetList(0, &voices_data));
352
353 // Validate the voices and return an error if there's a problem.
354 const char* error = nullptr;
355 auto tts_voices = ValidateAndConvertToTtsVoiceVector(
356 extension(), *voices_data,
357 /* return_after_first_error = */ true, &error);
358 if (error)
359 return RespondNow(Error(error));
360
361 // Save these voices to the extension's prefs if they validated.
362 auto* extension_prefs = extensions::ExtensionPrefs::Get(browser_context());
363 extension_prefs->UpdateExtensionPref(
364 extension()->id(), kPrefTtsVoices,
365 std::make_unique<base::Value>(voices_data->Clone()));
366
367 // Notify that voices have changed.
368 content::TtsController::GetInstance()->VoicesChanged();
369
370 return RespondNow(NoArguments());
371 }
372
373 ExtensionFunction::ResponseAction
Run()374 ExtensionTtsEngineSendTtsEventFunction::Run() {
375 int utterance_id = 0;
376 EXTENSION_FUNCTION_VALIDATE(args_->GetInteger(0, &utterance_id));
377
378 base::DictionaryValue* event = nullptr;
379 EXTENSION_FUNCTION_VALIDATE(args_->GetDictionary(1, &event));
380
381 std::string event_type;
382 EXTENSION_FUNCTION_VALIDATE(
383 event->GetString(constants::kEventTypeKey, &event_type));
384
385 int char_index = 0;
386 if (event->HasKey(constants::kCharIndexKey)) {
387 EXTENSION_FUNCTION_VALIDATE(
388 event->GetInteger(constants::kCharIndexKey, &char_index));
389 }
390
391 int length = -1;
392 if (event->HasKey(constants::kLengthKey)) {
393 EXTENSION_FUNCTION_VALIDATE(
394 event->GetInteger(constants::kLengthKey, &length));
395 }
396
397 // Make sure the extension has included this event type in its manifest.
398 bool event_type_allowed = false;
399 Profile* profile = Profile::FromBrowserContext(browser_context());
400 auto tts_voices = GetVoicesInternal(profile, extension());
401 if (!tts_voices)
402 return RespondNow(Error(constants::kErrorUndeclaredEventType));
403
404 for (size_t i = 0; i < tts_voices->size(); i++) {
405 const extensions::TtsVoice& voice = tts_voices->at(i);
406 if (voice.event_types.find(event_type) != voice.event_types.end()) {
407 event_type_allowed = true;
408 break;
409 }
410 }
411 if (!event_type_allowed)
412 return RespondNow(Error(constants::kErrorUndeclaredEventType));
413
414 content::TtsController* controller = content::TtsController::GetInstance();
415 if (event_type == constants::kEventTypeStart) {
416 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_START, char_index,
417 length, std::string());
418 } else if (event_type == constants::kEventTypeEnd) {
419 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_END, char_index,
420 length, std::string());
421 } else if (event_type == constants::kEventTypeWord) {
422 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_WORD, char_index,
423 length, std::string());
424 } else if (event_type == constants::kEventTypeSentence) {
425 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_SENTENCE,
426 char_index, length, std::string());
427 } else if (event_type == constants::kEventTypeMarker) {
428 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_MARKER, char_index,
429 length, std::string());
430 } else if (event_type == constants::kEventTypeError) {
431 std::string error_message;
432 event->GetString(constants::kErrorMessageKey, &error_message);
433 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_ERROR, char_index,
434 length, error_message);
435 } else if (event_type == constants::kEventTypePause) {
436 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_PAUSE, char_index,
437 length, std::string());
438 } else if (event_type == constants::kEventTypeResume) {
439 controller->OnTtsEvent(utterance_id, content::TTS_EVENT_RESUME, char_index,
440 length, std::string());
441 } else {
442 EXTENSION_FUNCTION_VALIDATE(false);
443 }
444
445 return RespondNow(NoArguments());
446 }
447