1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include <math.h>
6 #include <stddef.h>
7
8 #include <map>
9 #include <memory>
10
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/debug/leak_annotations.h"
14 #include "base/macros.h"
15 #include "base/no_destructor.h"
16 #include "base/sequenced_task_runner.h"
17 #include "base/synchronization/lock.h"
18 #include "base/task/task_traits.h"
19 #include "base/task/thread_pool.h"
20 #include "base/task_runner.h"
21 #include "base/threading/sequence_bound.h"
22 #include "content/browser/speech/tts_platform_impl.h"
23 #include "content/public/browser/browser_task_traits.h"
24 #include "content/public/browser/browser_thread.h"
25 #include "content/public/browser/tts_controller.h"
26 #include "content/public/common/content_switches.h"
27 #include "library_loaders/libspeechd.h"
28
29 namespace content {
30
31 namespace {
32
// Describes one speech-dispatcher voice together with the output module it
// was listed under. Filled in by
// TtsPlatformImplBackgroundWorker::InitializeVoices().
struct SPDChromeVoice {
  // Voice name as reported by speech-dispatcher (SPDVoice::name).
  std::string name;
  // Name of the speech-dispatcher output module that listed this voice.
  std::string module;
  // Language string reported for the voice (SPDVoice::language).
  std::string language;
};
38
// Voices keyed by "<voice name> <module name>"; the key is built in
// TtsPlatformImplBackgroundWorker::InitializeVoices().
using PlatformVoices = std::map<std::string, SPDChromeVoice>;

// Sentinel used when no utterance is being tracked.
constexpr int kInvalidUtteranceId = -1;
// Sentinel used when no speech-dispatcher message is being tracked. It is also
// the value spd_say() returns on failure.
constexpr int kInvalidMessageUid = -1;
43
44 } // namespace
45
// Wraps every call into libspeechd (speech-dispatcher). TtsPlatformImplLinux
// owns one instance through base::SequenceBound and reaches it exclusively by
// posting member calls to it; results and TTS events are sent back by posting
// tasks to the UI thread task runner.
class TtsPlatformImplBackgroundWorker {
 public:
  TtsPlatformImplBackgroundWorker() = default;
  TtsPlatformImplBackgroundWorker(const TtsPlatformImplBackgroundWorker&) =
      delete;
  TtsPlatformImplBackgroundWorker& operator=(
      const TtsPlatformImplBackgroundWorker&) = delete;
  ~TtsPlatformImplBackgroundWorker() = default;

  // Loads libspeechd, opens a connection, enumerates the voices and reports
  // the outcome to TtsPlatformImplLinux::OnInitialized() on the UI thread.
  void Initialize();

  // Applies |voice|, |rate|, |pitch| and |lang| to the connection and hands
  // |parsed_utterance| to spd_say(). |on_speak_finished| is run on the UI
  // thread with true when spd_say() returned a valid message id.
  void ProcessSpeech(int utterance_id,
                     const std::string& parsed_utterance,
                     const std::string& lang,
                     float rate,
                     float pitch,
                     SPDChromeVoice voice,
                     base::OnceCallback<void(bool)> on_speak_finished);

  // Pause/Resume/StopSpeaking only act when a message id is being tracked.
  void Pause();
  void Resume();
  void StopSpeaking();
  // Closes the speech-dispatcher connection, if any.
  void Shutdown();

 private:
  // Loads the libspeechd shared library; returns whether loading succeeded.
  bool InitializeSpeechd();
  // Fills the given map with every voice of every output module.
  void InitializeVoices(PlatformVoices*);
  // Opens |conn_| and registers the notification callbacks on it.
  void OpenConnection();
  // Closes |conn_| if it is open and resets it to nullptr.
  void CloseConnection();

  // Translates a speech-dispatcher notification for the tracked message into
  // the matching TTS event.
  void OnSpeechEvent(int msg_id, SPDNotificationType type);

  // Send a TTS event notification to the TTS controller.
  void SendTtsEvent(int utterance_id,
                    TtsEventType event_type,
                    int char_index,
                    int length = -1);

  // Callbacks installed on the SPDConnection. They only re-post the
  // notification to OnSpeechEvent() through the SequenceBound worker.
  static void NotificationCallback(size_t msg_id,
                                   size_t client_id,
                                   SPDNotificationType type);

  static void IndexMarkCallback(size_t msg_id,
                                size_t client_id,
                                SPDNotificationType state,
                                char* index_mark);

  // Dynamically loaded libspeechd entry points.
  LibSpeechdLoader libspeechd_loader_;
  // Open speech-dispatcher connection, or nullptr when there is none.
  SPDConnection* conn_ = nullptr;
  // Message id returned by the last spd_say(), or kInvalidMessageUid.
  int msg_uid_ = kInvalidMessageUid;

  // State of the utterance that is currently being processed.
  int utterance_id_ = kInvalidUtteranceId;
  size_t utterance_length_ = 0;
  size_t utterance_char_position_ = 0;
};
103
// Linux implementation of TtsPlatformImpl backed by speech-dispatcher. This
// object keeps the platform state checked on the UI thread and forwards the
// actual libspeechd work to a TtsPlatformImplBackgroundWorker bound to its own
// sequence.
class TtsPlatformImplLinux : public TtsPlatformImpl {
 public:
  TtsPlatformImplLinux(const TtsPlatformImplLinux&) = delete;
  TtsPlatformImplLinux& operator=(const TtsPlatformImplLinux&) = delete;

  // TtsPlatformImpl overrides.
  bool PlatformImplSupported() override;
  bool PlatformImplInitialized() override;
  void Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const VoiceData& voice,
             const UtteranceContinuousParameters& params,
             base::OnceCallback<void(bool)> on_speak_finished) override;
  bool StopSpeaking() override;
  void Pause() override;
  void Resume() override;
  bool IsSpeaking() override;
  void GetVoices(std::vector<VoiceData>* out_voices) override;
  void Shutdown() override;

  // Called on the UI thread once the worker finished Initialize(). |voices|
  // is stored only when |success| is true.
  void OnInitialized(bool success, PlatformVoices voices);
  // Called on the UI thread with the result of the worker's ProcessSpeech();
  // forwards |success| to |on_speak_finished|.
  void OnSpeakScheduled(base::OnceCallback<void(bool)> on_speak_finished,
                        bool success);
  // Called on the UI thread when the worker saw the end of |utterance_id|.
  void OnSpeakFinished(int utterance_id);

  // Gives the worker's static libspeechd callbacks access to the bound worker
  // so they can post to it.
  base::SequenceBound<TtsPlatformImplBackgroundWorker>* worker() {
    return &worker_;
  }

  // Get the single instance of this class.
  static TtsPlatformImplLinux* GetInstance();

 private:
  friend base::NoDestructor<TtsPlatformImplLinux>;
  TtsPlatformImplLinux();

  // Continuation of Speak() that receives the SSML-stripped text, clamps the
  // rate and pitch, resolves the voice and posts the request to the worker.
  void ProcessSpeech(int utterance_id,
                     const std::string& lang,
                     const VoiceData& voice,
                     const UtteranceContinuousParameters& params,
                     base::OnceCallback<void(bool)> on_speak_finished,
                     const std::string& parsed_utterance);

  // Holds the platform state.
  bool is_supported_ = false;
  bool is_initialized_ = false;
  bool is_speaking_ = false;
  bool paused_ = false;

  // The current utterance being spoken.
  int utterance_id_ = kInvalidUtteranceId;

  // Maps a string composed of a voice name and module name to the voice
  // description. Used to uniquely identify a voice across all available
  // modules.
  PlatformVoices voices_;

  // Hold the state and the code of the background implementation.
  base::SequenceBound<TtsPlatformImplBackgroundWorker> worker_;
};
163
164 //
165 // TtsPlatformImplBackgroundWorker
166 //
167
Initialize()168 void TtsPlatformImplBackgroundWorker::Initialize() {
169 PlatformVoices voices;
170 if (InitializeSpeechd()) {
171 OpenConnection();
172 InitializeVoices(&voices);
173 }
174
175 bool success = (conn_ != nullptr);
176 GetUIThreadTaskRunner({})->PostTask(
177 FROM_HERE,
178 base::BindOnce(&TtsPlatformImplLinux::OnInitialized,
179 base::Unretained(TtsPlatformImplLinux::GetInstance()),
180 success, std::move(voices)));
181 }
182
ProcessSpeech(int utterance_id,const std::string & parsed_utterance,const std::string & lang,float rate,float pitch,SPDChromeVoice voice,base::OnceCallback<void (bool)> on_speak_finished)183 void TtsPlatformImplBackgroundWorker::ProcessSpeech(
184 int utterance_id,
185 const std::string& parsed_utterance,
186 const std::string& lang,
187 float rate,
188 float pitch,
189 SPDChromeVoice voice,
190 base::OnceCallback<void(bool)> on_speak_finished) {
191 libspeechd_loader_.spd_set_output_module(conn_, voice.module.c_str());
192 libspeechd_loader_.spd_set_synthesis_voice(conn_, voice.name.c_str());
193
194 // Map our multiplicative range to Speech Dispatcher's linear range.
195 // .334 = -100.
196 // 3 = 100.
197 libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
198 libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
199
200 // Support languages other than the default
201 if (!lang.empty())
202 libspeechd_loader_.spd_set_language(conn_, lang.c_str());
203
204 utterance_id_ = utterance_id;
205 utterance_char_position_ = 0;
206 utterance_length_ = parsed_utterance.size();
207
208 // spd_say(...) returns msg_uid on success, -1 otherwise. Each call to spd_say
209 // returns a different msg_uid.
210 msg_uid_ =
211 libspeechd_loader_.spd_say(conn_, SPD_TEXT, parsed_utterance.c_str());
212
213 bool success = (msg_uid_ != kInvalidMessageUid);
214 GetUIThreadTaskRunner({})->PostTask(
215 FROM_HERE, base::BindOnce(std::move(on_speak_finished), success));
216 }
217
Pause()218 void TtsPlatformImplBackgroundWorker::Pause() {
219 if (msg_uid_ != kInvalidMessageUid)
220 libspeechd_loader_.spd_pause(conn_);
221 }
222
Resume()223 void TtsPlatformImplBackgroundWorker::Resume() {
224 if (msg_uid_ != kInvalidMessageUid)
225 libspeechd_loader_.spd_resume(conn_);
226 }
227
StopSpeaking()228 void TtsPlatformImplBackgroundWorker::StopSpeaking() {
229 if (msg_uid_ != kInvalidMessageUid) {
230 int result = libspeechd_loader_.spd_stop(conn_);
231 if (result == -1) {
232 CloseConnection();
233 OpenConnection();
234 }
235 msg_uid_ = kInvalidMessageUid;
236 utterance_id_ = kInvalidUtteranceId;
237 }
238 }
239
// Closes the speech-dispatcher connection if one is open.
void TtsPlatformImplBackgroundWorker::Shutdown() {
  CloseConnection();
}
243
// Loads the libspeechd shared library ("libspeechd.so.2") through the loader
// and returns the loader's result.
bool TtsPlatformImplBackgroundWorker::InitializeSpeechd() {
  return libspeechd_loader_.Load("libspeechd.so.2");
}
247
InitializeVoices(PlatformVoices * voices)248 void TtsPlatformImplBackgroundWorker::InitializeVoices(PlatformVoices* voices) {
249 if (!conn_)
250 return;
251
252 char** modules = libspeechd_loader_.spd_list_modules(conn_);
253 if (!modules)
254 return;
255 for (int i = 0; modules[i]; i++) {
256 char* module = modules[i];
257 libspeechd_loader_.spd_set_output_module(conn_, module);
258 SPDVoice** spd_voices = libspeechd_loader_.spd_list_synthesis_voices(conn_);
259 if (!spd_voices) {
260 free(module);
261 continue;
262 }
263 for (int j = 0; spd_voices[j]; j++) {
264 SPDVoice* spd_voice = spd_voices[j];
265 SPDChromeVoice spd_data;
266 spd_data.name = spd_voice->name;
267 spd_data.module = module;
268 spd_data.language = spd_voice->language;
269 std::string key;
270 key.append(spd_data.name);
271 key.append(" ");
272 key.append(spd_data.module);
273 voices->insert(std::pair<std::string, SPDChromeVoice>(key, spd_data));
274 free(spd_voices[j]);
275 }
276 free(modules[i]);
277 }
278 }
279
OpenConnection()280 void TtsPlatformImplBackgroundWorker::OpenConnection() {
281 {
282 // spd_open has memory leaks which are hard to suppress.
283 // http://crbug.com/317360
284 ANNOTATE_SCOPED_MEMORY_LEAK;
285 conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
286 SPD_MODE_THREADED);
287 }
288 if (!conn_)
289 return;
290
291 // Register callbacks for all events.
292 conn_->callback_begin = conn_->callback_end = conn_->callback_cancel =
293 conn_->callback_pause = conn_->callback_resume =
294 &TtsPlatformImplBackgroundWorker::NotificationCallback;
295
296 conn_->callback_im = &TtsPlatformImplBackgroundWorker::IndexMarkCallback;
297
298 libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
299 libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
300 libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
301 libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
302 libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
303 }
304
CloseConnection()305 void TtsPlatformImplBackgroundWorker::CloseConnection() {
306 if (conn_) {
307 libspeechd_loader_.spd_close(conn_);
308 conn_ = nullptr;
309 }
310 }
311
OnSpeechEvent(int msg_id,SPDNotificationType type)312 void TtsPlatformImplBackgroundWorker::OnSpeechEvent(int msg_id,
313 SPDNotificationType type) {
314 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
315 if (msg_id != msg_uid_)
316 return;
317
318 switch (type) {
319 case SPD_EVENT_BEGIN:
320 utterance_char_position_ = 0;
321 SendTtsEvent(utterance_id_, TTS_EVENT_START, utterance_char_position_,
322 -1);
323 break;
324 case SPD_EVENT_RESUME:
325 SendTtsEvent(utterance_id_, TTS_EVENT_RESUME, utterance_char_position_,
326 -1);
327 break;
328 case SPD_EVENT_END:
329 GetUIThreadTaskRunner({})->PostTask(
330 FROM_HERE,
331 base::BindOnce(&TtsPlatformImplLinux::OnSpeakFinished,
332 base::Unretained(TtsPlatformImplLinux::GetInstance()),
333 utterance_id_));
334
335 utterance_char_position_ = utterance_length_;
336 SendTtsEvent(utterance_id_, TTS_EVENT_END, utterance_char_position_, 0);
337 break;
338 case SPD_EVENT_PAUSE:
339 SendTtsEvent(utterance_id_, TTS_EVENT_PAUSE, utterance_char_position_,
340 -1);
341 break;
342 case SPD_EVENT_CANCEL:
343 SendTtsEvent(utterance_id_, TTS_EVENT_CANCELLED, utterance_char_position_,
344 -1);
345 break;
346 case SPD_EVENT_INDEX_MARK:
347 // TODO: Can we get length from linux? If so, update
348 // utterance_char_position_.
349 SendTtsEvent(utterance_id_, TTS_EVENT_MARKER, utterance_char_position_,
350 -1);
351 break;
352 }
353 }
354
SendTtsEvent(int utterance_id,TtsEventType event_type,int char_index,int length)355 void TtsPlatformImplBackgroundWorker::SendTtsEvent(int utterance_id,
356 TtsEventType event_type,
357 int char_index,
358 int length) {
359 GetUIThreadTaskRunner({})->PostTask(
360 FROM_HERE, base::BindOnce(&TtsController::OnTtsEvent,
361 base::Unretained(TtsController::GetInstance()),
362 utterance_id, event_type, char_index, length,
363 std::string()));
364 }
365
366 // static
NotificationCallback(size_t msg_id,size_t client_id,SPDNotificationType type)367 void TtsPlatformImplBackgroundWorker::NotificationCallback(
368 size_t msg_id,
369 size_t client_id,
370 SPDNotificationType type) {
371 TtsPlatformImplLinux::GetInstance()->worker()->Post(
372 FROM_HERE, &TtsPlatformImplBackgroundWorker::OnSpeechEvent, msg_id, type);
373 }
374
375 // static
IndexMarkCallback(size_t msg_id,size_t client_id,SPDNotificationType type,char * index_mark)376 void TtsPlatformImplBackgroundWorker::IndexMarkCallback(
377 size_t msg_id,
378 size_t client_id,
379 SPDNotificationType type,
380 char* index_mark) {
381 // TODO(dtseng): index_mark appears to specify an index type supplied by a
382 // client. Need to explore how this is used before hooking it up with existing
383 // word, sentence events.
384 TtsPlatformImplLinux::GetInstance()->worker()->Post(
385 FROM_HERE, &TtsPlatformImplBackgroundWorker::OnSpeechEvent, msg_id, type);
386 }
387
388 //
389 // TtsPlatformImplLinux
390 //
391
TtsPlatformImplLinux()392 TtsPlatformImplLinux::TtsPlatformImplLinux()
393 : worker_(base::ThreadPool::CreateSequencedTaskRunner({base::MayBlock()})) {
394 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
395 const base::CommandLine& command_line =
396 *base::CommandLine::ForCurrentProcess();
397 if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
398 return;
399
400 // The TTS platform is supported. The Tts platform initialisation will happen
401 // on a worker thread and it will become initialized.
402 is_supported_ = true;
403 worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Initialize);
404 }
405
// Returns true when the speech-dispatcher command-line switch was present at
// construction time.
bool TtsPlatformImplLinux::PlatformImplSupported() {
  return is_supported_;
}
409
// Returns true once OnInitialized() has run. Note that OnInitialized() sets
// the flag whether or not the worker managed to connect.
bool TtsPlatformImplLinux::PlatformImplInitialized() {
  return is_initialized_;
}
413
Speak(int utterance_id,const std::string & utterance,const std::string & lang,const VoiceData & voice,const UtteranceContinuousParameters & params,base::OnceCallback<void (bool)> on_speak_finished)414 void TtsPlatformImplLinux::Speak(
415 int utterance_id,
416 const std::string& utterance,
417 const std::string& lang,
418 const VoiceData& voice,
419 const UtteranceContinuousParameters& params,
420 base::OnceCallback<void(bool)> on_speak_finished) {
421 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
422 DCHECK(PlatformImplInitialized());
423
424 if (paused_ || is_speaking_) {
425 std::move(on_speak_finished).Run(false);
426 return;
427 }
428
429 // Flag that a utterance is getting emitted. The |is_speaking_| flag will be
430 // set back to false when the utterance will be fully spoken, stopped or if
431 // the voice synthetizer was not able to emit it.
432 is_speaking_ = true;
433 utterance_id_ = utterance_id;
434
435 // Parse SSML and process speech.
436 TtsController::GetInstance()->StripSSML(
437 utterance,
438 base::BindOnce(&TtsPlatformImplLinux::ProcessSpeech,
439 base::Unretained(this), utterance_id, lang, voice, params,
440 base::BindOnce(&TtsPlatformImplLinux::OnSpeakScheduled,
441 base::Unretained(this),
442 std::move(on_speak_finished))));
443 }
444
StopSpeaking()445 bool TtsPlatformImplLinux::StopSpeaking() {
446 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
447 DCHECK(PlatformImplInitialized());
448
449 worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::StopSpeaking);
450 paused_ = false;
451
452 is_speaking_ = false;
453 utterance_id_ = kInvalidUtteranceId;
454
455 return true;
456 }
457
Pause()458 void TtsPlatformImplLinux::Pause() {
459 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
460 DCHECK(PlatformImplInitialized());
461
462 if (paused_ || !is_speaking_)
463 return;
464
465 worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Pause);
466 paused_ = true;
467 }
468
Resume()469 void TtsPlatformImplLinux::Resume() {
470 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
471 DCHECK(PlatformImplInitialized());
472
473 if (!paused_ || !is_speaking_)
474 return;
475
476 worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Resume);
477 paused_ = false;
478 }
479
// Returns whether an utterance is in flight: set by Speak(), cleared by
// StopSpeaking(), OnSpeakFinished() or a failed OnSpeakScheduled().
bool TtsPlatformImplLinux::IsSpeaking() {
  return is_speaking_;
}
483
GetVoices(std::vector<VoiceData> * out_voices)484 void TtsPlatformImplLinux::GetVoices(std::vector<VoiceData>* out_voices) {
485 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
486 DCHECK(PlatformImplInitialized());
487
488 for (auto it = voices_.begin(); it != voices_.end(); ++it) {
489 out_voices->push_back(VoiceData());
490 VoiceData& voice = out_voices->back();
491 voice.native = true;
492 voice.name = it->first;
493 voice.lang = it->second.language;
494 voice.events.insert(TTS_EVENT_START);
495 voice.events.insert(TTS_EVENT_END);
496 voice.events.insert(TTS_EVENT_CANCELLED);
497 voice.events.insert(TTS_EVENT_MARKER);
498 voice.events.insert(TTS_EVENT_PAUSE);
499 voice.events.insert(TTS_EVENT_RESUME);
500 }
501 }
502
// Posts a Shutdown() call to the worker, which closes the speech-dispatcher
// connection there.
void TtsPlatformImplLinux::Shutdown() {
  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Shutdown);
}
506
OnInitialized(bool success,PlatformVoices voices)507 void TtsPlatformImplLinux::OnInitialized(bool success, PlatformVoices voices) {
508 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
509 if (success)
510 voices_ = std::move(voices);
511 is_initialized_ = true;
512 TtsController::GetInstance()->VoicesChanged();
513 }
514
OnSpeakScheduled(base::OnceCallback<void (bool)> on_speak_finished,bool success)515 void TtsPlatformImplLinux::OnSpeakScheduled(
516 base::OnceCallback<void(bool)> on_speak_finished,
517 bool success) {
518 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
519 DCHECK(is_speaking_);
520
521 // If the utterance was not able to be emitted, stop the speaking. There
522 // won't be any asynchronous TTS event to confirm the end of the speech.
523 if (!success) {
524 is_speaking_ = false;
525 utterance_id_ = kInvalidUtteranceId;
526 }
527
528 // Pass the results to our caller.
529 std::move(on_speak_finished).Run(success);
530 }
531
OnSpeakFinished(int utterance_id)532 void TtsPlatformImplLinux::OnSpeakFinished(int utterance_id) {
533 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
534 if (utterance_id != utterance_id_)
535 return;
536
537 DCHECK(is_speaking_);
538 DCHECK_NE(utterance_id_, kInvalidUtteranceId);
539 is_speaking_ = false;
540 utterance_id_ = kInvalidUtteranceId;
541 }
542
ProcessSpeech(int utterance_id,const std::string & lang,const VoiceData & voice,const UtteranceContinuousParameters & params,base::OnceCallback<void (bool)> on_speak_finished,const std::string & parsed_utterance)543 void TtsPlatformImplLinux::ProcessSpeech(
544 int utterance_id,
545 const std::string& lang,
546 const VoiceData& voice,
547 const UtteranceContinuousParameters& params,
548 base::OnceCallback<void(bool)> on_speak_finished,
549 const std::string& parsed_utterance) {
550 DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
551
552 // Speech dispatcher's speech params are around 3x at either limit.
553 float rate = params.rate > 3 ? 3 : params.rate;
554 rate = params.rate < 0.334 ? 0.334 : rate;
555 float pitch = params.pitch > 3 ? 3 : params.pitch;
556 pitch = params.pitch < 0.334 ? 0.334 : pitch;
557
558 SPDChromeVoice matched_voice;
559 auto it = voices_.find(voice.name);
560 if (it != voices_.end())
561 matched_voice = it->second;
562
563 worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::ProcessSpeech,
564 utterance_id, parsed_utterance, lang, rate, pitch, matched_voice,
565 std::move(on_speak_finished));
566 }
567
// Returns the process-wide TtsPlatformImplLinux. The instance is a
// function-local static wrapped in base::NoDestructor, so it is constructed
// the first time this function is called.
// static
TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
  static base::NoDestructor<TtsPlatformImplLinux> tts_platform;
  return tts_platform.get();
}
573
// Platform hook: in this file the generic TtsPlatformImpl accessor resolves to
// the Linux speech-dispatcher implementation.
// static
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplLinux::GetInstance();
}
578
579 } // namespace content
580