1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include <math.h>
6 #include <stddef.h>
7 
8 #include <map>
9 #include <memory>
10 
11 #include "base/bind.h"
12 #include "base/command_line.h"
13 #include "base/debug/leak_annotations.h"
14 #include "base/macros.h"
15 #include "base/no_destructor.h"
16 #include "base/sequenced_task_runner.h"
17 #include "base/synchronization/lock.h"
18 #include "base/task/task_traits.h"
19 #include "base/task/thread_pool.h"
20 #include "base/task_runner.h"
21 #include "base/threading/sequence_bound.h"
22 #include "content/browser/speech/tts_platform_impl.h"
23 #include "content/public/browser/browser_task_traits.h"
24 #include "content/public/browser/browser_thread.h"
25 #include "content/public/browser/tts_controller.h"
26 #include "content/public/common/content_switches.h"
27 #include "library_loaders/libspeechd.h"
28 
29 namespace content {
30 
31 namespace {
32 
// Describes one Speech Dispatcher synthesis voice together with the output
// module it was listed under.
struct SPDChromeVoice {
  // Voice name as reported by Speech Dispatcher.
  std::string name;
  // Name of the Speech Dispatcher output module that listed the voice.
  std::string module;
  // Language reported by Speech Dispatcher for the voice.
  std::string language;
};
38 
// Voices keyed by "<voice name> <module name>". The key is also the voice name
// handed out through GetVoices() and looked up again when speaking.
using PlatformVoices = std::map<std::string, SPDChromeVoice>;

// Sentinel meaning that no utterance is currently being processed.
constexpr int kInvalidUtteranceId = -1;
// Sentinel meaning that no Speech Dispatcher message is recorded. It matches
// the value spd_say() returns on failure.
constexpr int kInvalidMessageUid = -1;
43 
44 }  // namespace
45 
// Performs every Speech Dispatcher (libspeechd) call on behalf of
// TtsPlatformImplLinux, which holds this object in a base::SequenceBound and
// posts work to it. Results and TTS events are posted to the UI thread task
// runner.
class TtsPlatformImplBackgroundWorker {
 public:
  TtsPlatformImplBackgroundWorker() = default;
  TtsPlatformImplBackgroundWorker(const TtsPlatformImplBackgroundWorker&) =
      delete;
  TtsPlatformImplBackgroundWorker& operator=(
      const TtsPlatformImplBackgroundWorker&) = delete;
  ~TtsPlatformImplBackgroundWorker() = default;

  // Loads libspeechd, opens the connection, collects the available voices and
  // reports the outcome to TtsPlatformImplLinux::OnInitialized().
  void Initialize();

  // Configures module, voice, rate, pitch and language on the connection,
  // sends |parsed_utterance| to Speech Dispatcher and posts
  // |on_speak_finished| to the UI thread with whether the message was
  // accepted.
  void ProcessSpeech(int utterance_id,
                     const std::string& parsed_utterance,
                     const std::string& lang,
                     float rate,
                     float pitch,
                     SPDChromeVoice voice,
                     base::OnceCallback<void(bool)> on_speak_finished);

  // Pause, resume or stop the last message sent. These do nothing when no
  // message is recorded.
  void Pause();
  void Resume();
  void StopSpeaking();
  // Closes the Speech Dispatcher connection.
  void Shutdown();

 private:
  // Loads the libspeechd shared library; returns whether loading succeeded.
  bool InitializeSpeechd();
  // Fills the given map with the voices of every output module.
  void InitializeVoices(PlatformVoices*);
  // Opens |conn_| and registers the notification callbacks on it. |conn_| is
  // left null when the connection cannot be opened.
  void OpenConnection();
  // Closes |conn_|, if any, and resets it to null.
  void CloseConnection();

  // Translates a Speech Dispatcher notification for the current message into
  // the corresponding TTS event. Notifications for other messages are ignored.
  void OnSpeechEvent(int msg_id, SPDNotificationType type);

  // Send a TTS event notification to the TTS controller.
  void SendTtsEvent(int utterance_id,
                    TtsEventType event_type,
                    int char_index,
                    int length = -1);

  // Callback registered on |conn_| for begin/end/cancel/pause/resume events.
  // Forwards the event to OnSpeechEvent() on the worker.
  static void NotificationCallback(size_t msg_id,
                                   size_t client_id,
                                   SPDNotificationType type);

  // Callback registered on |conn_| for index marks. Forwards the event to
  // OnSpeechEvent() on the worker; |index_mark| is currently unused.
  static void IndexMarkCallback(size_t msg_id,
                                size_t client_id,
                                SPDNotificationType state,
                                char* index_mark);

  // Dynamically loaded libspeechd entry points.
  LibSpeechdLoader libspeechd_loader_;
  // Connection to Speech Dispatcher; null when not connected.
  SPDConnection* conn_ = nullptr;
  // Identifier returned by the latest spd_say() call, or kInvalidMessageUid.
  int msg_uid_ = kInvalidMessageUid;

  // These apply only to the utterance that is currently being processed.
  int utterance_id_ = kInvalidUtteranceId;
  size_t utterance_length_ = 0;
  size_t utterance_char_position_ = 0;
};
103 
// Linux implementation of TtsPlatformImpl backed by Speech Dispatcher. The
// public entry points check that they run on the UI thread and delegate the
// libspeechd calls to a TtsPlatformImplBackgroundWorker bound to a separate
// sequence.
class TtsPlatformImplLinux : public TtsPlatformImpl {
 public:
  TtsPlatformImplLinux(const TtsPlatformImplLinux&) = delete;
  TtsPlatformImplLinux& operator=(const TtsPlatformImplLinux&) = delete;

  // TtsPlatformImpl overrides.
  bool PlatformImplSupported() override;
  bool PlatformImplInitialized() override;
  void Speak(int utterance_id,
             const std::string& utterance,
             const std::string& lang,
             const VoiceData& voice,
             const UtteranceContinuousParameters& params,
             base::OnceCallback<void(bool)> on_speak_finished) override;
  bool StopSpeaking() override;
  void Pause() override;
  void Resume() override;
  bool IsSpeaking() override;
  void GetVoices(std::vector<VoiceData>* out_voices) override;
  void Shutdown() override;

  // Called on the UI thread once the worker finished initializing. |voices| is
  // stored only when |success| is true.
  void OnInitialized(bool success, PlatformVoices voices);
  // Called on the UI thread with whether the worker managed to send the
  // utterance; forwards |success| to |on_speak_finished|.
  void OnSpeakScheduled(base::OnceCallback<void(bool)> on_speak_finished,
                        bool success);
  // Called on the UI thread when the worker saw the end of |utterance_id|.
  void OnSpeakFinished(int utterance_id);

  // Gives the worker's static libspeechd callbacks access to the
  // SequenceBound so they can post events to the worker.
  base::SequenceBound<TtsPlatformImplBackgroundWorker>* worker() {
    return &worker_;
  }

  // Get the single instance of this class.
  static TtsPlatformImplLinux* GetInstance();

 private:
  friend base::NoDestructor<TtsPlatformImplLinux>;
  TtsPlatformImplLinux();

  // Continuation of Speak() that runs once the SSML has been stripped: clamps
  // rate and pitch, resolves the voice and posts the request to the worker.
  void ProcessSpeech(int utterance_id,
                     const std::string& lang,
                     const VoiceData& voice,
                     const UtteranceContinuousParameters& params,
                     base::OnceCallback<void(bool)> on_speak_finished,
                     const std::string& parsed_utterance);

  // Holds the platform state.
  bool is_supported_ = false;
  bool is_initialized_ = false;
  bool is_speaking_ = false;
  bool paused_ = false;

  // The current utterance being spoken.
  int utterance_id_ = kInvalidUtteranceId;

  // Maps a string composed of a voice name and a module name to the voice's
  // data. Used to uniquely identify a voice across all available modules.
  PlatformVoices voices_;

  // Hold the state and the code of the background implementation.
  base::SequenceBound<TtsPlatformImplBackgroundWorker> worker_;
};
163 
164 //
165 // TtsPlatformImplBackgroundWorker
166 //
167 
Initialize()168 void TtsPlatformImplBackgroundWorker::Initialize() {
169   PlatformVoices voices;
170   if (InitializeSpeechd()) {
171     OpenConnection();
172     InitializeVoices(&voices);
173   }
174 
175   bool success = (conn_ != nullptr);
176   GetUIThreadTaskRunner({})->PostTask(
177       FROM_HERE,
178       base::BindOnce(&TtsPlatformImplLinux::OnInitialized,
179                      base::Unretained(TtsPlatformImplLinux::GetInstance()),
180                      success, std::move(voices)));
181 }
182 
ProcessSpeech(int utterance_id,const std::string & parsed_utterance,const std::string & lang,float rate,float pitch,SPDChromeVoice voice,base::OnceCallback<void (bool)> on_speak_finished)183 void TtsPlatformImplBackgroundWorker::ProcessSpeech(
184     int utterance_id,
185     const std::string& parsed_utterance,
186     const std::string& lang,
187     float rate,
188     float pitch,
189     SPDChromeVoice voice,
190     base::OnceCallback<void(bool)> on_speak_finished) {
191   libspeechd_loader_.spd_set_output_module(conn_, voice.module.c_str());
192   libspeechd_loader_.spd_set_synthesis_voice(conn_, voice.name.c_str());
193 
194   // Map our multiplicative range to Speech Dispatcher's linear range.
195   // .334 = -100.
196   // 3 = 100.
197   libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
198   libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
199 
200   // Support languages other than the default
201   if (!lang.empty())
202     libspeechd_loader_.spd_set_language(conn_, lang.c_str());
203 
204   utterance_id_ = utterance_id;
205   utterance_char_position_ = 0;
206   utterance_length_ = parsed_utterance.size();
207 
208   // spd_say(...) returns msg_uid on success, -1 otherwise. Each call to spd_say
209   // returns a different msg_uid.
210   msg_uid_ =
211       libspeechd_loader_.spd_say(conn_, SPD_TEXT, parsed_utterance.c_str());
212 
213   bool success = (msg_uid_ != kInvalidMessageUid);
214   GetUIThreadTaskRunner({})->PostTask(
215       FROM_HERE, base::BindOnce(std::move(on_speak_finished), success));
216 }
217 
Pause()218 void TtsPlatformImplBackgroundWorker::Pause() {
219   if (msg_uid_ != kInvalidMessageUid)
220     libspeechd_loader_.spd_pause(conn_);
221 }
222 
Resume()223 void TtsPlatformImplBackgroundWorker::Resume() {
224   if (msg_uid_ != kInvalidMessageUid)
225     libspeechd_loader_.spd_resume(conn_);
226 }
227 
StopSpeaking()228 void TtsPlatformImplBackgroundWorker::StopSpeaking() {
229   if (msg_uid_ != kInvalidMessageUid) {
230     int result = libspeechd_loader_.spd_stop(conn_);
231     if (result == -1) {
232       CloseConnection();
233       OpenConnection();
234     }
235     msg_uid_ = kInvalidMessageUid;
236     utterance_id_ = kInvalidUtteranceId;
237   }
238 }
239 
// Closes the Speech Dispatcher connection. |msg_uid_| and |utterance_id_| are
// left untouched.
void TtsPlatformImplBackgroundWorker::Shutdown() {
  CloseConnection();
}
243 
// Loads the libspeechd shared library by its soname and returns the loader's
// result.
bool TtsPlatformImplBackgroundWorker::InitializeSpeechd() {
  return libspeechd_loader_.Load("libspeechd.so.2");
}
247 
InitializeVoices(PlatformVoices * voices)248 void TtsPlatformImplBackgroundWorker::InitializeVoices(PlatformVoices* voices) {
249   if (!conn_)
250     return;
251 
252   char** modules = libspeechd_loader_.spd_list_modules(conn_);
253   if (!modules)
254     return;
255   for (int i = 0; modules[i]; i++) {
256     char* module = modules[i];
257     libspeechd_loader_.spd_set_output_module(conn_, module);
258     SPDVoice** spd_voices = libspeechd_loader_.spd_list_synthesis_voices(conn_);
259     if (!spd_voices) {
260       free(module);
261       continue;
262     }
263     for (int j = 0; spd_voices[j]; j++) {
264       SPDVoice* spd_voice = spd_voices[j];
265       SPDChromeVoice spd_data;
266       spd_data.name = spd_voice->name;
267       spd_data.module = module;
268       spd_data.language = spd_voice->language;
269       std::string key;
270       key.append(spd_data.name);
271       key.append(" ");
272       key.append(spd_data.module);
273       voices->insert(std::pair<std::string, SPDChromeVoice>(key, spd_data));
274       free(spd_voices[j]);
275     }
276     free(modules[i]);
277   }
278 }
279 
OpenConnection()280 void TtsPlatformImplBackgroundWorker::OpenConnection() {
281   {
282     // spd_open has memory leaks which are hard to suppress.
283     // http://crbug.com/317360
284     ANNOTATE_SCOPED_MEMORY_LEAK;
285     conn_ = libspeechd_loader_.spd_open("chrome", "extension_api", nullptr,
286                                         SPD_MODE_THREADED);
287   }
288   if (!conn_)
289     return;
290 
291   // Register callbacks for all events.
292   conn_->callback_begin = conn_->callback_end = conn_->callback_cancel =
293       conn_->callback_pause = conn_->callback_resume =
294           &TtsPlatformImplBackgroundWorker::NotificationCallback;
295 
296   conn_->callback_im = &TtsPlatformImplBackgroundWorker::IndexMarkCallback;
297 
298   libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
299   libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
300   libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
301   libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
302   libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
303 }
304 
CloseConnection()305 void TtsPlatformImplBackgroundWorker::CloseConnection() {
306   if (conn_) {
307     libspeechd_loader_.spd_close(conn_);
308     conn_ = nullptr;
309   }
310 }
311 
OnSpeechEvent(int msg_id,SPDNotificationType type)312 void TtsPlatformImplBackgroundWorker::OnSpeechEvent(int msg_id,
313                                                     SPDNotificationType type) {
314   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
315   if (msg_id != msg_uid_)
316     return;
317 
318   switch (type) {
319     case SPD_EVENT_BEGIN:
320       utterance_char_position_ = 0;
321       SendTtsEvent(utterance_id_, TTS_EVENT_START, utterance_char_position_,
322                    -1);
323       break;
324     case SPD_EVENT_RESUME:
325       SendTtsEvent(utterance_id_, TTS_EVENT_RESUME, utterance_char_position_,
326                    -1);
327       break;
328     case SPD_EVENT_END:
329       GetUIThreadTaskRunner({})->PostTask(
330           FROM_HERE,
331           base::BindOnce(&TtsPlatformImplLinux::OnSpeakFinished,
332                          base::Unretained(TtsPlatformImplLinux::GetInstance()),
333                          utterance_id_));
334 
335       utterance_char_position_ = utterance_length_;
336       SendTtsEvent(utterance_id_, TTS_EVENT_END, utterance_char_position_, 0);
337       break;
338     case SPD_EVENT_PAUSE:
339       SendTtsEvent(utterance_id_, TTS_EVENT_PAUSE, utterance_char_position_,
340                    -1);
341       break;
342     case SPD_EVENT_CANCEL:
343       SendTtsEvent(utterance_id_, TTS_EVENT_CANCELLED, utterance_char_position_,
344                    -1);
345       break;
346     case SPD_EVENT_INDEX_MARK:
347       // TODO: Can we get length from linux? If so, update
348       // utterance_char_position_.
349       SendTtsEvent(utterance_id_, TTS_EVENT_MARKER, utterance_char_position_,
350                    -1);
351       break;
352   }
353 }
354 
SendTtsEvent(int utterance_id,TtsEventType event_type,int char_index,int length)355 void TtsPlatformImplBackgroundWorker::SendTtsEvent(int utterance_id,
356                                                    TtsEventType event_type,
357                                                    int char_index,
358                                                    int length) {
359   GetUIThreadTaskRunner({})->PostTask(
360       FROM_HERE, base::BindOnce(&TtsController::OnTtsEvent,
361                                 base::Unretained(TtsController::GetInstance()),
362                                 utterance_id, event_type, char_index, length,
363                                 std::string()));
364 }
365 
366 // static
NotificationCallback(size_t msg_id,size_t client_id,SPDNotificationType type)367 void TtsPlatformImplBackgroundWorker::NotificationCallback(
368     size_t msg_id,
369     size_t client_id,
370     SPDNotificationType type) {
371   TtsPlatformImplLinux::GetInstance()->worker()->Post(
372       FROM_HERE, &TtsPlatformImplBackgroundWorker::OnSpeechEvent, msg_id, type);
373 }
374 
375 // static
IndexMarkCallback(size_t msg_id,size_t client_id,SPDNotificationType type,char * index_mark)376 void TtsPlatformImplBackgroundWorker::IndexMarkCallback(
377     size_t msg_id,
378     size_t client_id,
379     SPDNotificationType type,
380     char* index_mark) {
381   // TODO(dtseng): index_mark appears to specify an index type supplied by a
382   // client. Need to explore how this is used before hooking it up with existing
383   // word, sentence events.
384   TtsPlatformImplLinux::GetInstance()->worker()->Post(
385       FROM_HERE, &TtsPlatformImplBackgroundWorker::OnSpeechEvent, msg_id, type);
386 }
387 
388 //
389 // TtsPlatformImplLinux
390 //
391 
TtsPlatformImplLinux()392 TtsPlatformImplLinux::TtsPlatformImplLinux()
393     : worker_(base::ThreadPool::CreateSequencedTaskRunner({base::MayBlock()})) {
394   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
395   const base::CommandLine& command_line =
396       *base::CommandLine::ForCurrentProcess();
397   if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
398     return;
399 
400   // The TTS platform is supported. The Tts platform initialisation will happen
401   // on a worker thread and it will become initialized.
402   is_supported_ = true;
403   worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Initialize);
404 }
405 
// Returns whether the speech dispatcher switch was present when this object
// was constructed.
bool TtsPlatformImplLinux::PlatformImplSupported() {
  return is_supported_;
}
409 
// Returns whether OnInitialized() has run. This becomes true even when the
// worker reported a failed initialization.
bool TtsPlatformImplLinux::PlatformImplInitialized() {
  return is_initialized_;
}
413 
Speak(int utterance_id,const std::string & utterance,const std::string & lang,const VoiceData & voice,const UtteranceContinuousParameters & params,base::OnceCallback<void (bool)> on_speak_finished)414 void TtsPlatformImplLinux::Speak(
415     int utterance_id,
416     const std::string& utterance,
417     const std::string& lang,
418     const VoiceData& voice,
419     const UtteranceContinuousParameters& params,
420     base::OnceCallback<void(bool)> on_speak_finished) {
421   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
422   DCHECK(PlatformImplInitialized());
423 
424   if (paused_ || is_speaking_) {
425     std::move(on_speak_finished).Run(false);
426     return;
427   }
428 
429   // Flag that a utterance is getting emitted. The |is_speaking_| flag will be
430   // set back to false when the utterance will be fully spoken, stopped or if
431   // the voice synthetizer was not able to emit it.
432   is_speaking_ = true;
433   utterance_id_ = utterance_id;
434 
435   // Parse SSML and process speech.
436   TtsController::GetInstance()->StripSSML(
437       utterance,
438       base::BindOnce(&TtsPlatformImplLinux::ProcessSpeech,
439                      base::Unretained(this), utterance_id, lang, voice, params,
440                      base::BindOnce(&TtsPlatformImplLinux::OnSpeakScheduled,
441                                     base::Unretained(this),
442                                     std::move(on_speak_finished))));
443 }
444 
StopSpeaking()445 bool TtsPlatformImplLinux::StopSpeaking() {
446   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
447   DCHECK(PlatformImplInitialized());
448 
449   worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::StopSpeaking);
450   paused_ = false;
451 
452   is_speaking_ = false;
453   utterance_id_ = kInvalidUtteranceId;
454 
455   return true;
456 }
457 
Pause()458 void TtsPlatformImplLinux::Pause() {
459   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
460   DCHECK(PlatformImplInitialized());
461 
462   if (paused_ || !is_speaking_)
463     return;
464 
465   worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Pause);
466   paused_ = true;
467 }
468 
Resume()469 void TtsPlatformImplLinux::Resume() {
470   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
471   DCHECK(PlatformImplInitialized());
472 
473   if (!paused_ || !is_speaking_)
474     return;
475 
476   worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Resume);
477   paused_ = false;
478 }
479 
// Returns whether an utterance is in progress: true from an accepted Speak()
// until it finishes, is stopped, or fails to be emitted.
bool TtsPlatformImplLinux::IsSpeaking() {
  return is_speaking_;
}
483 
GetVoices(std::vector<VoiceData> * out_voices)484 void TtsPlatformImplLinux::GetVoices(std::vector<VoiceData>* out_voices) {
485   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
486   DCHECK(PlatformImplInitialized());
487 
488   for (auto it = voices_.begin(); it != voices_.end(); ++it) {
489     out_voices->push_back(VoiceData());
490     VoiceData& voice = out_voices->back();
491     voice.native = true;
492     voice.name = it->first;
493     voice.lang = it->second.language;
494     voice.events.insert(TTS_EVENT_START);
495     voice.events.insert(TTS_EVENT_END);
496     voice.events.insert(TTS_EVENT_CANCELLED);
497     voice.events.insert(TTS_EVENT_MARKER);
498     voice.events.insert(TTS_EVENT_PAUSE);
499     voice.events.insert(TTS_EVENT_RESUME);
500   }
501 }
502 
// Asks the worker to close its Speech Dispatcher connection. The flags held
// by this object are not changed.
void TtsPlatformImplLinux::Shutdown() {
  worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::Shutdown);
}
506 
// Completion handler for the worker's Initialize(), run on the UI thread.
// |voices| is stored only when |success| is true. The platform is marked as
// initialized either way, and the TTS controller is told that the voices
// changed.
void TtsPlatformImplLinux::OnInitialized(bool success, PlatformVoices voices) {
  DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
  if (success)
    voices_ = std::move(voices);
  is_initialized_ = true;
  TtsController::GetInstance()->VoicesChanged();
}
514 
OnSpeakScheduled(base::OnceCallback<void (bool)> on_speak_finished,bool success)515 void TtsPlatformImplLinux::OnSpeakScheduled(
516     base::OnceCallback<void(bool)> on_speak_finished,
517     bool success) {
518   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
519   DCHECK(is_speaking_);
520 
521   // If the utterance was not able to be emitted, stop the speaking. There
522   // won't be any asynchronous TTS event to confirm the end of the speech.
523   if (!success) {
524     is_speaking_ = false;
525     utterance_id_ = kInvalidUtteranceId;
526   }
527 
528   // Pass the results to our caller.
529   std::move(on_speak_finished).Run(success);
530 }
531 
OnSpeakFinished(int utterance_id)532 void TtsPlatformImplLinux::OnSpeakFinished(int utterance_id) {
533   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
534   if (utterance_id != utterance_id_)
535     return;
536 
537   DCHECK(is_speaking_);
538   DCHECK_NE(utterance_id_, kInvalidUtteranceId);
539   is_speaking_ = false;
540   utterance_id_ = kInvalidUtteranceId;
541 }
542 
ProcessSpeech(int utterance_id,const std::string & lang,const VoiceData & voice,const UtteranceContinuousParameters & params,base::OnceCallback<void (bool)> on_speak_finished,const std::string & parsed_utterance)543 void TtsPlatformImplLinux::ProcessSpeech(
544     int utterance_id,
545     const std::string& lang,
546     const VoiceData& voice,
547     const UtteranceContinuousParameters& params,
548     base::OnceCallback<void(bool)> on_speak_finished,
549     const std::string& parsed_utterance) {
550   DCHECK(BrowserThread::CurrentlyOn(content::BrowserThread::UI));
551 
552   // Speech dispatcher's speech params are around 3x at either limit.
553   float rate = params.rate > 3 ? 3 : params.rate;
554   rate = params.rate < 0.334 ? 0.334 : rate;
555   float pitch = params.pitch > 3 ? 3 : params.pitch;
556   pitch = params.pitch < 0.334 ? 0.334 : pitch;
557 
558   SPDChromeVoice matched_voice;
559   auto it = voices_.find(voice.name);
560   if (it != voices_.end())
561     matched_voice = it->second;
562 
563   worker_.Post(FROM_HERE, &TtsPlatformImplBackgroundWorker::ProcessSpeech,
564                utterance_id, parsed_utterance, lang, rate, pitch, matched_voice,
565                std::move(on_speak_finished));
566 }
567 
568 // static
// Returns the single TtsPlatformImplLinux instance, created on first use. It
// is held in a base::NoDestructor.
TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
  static base::NoDestructor<TtsPlatformImplLinux> tts_platform;
  return tts_platform.get();
}
573 
574 // static
// Defines TtsPlatformImpl::GetInstance() for this file's platform by returning
// the TtsPlatformImplLinux singleton.
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
  return TtsPlatformImplLinux::GetInstance();
}
578 
579 }  // namespace content
580