1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "AudioChannelService.h"
8 #include "AudioSegment.h"
9 #include "nsSpeechTask.h"
10 #include "nsSynthVoiceRegistry.h"
11 #include "nsXULAppAPI.h"
12 #include "SharedBuffer.h"
13 #include "SpeechSynthesis.h"
14 
15 #undef LOG
16 extern mozilla::LogModule* GetSpeechSynthLog();
17 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
18 
19 #define AUDIO_TRACK 1
20 
21 namespace mozilla::dom {
22 
23 // nsSpeechTask
24 
// Cycle-collection participant for nsSpeechTask. The members handed to the
// collector are mSpeechSynthesis, mUtterance and mCallback.
NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance,
                              mCallback)

// QueryInterface table. Requests for nsISupports are resolved through the
// nsISpeechTask base.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
  NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
NS_INTERFACE_MAP_END

// Cycle-collecting AddRef/Release implementations.
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
37 
38 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
39     : mUtterance(aUtterance),
40       mInited(false),
41       mPrePaused(false),
42       mPreCanceled(false),
43       mCallback(nullptr),
44       mIsChrome(aIsChrome),
45       mState(STATE_PENDING) {
46   mText = aUtterance->mText;
47   mVolume = aUtterance->Volume();
48 }
49 
nsSpeechTask(float aVolume,const nsAString & aText,bool aIsChrome)50 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText,
51                            bool aIsChrome)
52     : mUtterance(nullptr),
53       mVolume(aVolume),
54       mText(aText),
55       mInited(false),
56       mPrePaused(false),
57       mPreCanceled(false),
58       mCallback(nullptr),
59       mIsChrome(aIsChrome),
60       mState(STATE_PENDING) {}
61 
~nsSpeechTask()62 nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); }
63 
Init()64 void nsSpeechTask::Init() { mInited = true; }
65 
SetChosenVoiceURI(const nsAString & aUri)66 void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) {
67   mChosenVoiceURI = aUri;
68 }
69 
70 NS_IMETHODIMP
Setup(nsISpeechTaskCallback * aCallback)71 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) {
72   MOZ_ASSERT(XRE_IsParentProcess());
73 
74   LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
75 
76   mCallback = aCallback;
77 
78   return NS_OK;
79 }
80 
81 NS_IMETHODIMP
DispatchStart()82 nsSpeechTask::DispatchStart() {
83   nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
84   return DispatchStartImpl();
85 }
86 
DispatchStartImpl()87 nsresult nsSpeechTask::DispatchStartImpl() {
88   return DispatchStartImpl(mChosenVoiceURI);
89 }
90 
DispatchStartImpl(const nsAString & aUri)91 nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) {
92   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
93 
94   MOZ_ASSERT(mUtterance);
95   if (NS_WARN_IF(mState != STATE_PENDING)) {
96     return NS_ERROR_NOT_AVAILABLE;
97   }
98 
99   CreateAudioChannelAgent();
100 
101   mState = STATE_SPEAKING;
102   mUtterance->mChosenVoiceURI = aUri;
103   mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns);
104 
105   return NS_OK;
106 }
107 
108 NS_IMETHODIMP
DispatchEnd(float aElapsedTime,uint32_t aCharIndex)109 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) {
110   // After we end, no callback functions should go through.
111   mCallback = nullptr;
112 
113   if (!mPreCanceled) {
114     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
115   }
116 
117   return DispatchEndImpl(aElapsedTime, aCharIndex);
118 }
119 
DispatchEndImpl(float aElapsedTime,uint32_t aCharIndex)120 nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime,
121                                        uint32_t aCharIndex) {
122   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
123 
124   DestroyAudioChannelAgent();
125 
126   MOZ_ASSERT(mUtterance);
127   if (NS_WARN_IF(mState == STATE_ENDED)) {
128     return NS_ERROR_NOT_AVAILABLE;
129   }
130 
131   RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
132 
133   if (mSpeechSynthesis) {
134     mSpeechSynthesis->OnEnd(this);
135   }
136 
137   mState = STATE_ENDED;
138   utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr,
139                                           aElapsedTime, u""_ns);
140 
141   return NS_OK;
142 }
143 
144 NS_IMETHODIMP
DispatchPause(float aElapsedTime,uint32_t aCharIndex)145 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) {
146   return DispatchPauseImpl(aElapsedTime, aCharIndex);
147 }
148 
DispatchPauseImpl(float aElapsedTime,uint32_t aCharIndex)149 nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime,
150                                          uint32_t aCharIndex) {
151   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
152   MOZ_ASSERT(mUtterance);
153   if (NS_WARN_IF(mUtterance->mPaused)) {
154     return NS_ERROR_NOT_AVAILABLE;
155   }
156   if (NS_WARN_IF(mState == STATE_ENDED)) {
157     return NS_ERROR_NOT_AVAILABLE;
158   }
159 
160   mUtterance->mPaused = true;
161   if (mState == STATE_SPEAKING) {
162     mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr,
163                                              aElapsedTime, u""_ns);
164   }
165 
166   return NS_OK;
167 }
168 
169 NS_IMETHODIMP
DispatchResume(float aElapsedTime,uint32_t aCharIndex)170 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) {
171   return DispatchResumeImpl(aElapsedTime, aCharIndex);
172 }
173 
DispatchResumeImpl(float aElapsedTime,uint32_t aCharIndex)174 nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime,
175                                           uint32_t aCharIndex) {
176   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
177   MOZ_ASSERT(mUtterance);
178   if (NS_WARN_IF(!(mUtterance->mPaused))) {
179     return NS_ERROR_NOT_AVAILABLE;
180   }
181   if (NS_WARN_IF(mState == STATE_ENDED)) {
182     return NS_ERROR_NOT_AVAILABLE;
183   }
184 
185   mUtterance->mPaused = false;
186   if (mState == STATE_SPEAKING) {
187     mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr,
188                                              aElapsedTime, u""_ns);
189   }
190 
191   return NS_OK;
192 }
193 
ForceError(float aElapsedTime,uint32_t aCharIndex)194 void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) {
195   DispatchError(aElapsedTime, aCharIndex);
196 }
197 
198 NS_IMETHODIMP
DispatchError(float aElapsedTime,uint32_t aCharIndex)199 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) {
200   if (!mPreCanceled) {
201     nsSynthVoiceRegistry::GetInstance()->SpeakNext();
202   }
203 
204   return DispatchErrorImpl(aElapsedTime, aCharIndex);
205 }
206 
DispatchErrorImpl(float aElapsedTime,uint32_t aCharIndex)207 nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime,
208                                          uint32_t aCharIndex) {
209   LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl"));
210 
211   DestroyAudioChannelAgent();
212 
213   MOZ_ASSERT(mUtterance);
214   if (NS_WARN_IF(mState == STATE_ENDED)) {
215     return NS_ERROR_NOT_AVAILABLE;
216   }
217 
218   if (mSpeechSynthesis) {
219     mSpeechSynthesis->OnEnd(this);
220   }
221 
222   mState = STATE_ENDED;
223   mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr,
224                                            aElapsedTime, u""_ns);
225   return NS_OK;
226 }
227 
228 NS_IMETHODIMP
DispatchBoundary(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex,uint32_t aCharLength,uint8_t argc)229 nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime,
230                                uint32_t aCharIndex, uint32_t aCharLength,
231                                uint8_t argc) {
232   return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength,
233                               argc);
234 }
235 
DispatchBoundaryImpl(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex,uint32_t aCharLength,uint8_t argc)236 nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
237                                             float aElapsedTime,
238                                             uint32_t aCharIndex,
239                                             uint32_t aCharLength,
240                                             uint8_t argc) {
241   MOZ_ASSERT(mUtterance);
242   if (NS_WARN_IF(mState != STATE_SPEAKING)) {
243     return NS_ERROR_NOT_AVAILABLE;
244   }
245   mUtterance->DispatchSpeechSynthesisEvent(
246       u"boundary"_ns, aCharIndex,
247       argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr,
248       aElapsedTime, aName);
249 
250   return NS_OK;
251 }
252 
253 NS_IMETHODIMP
DispatchMark(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex)254 nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime,
255                            uint32_t aCharIndex) {
256   return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
257 }
258 
DispatchMarkImpl(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex)259 nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
260                                         float aElapsedTime,
261                                         uint32_t aCharIndex) {
262   MOZ_ASSERT(mUtterance);
263   if (NS_WARN_IF(mState != STATE_SPEAKING)) {
264     return NS_ERROR_NOT_AVAILABLE;
265   }
266   mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr,
267                                            aElapsedTime, aName);
268   return NS_OK;
269 }
270 
Pause()271 void nsSpeechTask::Pause() {
272   MOZ_ASSERT(XRE_IsParentProcess());
273 
274   if (mCallback) {
275     DebugOnly<nsresult> rv = mCallback->OnPause();
276     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
277   }
278 
279   if (!mInited) {
280     mPrePaused = true;
281   }
282 }
283 
Resume()284 void nsSpeechTask::Resume() {
285   MOZ_ASSERT(XRE_IsParentProcess());
286 
287   if (mCallback) {
288     DebugOnly<nsresult> rv = mCallback->OnResume();
289     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
290                          "Unable to call onResume() callback");
291   }
292 
293   if (mPrePaused) {
294     mPrePaused = false;
295     nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
296   }
297 }
298 
Cancel()299 void nsSpeechTask::Cancel() {
300   MOZ_ASSERT(XRE_IsParentProcess());
301 
302   LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
303 
304   if (mCallback) {
305     DebugOnly<nsresult> rv = mCallback->OnCancel();
306     NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
307                          "Unable to call onCancel() callback");
308   }
309 
310   if (!mInited) {
311     mPreCanceled = true;
312   }
313 }
314 
ForceEnd()315 void nsSpeechTask::ForceEnd() {
316   if (!mInited) {
317     mPreCanceled = true;
318   }
319 
320   DispatchEnd(0, 0);
321 }
322 
SetSpeechSynthesis(SpeechSynthesis * aSpeechSynthesis)323 void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) {
324   mSpeechSynthesis = aSpeechSynthesis;
325 }
326 
CreateAudioChannelAgent()327 void nsSpeechTask::CreateAudioChannelAgent() {
328   if (!mUtterance) {
329     return;
330   }
331 
332   if (mAudioChannelAgent) {
333     mAudioChannelAgent->NotifyStoppedPlaying();
334   }
335 
336   mAudioChannelAgent = new AudioChannelAgent();
337   mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwner(), this);
338 
339   nsresult rv = mAudioChannelAgent->NotifyStartedPlaying(
340       AudioChannelService::AudibleState::eAudible);
341   if (NS_WARN_IF(NS_FAILED(rv))) {
342     return;
343   }
344 
345   mAudioChannelAgent->PullInitialUpdate();
346 }
347 
DestroyAudioChannelAgent()348 void nsSpeechTask::DestroyAudioChannelAgent() {
349   if (mAudioChannelAgent) {
350     mAudioChannelAgent->NotifyStoppedPlaying();
351     mAudioChannelAgent = nullptr;
352   }
353 }
354 
355 NS_IMETHODIMP
WindowVolumeChanged(float aVolume,bool aMuted)356 nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) {
357   SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume);
358   return NS_OK;
359 }
360 
361 NS_IMETHODIMP
WindowSuspendChanged(nsSuspendedTypes aSuspend)362 nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) {
363   if (!mUtterance) {
364     return NS_OK;
365   }
366 
367   if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) {
368     Resume();
369   } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED &&
370              !mUtterance->mPaused) {
371     Pause();
372   }
373   return NS_OK;
374 }
375 
376 NS_IMETHODIMP
WindowAudioCaptureChanged(bool aCapture)377 nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) {
378   // This is not supported yet.
379   return NS_OK;
380 }
381 
SetAudioOutputVolume(float aVolume)382 void nsSpeechTask::SetAudioOutputVolume(float aVolume) {
383   if (mCallback) {
384     mCallback->OnVolumeChanged(aVolume);
385   }
386 }
387 
388 }  // namespace mozilla::dom
389