1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "AudioChannelService.h"
8 #include "AudioSegment.h"
9 #include "nsSpeechTask.h"
10 #include "nsSynthVoiceRegistry.h"
11 #include "nsXULAppAPI.h"
12 #include "SharedBuffer.h"
13 #include "SpeechSynthesis.h"
14
// Drop any previously defined LOG macro, then make this file's
// LOG(type, msg) expand to MOZ_LOG on the module returned by
// GetSpeechSynthLog(). That function is only declared here; its definition
// is not part of this file.
15 #undef LOG
16 extern mozilla::LogModule* GetSpeechSynthLog();
17 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
18
// Not referenced anywhere in the visible part of this file.
// NOTE(review): possibly a leftover constant - confirm before removing.
19 #define AUDIO_TRACK 1
20
21 namespace mozilla::dom {
22
23 // nsSpeechTask
24
// Cycle-collection boilerplate for nsSpeechTask. The members handed to the
// macro are mSpeechSynthesis, mUtterance and mCallback.
// NOTE(review): the _WEAK variant presumably also accounts for weak
// references to this object - confirm against the macro definition.
NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask,mSpeechSynthesis,mUtterance,mCallback)25 NS_IMPL_CYCLE_COLLECTION_WEAK(nsSpeechTask, mSpeechSynthesis, mUtterance,
26 mCallback)
27
// Interface map for nsSpeechTask: lists nsISpeechTask,
// nsIAudioChannelAgentCallback and nsISupportsWeakReference. nsISupports is
// listed as ambiguous and is resolved through nsISpeechTask.
28 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSpeechTask)
29 NS_INTERFACE_MAP_ENTRY(nsISpeechTask)
30 NS_INTERFACE_MAP_ENTRY(nsIAudioChannelAgentCallback)
31 NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
32 NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTask)
33 NS_INTERFACE_MAP_END
34
// Macro-generated AddRef/Release implementations for nsSpeechTask
// (cycle-collecting variants).
35 NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSpeechTask)
36 NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSpeechTask)
37
38 nsSpeechTask::nsSpeechTask(SpeechSynthesisUtterance* aUtterance, bool aIsChrome)
39 : mUtterance(aUtterance),
40 mInited(false),
41 mPrePaused(false),
42 mPreCanceled(false),
43 mCallback(nullptr),
44 mIsChrome(aIsChrome),
45 mState(STATE_PENDING) {
46 mText = aUtterance->mText;
47 mVolume = aUtterance->Volume();
48 }
49
nsSpeechTask(float aVolume,const nsAString & aText,bool aIsChrome)50 nsSpeechTask::nsSpeechTask(float aVolume, const nsAString& aText,
51 bool aIsChrome)
52 : mUtterance(nullptr),
53 mVolume(aVolume),
54 mText(aText),
55 mInited(false),
56 mPrePaused(false),
57 mPreCanceled(false),
58 mCallback(nullptr),
59 mIsChrome(aIsChrome),
60 mState(STATE_PENDING) {}
61
~nsSpeechTask()62 nsSpeechTask::~nsSpeechTask() { LOG(LogLevel::Debug, ("~nsSpeechTask")); }
63
Init()64 void nsSpeechTask::Init() { mInited = true; }
65
SetChosenVoiceURI(const nsAString & aUri)66 void nsSpeechTask::SetChosenVoiceURI(const nsAString& aUri) {
67 mChosenVoiceURI = aUri;
68 }
69
70 NS_IMETHODIMP
Setup(nsISpeechTaskCallback * aCallback)71 nsSpeechTask::Setup(nsISpeechTaskCallback* aCallback) {
72 MOZ_ASSERT(XRE_IsParentProcess());
73
74 LOG(LogLevel::Debug, ("nsSpeechTask::Setup"));
75
76 mCallback = aCallback;
77
78 return NS_OK;
79 }
80
81 NS_IMETHODIMP
DispatchStart()82 nsSpeechTask::DispatchStart() {
83 nsSynthVoiceRegistry::GetInstance()->SetIsSpeaking(true);
84 return DispatchStartImpl();
85 }
86
DispatchStartImpl()87 nsresult nsSpeechTask::DispatchStartImpl() {
88 return DispatchStartImpl(mChosenVoiceURI);
89 }
90
DispatchStartImpl(const nsAString & aUri)91 nsresult nsSpeechTask::DispatchStartImpl(const nsAString& aUri) {
92 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchStartImpl"));
93
94 MOZ_ASSERT(mUtterance);
95 if (NS_WARN_IF(mState != STATE_PENDING)) {
96 return NS_ERROR_NOT_AVAILABLE;
97 }
98
99 CreateAudioChannelAgent();
100
101 mState = STATE_SPEAKING;
102 mUtterance->mChosenVoiceURI = aUri;
103 mUtterance->DispatchSpeechSynthesisEvent(u"start"_ns, 0, nullptr, 0, u""_ns);
104
105 return NS_OK;
106 }
107
108 NS_IMETHODIMP
DispatchEnd(float aElapsedTime,uint32_t aCharIndex)109 nsSpeechTask::DispatchEnd(float aElapsedTime, uint32_t aCharIndex) {
110 // After we end, no callback functions should go through.
111 mCallback = nullptr;
112
113 if (!mPreCanceled) {
114 nsSynthVoiceRegistry::GetInstance()->SpeakNext();
115 }
116
117 return DispatchEndImpl(aElapsedTime, aCharIndex);
118 }
119
DispatchEndImpl(float aElapsedTime,uint32_t aCharIndex)120 nsresult nsSpeechTask::DispatchEndImpl(float aElapsedTime,
121 uint32_t aCharIndex) {
122 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchEndImpl"));
123
124 DestroyAudioChannelAgent();
125
126 MOZ_ASSERT(mUtterance);
127 if (NS_WARN_IF(mState == STATE_ENDED)) {
128 return NS_ERROR_NOT_AVAILABLE;
129 }
130
131 RefPtr<SpeechSynthesisUtterance> utterance = mUtterance;
132
133 if (mSpeechSynthesis) {
134 mSpeechSynthesis->OnEnd(this);
135 }
136
137 mState = STATE_ENDED;
138 utterance->DispatchSpeechSynthesisEvent(u"end"_ns, aCharIndex, nullptr,
139 aElapsedTime, u""_ns);
140
141 return NS_OK;
142 }
143
144 NS_IMETHODIMP
DispatchPause(float aElapsedTime,uint32_t aCharIndex)145 nsSpeechTask::DispatchPause(float aElapsedTime, uint32_t aCharIndex) {
146 return DispatchPauseImpl(aElapsedTime, aCharIndex);
147 }
148
DispatchPauseImpl(float aElapsedTime,uint32_t aCharIndex)149 nsresult nsSpeechTask::DispatchPauseImpl(float aElapsedTime,
150 uint32_t aCharIndex) {
151 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchPauseImpl"));
152 MOZ_ASSERT(mUtterance);
153 if (NS_WARN_IF(mUtterance->mPaused)) {
154 return NS_ERROR_NOT_AVAILABLE;
155 }
156 if (NS_WARN_IF(mState == STATE_ENDED)) {
157 return NS_ERROR_NOT_AVAILABLE;
158 }
159
160 mUtterance->mPaused = true;
161 if (mState == STATE_SPEAKING) {
162 mUtterance->DispatchSpeechSynthesisEvent(u"pause"_ns, aCharIndex, nullptr,
163 aElapsedTime, u""_ns);
164 }
165
166 return NS_OK;
167 }
168
169 NS_IMETHODIMP
DispatchResume(float aElapsedTime,uint32_t aCharIndex)170 nsSpeechTask::DispatchResume(float aElapsedTime, uint32_t aCharIndex) {
171 return DispatchResumeImpl(aElapsedTime, aCharIndex);
172 }
173
DispatchResumeImpl(float aElapsedTime,uint32_t aCharIndex)174 nsresult nsSpeechTask::DispatchResumeImpl(float aElapsedTime,
175 uint32_t aCharIndex) {
176 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchResumeImpl"));
177 MOZ_ASSERT(mUtterance);
178 if (NS_WARN_IF(!(mUtterance->mPaused))) {
179 return NS_ERROR_NOT_AVAILABLE;
180 }
181 if (NS_WARN_IF(mState == STATE_ENDED)) {
182 return NS_ERROR_NOT_AVAILABLE;
183 }
184
185 mUtterance->mPaused = false;
186 if (mState == STATE_SPEAKING) {
187 mUtterance->DispatchSpeechSynthesisEvent(u"resume"_ns, aCharIndex, nullptr,
188 aElapsedTime, u""_ns);
189 }
190
191 return NS_OK;
192 }
193
ForceError(float aElapsedTime,uint32_t aCharIndex)194 void nsSpeechTask::ForceError(float aElapsedTime, uint32_t aCharIndex) {
195 DispatchError(aElapsedTime, aCharIndex);
196 }
197
198 NS_IMETHODIMP
DispatchError(float aElapsedTime,uint32_t aCharIndex)199 nsSpeechTask::DispatchError(float aElapsedTime, uint32_t aCharIndex) {
200 if (!mPreCanceled) {
201 nsSynthVoiceRegistry::GetInstance()->SpeakNext();
202 }
203
204 return DispatchErrorImpl(aElapsedTime, aCharIndex);
205 }
206
DispatchErrorImpl(float aElapsedTime,uint32_t aCharIndex)207 nsresult nsSpeechTask::DispatchErrorImpl(float aElapsedTime,
208 uint32_t aCharIndex) {
209 LOG(LogLevel::Debug, ("nsSpeechTask::DispatchErrorImpl"));
210
211 DestroyAudioChannelAgent();
212
213 MOZ_ASSERT(mUtterance);
214 if (NS_WARN_IF(mState == STATE_ENDED)) {
215 return NS_ERROR_NOT_AVAILABLE;
216 }
217
218 if (mSpeechSynthesis) {
219 mSpeechSynthesis->OnEnd(this);
220 }
221
222 mState = STATE_ENDED;
223 mUtterance->DispatchSpeechSynthesisEvent(u"error"_ns, aCharIndex, nullptr,
224 aElapsedTime, u""_ns);
225 return NS_OK;
226 }
227
228 NS_IMETHODIMP
DispatchBoundary(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex,uint32_t aCharLength,uint8_t argc)229 nsSpeechTask::DispatchBoundary(const nsAString& aName, float aElapsedTime,
230 uint32_t aCharIndex, uint32_t aCharLength,
231 uint8_t argc) {
232 return DispatchBoundaryImpl(aName, aElapsedTime, aCharIndex, aCharLength,
233 argc);
234 }
235
DispatchBoundaryImpl(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex,uint32_t aCharLength,uint8_t argc)236 nsresult nsSpeechTask::DispatchBoundaryImpl(const nsAString& aName,
237 float aElapsedTime,
238 uint32_t aCharIndex,
239 uint32_t aCharLength,
240 uint8_t argc) {
241 MOZ_ASSERT(mUtterance);
242 if (NS_WARN_IF(mState != STATE_SPEAKING)) {
243 return NS_ERROR_NOT_AVAILABLE;
244 }
245 mUtterance->DispatchSpeechSynthesisEvent(
246 u"boundary"_ns, aCharIndex,
247 argc ? static_cast<Nullable<uint32_t> >(aCharLength) : nullptr,
248 aElapsedTime, aName);
249
250 return NS_OK;
251 }
252
253 NS_IMETHODIMP
DispatchMark(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex)254 nsSpeechTask::DispatchMark(const nsAString& aName, float aElapsedTime,
255 uint32_t aCharIndex) {
256 return DispatchMarkImpl(aName, aElapsedTime, aCharIndex);
257 }
258
DispatchMarkImpl(const nsAString & aName,float aElapsedTime,uint32_t aCharIndex)259 nsresult nsSpeechTask::DispatchMarkImpl(const nsAString& aName,
260 float aElapsedTime,
261 uint32_t aCharIndex) {
262 MOZ_ASSERT(mUtterance);
263 if (NS_WARN_IF(mState != STATE_SPEAKING)) {
264 return NS_ERROR_NOT_AVAILABLE;
265 }
266 mUtterance->DispatchSpeechSynthesisEvent(u"mark"_ns, aCharIndex, nullptr,
267 aElapsedTime, aName);
268 return NS_OK;
269 }
270
Pause()271 void nsSpeechTask::Pause() {
272 MOZ_ASSERT(XRE_IsParentProcess());
273
274 if (mCallback) {
275 DebugOnly<nsresult> rv = mCallback->OnPause();
276 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Unable to call onPause() callback");
277 }
278
279 if (!mInited) {
280 mPrePaused = true;
281 }
282 }
283
Resume()284 void nsSpeechTask::Resume() {
285 MOZ_ASSERT(XRE_IsParentProcess());
286
287 if (mCallback) {
288 DebugOnly<nsresult> rv = mCallback->OnResume();
289 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
290 "Unable to call onResume() callback");
291 }
292
293 if (mPrePaused) {
294 mPrePaused = false;
295 nsSynthVoiceRegistry::GetInstance()->ResumeQueue();
296 }
297 }
298
Cancel()299 void nsSpeechTask::Cancel() {
300 MOZ_ASSERT(XRE_IsParentProcess());
301
302 LOG(LogLevel::Debug, ("nsSpeechTask::Cancel"));
303
304 if (mCallback) {
305 DebugOnly<nsresult> rv = mCallback->OnCancel();
306 NS_WARNING_ASSERTION(NS_SUCCEEDED(rv),
307 "Unable to call onCancel() callback");
308 }
309
310 if (!mInited) {
311 mPreCanceled = true;
312 }
313 }
314
ForceEnd()315 void nsSpeechTask::ForceEnd() {
316 if (!mInited) {
317 mPreCanceled = true;
318 }
319
320 DispatchEnd(0, 0);
321 }
322
SetSpeechSynthesis(SpeechSynthesis * aSpeechSynthesis)323 void nsSpeechTask::SetSpeechSynthesis(SpeechSynthesis* aSpeechSynthesis) {
324 mSpeechSynthesis = aSpeechSynthesis;
325 }
326
CreateAudioChannelAgent()327 void nsSpeechTask::CreateAudioChannelAgent() {
328 if (!mUtterance) {
329 return;
330 }
331
332 if (mAudioChannelAgent) {
333 mAudioChannelAgent->NotifyStoppedPlaying();
334 }
335
336 mAudioChannelAgent = new AudioChannelAgent();
337 mAudioChannelAgent->InitWithWeakCallback(mUtterance->GetOwner(), this);
338
339 nsresult rv = mAudioChannelAgent->NotifyStartedPlaying(
340 AudioChannelService::AudibleState::eAudible);
341 if (NS_WARN_IF(NS_FAILED(rv))) {
342 return;
343 }
344
345 mAudioChannelAgent->PullInitialUpdate();
346 }
347
DestroyAudioChannelAgent()348 void nsSpeechTask::DestroyAudioChannelAgent() {
349 if (mAudioChannelAgent) {
350 mAudioChannelAgent->NotifyStoppedPlaying();
351 mAudioChannelAgent = nullptr;
352 }
353 }
354
355 NS_IMETHODIMP
WindowVolumeChanged(float aVolume,bool aMuted)356 nsSpeechTask::WindowVolumeChanged(float aVolume, bool aMuted) {
357 SetAudioOutputVolume(aMuted ? 0.0 : mVolume * aVolume);
358 return NS_OK;
359 }
360
361 NS_IMETHODIMP
WindowSuspendChanged(nsSuspendedTypes aSuspend)362 nsSpeechTask::WindowSuspendChanged(nsSuspendedTypes aSuspend) {
363 if (!mUtterance) {
364 return NS_OK;
365 }
366
367 if (aSuspend == nsISuspendedTypes::NONE_SUSPENDED && mUtterance->mPaused) {
368 Resume();
369 } else if (aSuspend != nsISuspendedTypes::NONE_SUSPENDED &&
370 !mUtterance->mPaused) {
371 Pause();
372 }
373 return NS_OK;
374 }
375
376 NS_IMETHODIMP
WindowAudioCaptureChanged(bool aCapture)377 nsSpeechTask::WindowAudioCaptureChanged(bool aCapture) {
378 // This is not supported yet.
379 return NS_OK;
380 }
381
SetAudioOutputVolume(float aVolume)382 void nsSpeechTask::SetAudioOutputVolume(float aVolume) {
383 if (mCallback) {
384 mCallback->OnVolumeChanged(aVolume);
385 }
386 }
387
388 } // namespace mozilla::dom
389