1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "nsISupportsPrimitives.h"
8 #include "nsSpeechTask.h"
9 #include "mozilla/Logging.h"
10
11 #include "mozilla/dom/Element.h"
12
13 #include "mozilla/dom/SpeechSynthesisBinding.h"
14 #include "mozilla/dom/WindowGlobalChild.h"
15 #include "SpeechSynthesis.h"
16 #include "nsContentUtils.h"
17 #include "nsSynthVoiceRegistry.h"
18 #include "mozilla/dom/Document.h"
19 #include "nsIDocShell.h"
20
21 #undef LOG
GetSpeechSynthLog()22 mozilla::LogModule* GetSpeechSynthLog() {
23 static mozilla::LazyLogModule sLog("SpeechSynthesis");
24
25 return sLog;
26 }
27 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
28
29 namespace mozilla::dom {
30
// Cycle collection and XPCOM interface plumbing for SpeechSynthesis. All of it
// is layered on top of the DOMEventTargetHelper base class.
NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis)

// Unlink: drop the current task and the queued utterances, empty the voice
// cache, and unlink the weak-reference support.
NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis,
                                                DOMEventTargetHelper)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask)
  NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue)
  tmp->mVoiceCache.Clear();
  NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
NS_IMPL_CYCLE_COLLECTION_UNLINK_END

// Traverse: report the same members to the cycle collector. The voice cache is
// walked by hand, noting every cached voice as an XPCOM child.
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis,
                                                  DOMEventTargetHelper)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask)
  NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue)
  for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) {
    cb.NoteXPCOMChild(voice);
  }
NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END

// Besides what DOMEventTargetHelper exposes, this class answers QueryInterface
// for nsIObserver and nsISupportsWeakReference.
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
  NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)

// Reference counting is forwarded to the base class.
NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
57
58 SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent)
59 : DOMEventTargetHelper(aParent),
60 mHoldQueue(false),
61 mInnerID(aParent->WindowID()) {
62 MOZ_ASSERT(NS_IsMainThread());
63
64 nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
65 if (obs) {
66 obs->AddObserver(this, "inner-window-destroyed", true);
67 obs->AddObserver(this, "synth-voices-changed", true);
68 }
69 }
70
71 SpeechSynthesis::~SpeechSynthesis() = default;
72
WrapObject(JSContext * aCx,JS::Handle<JSObject * > aGivenProto)73 JSObject* SpeechSynthesis::WrapObject(JSContext* aCx,
74 JS::Handle<JSObject*> aGivenProto) {
75 return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto);
76 }
77
Pending() const78 bool SpeechSynthesis::Pending() const {
79 // If we don't have any task, nothing is pending. If we have only one task,
80 // check if that task is currently pending. If we have more than one task,
81 // then the tasks after the first one are definitely pending.
82 return mSpeechQueue.Length() > 1 ||
83 (mSpeechQueue.Length() == 1 &&
84 (!mCurrentTask || mCurrentTask->IsPending()));
85 }
86
Speaking() const87 bool SpeechSynthesis::Speaking() const {
88 // Check global speaking state if there is no active speaking task.
89 return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) ||
90 nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
91 }
92
Paused() const93 bool SpeechSynthesis::Paused() const {
94 return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
95 (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
96 }
97
HasEmptyQueue() const98 bool SpeechSynthesis::HasEmptyQueue() const {
99 return mSpeechQueue.Length() == 0;
100 }
101
HasVoices() const102 bool SpeechSynthesis::HasVoices() const {
103 uint32_t voiceCount = mVoiceCache.Count();
104 if (voiceCount == 0) {
105 nsresult rv =
106 nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
107 if (NS_WARN_IF(NS_FAILED(rv))) {
108 return false;
109 }
110 }
111
112 return voiceCount != 0;
113 }
114
Speak(SpeechSynthesisUtterance & aUtterance)115 void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) {
116 if (!mInnerID) {
117 return;
118 }
119
120 mSpeechQueue.AppendElement(&aUtterance);
121
122 if (mSpeechQueue.Length() == 1) {
123 RefPtr<WindowGlobalChild> wgc =
124 WindowGlobalChild::GetByInnerWindowId(mInnerID);
125 if (wgc) {
126 wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
127 }
128
129 // If we only have one item in the queue, we aren't pre-paused, and
130 // we have voices available, speak it.
131 if (!mCurrentTask && !mHoldQueue && HasVoices()) {
132 AdvanceQueue();
133 }
134 }
135 }
136
AdvanceQueue()137 void SpeechSynthesis::AdvanceQueue() {
138 LOG(LogLevel::Debug,
139 ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length()));
140
141 if (mSpeechQueue.IsEmpty()) {
142 return;
143 }
144
145 RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0);
146
147 nsAutoString docLang;
148 nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
149 Document* doc = window ? window->GetExtantDoc() : nullptr;
150
151 if (doc) {
152 Element* elm = doc->GetHtmlElement();
153
154 if (elm) {
155 elm->GetLang(docLang);
156 }
157 }
158
159 mCurrentTask =
160 nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang);
161
162 if (mCurrentTask) {
163 mCurrentTask->SetSpeechSynthesis(this);
164 }
165 }
166
Cancel()167 void SpeechSynthesis::Cancel() {
168 if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
169 // Remove all queued utterances except for current one, we will remove it
170 // in OnEnd
171 mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1);
172 } else {
173 mSpeechQueue.Clear();
174 }
175
176 if (mCurrentTask) {
177 mCurrentTask->Cancel();
178 }
179 }
180
Pause()181 void SpeechSynthesis::Pause() {
182 if (Paused()) {
183 return;
184 }
185
186 if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
187 mCurrentTask->Pause();
188 } else {
189 mHoldQueue = true;
190 }
191 }
192
Resume()193 void SpeechSynthesis::Resume() {
194 if (!Paused()) {
195 return;
196 }
197
198 mHoldQueue = false;
199
200 if (mCurrentTask) {
201 mCurrentTask->Resume();
202 } else {
203 AdvanceQueue();
204 }
205 }
206
OnEnd(const nsSpeechTask * aTask)207 void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) {
208 MOZ_ASSERT(mCurrentTask == aTask);
209
210 if (!mSpeechQueue.IsEmpty()) {
211 mSpeechQueue.RemoveElementAt(0);
212 if (mSpeechQueue.IsEmpty()) {
213 RefPtr<WindowGlobalChild> wgc =
214 WindowGlobalChild::GetByInnerWindowId(mInnerID);
215 if (wgc) {
216 wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
217 }
218 }
219 }
220
221 mCurrentTask = nullptr;
222 AdvanceQueue();
223 }
224
GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice>> & aResult)225 void SpeechSynthesis::GetVoices(
226 nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) {
227 aResult.Clear();
228 uint32_t voiceCount = 0;
229 nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
230 nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
231
232 if (nsContentUtils::ShouldResistFingerprinting(docShell)) {
233 return;
234 }
235
236 nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
237 if (NS_WARN_IF(NS_FAILED(rv))) {
238 return;
239 }
240
241 nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
242
243 for (uint32_t i = 0; i < voiceCount; i++) {
244 nsAutoString uri;
245 rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri);
246
247 if (NS_FAILED(rv)) {
248 NS_WARNING("Failed to retrieve voice from registry");
249 continue;
250 }
251
252 SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri);
253
254 if (!voice) {
255 voice = new SpeechSynthesisVoice(voiceParent, uri);
256 }
257
258 aResult.AppendElement(voice);
259 }
260
261 mVoiceCache.Clear();
262
263 for (uint32_t i = 0; i < aResult.Length(); i++) {
264 SpeechSynthesisVoice* voice = aResult[i];
265 mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice});
266 }
267 }
268
269 // For testing purposes, allows us to cancel the current task that is
270 // misbehaving, and flush the queue.
ForceEnd()271 void SpeechSynthesis::ForceEnd() {
272 if (mCurrentTask) {
273 mCurrentTask->ForceEnd();
274 }
275 }
276
277 NS_IMETHODIMP
Observe(nsISupports * aSubject,const char * aTopic,const char16_t * aData)278 SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic,
279 const char16_t* aData) {
280 MOZ_ASSERT(NS_IsMainThread());
281
282 if (strcmp(aTopic, "inner-window-destroyed") == 0) {
283 nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject);
284 NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE);
285
286 uint64_t innerID;
287 nsresult rv = wrapper->GetData(&innerID);
288 NS_ENSURE_SUCCESS(rv, rv);
289
290 if (innerID == mInnerID) {
291 mInnerID = 0;
292 Cancel();
293
294 nsCOMPtr<nsIObserverService> obs =
295 mozilla::services::GetObserverService();
296 if (obs) {
297 obs->RemoveObserver(this, "inner-window-destroyed");
298 }
299 }
300 } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
301 LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
302 nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
303 nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
304
305 if (!nsContentUtils::ShouldResistFingerprinting(docShell)) {
306 DispatchTrustedEvent(u"voiceschanged"_ns);
307 // If we have a pending item, and voices become available, speak it.
308 if (!mCurrentTask && !mHoldQueue && HasVoices()) {
309 AdvanceQueue();
310 }
311 }
312 }
313
314 return NS_OK;
315 }
316
317 } // namespace mozilla::dom
318