1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "nsISupportsPrimitives.h"
8 #include "nsSpeechTask.h"
9 #include "mozilla/Logging.h"
10 
11 #include "mozilla/dom/Element.h"
12 
13 #include "mozilla/dom/SpeechSynthesisBinding.h"
14 #include "mozilla/dom/WindowGlobalChild.h"
15 #include "SpeechSynthesis.h"
16 #include "nsContentUtils.h"
17 #include "nsSynthVoiceRegistry.h"
18 #include "mozilla/dom/Document.h"
19 #include "nsIDocShell.h"
20 
21 #undef LOG
GetSpeechSynthLog()22 mozilla::LogModule* GetSpeechSynthLog() {
23   static mozilla::LazyLogModule sLog("SpeechSynthesis");
24 
25   return sLog;
26 }
27 #define LOG(type, msg) MOZ_LOG(GetSpeechSynthLog(), type, msg)
28 
29 namespace mozilla::dom {
30 
31 NS_IMPL_CYCLE_COLLECTION_CLASS(SpeechSynthesis)
32 
33 NS_IMPL_CYCLE_COLLECTION_UNLINK_BEGIN_INHERITED(SpeechSynthesis,
34                                                 DOMEventTargetHelper)
35   NS_IMPL_CYCLE_COLLECTION_UNLINK(mCurrentTask)
36   NS_IMPL_CYCLE_COLLECTION_UNLINK(mSpeechQueue)
37   tmp->mVoiceCache.Clear();
38   NS_IMPL_CYCLE_COLLECTION_UNLINK_WEAK_REFERENCE
39 NS_IMPL_CYCLE_COLLECTION_UNLINK_END
40 
41 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_BEGIN_INHERITED(SpeechSynthesis,
42                                                   DOMEventTargetHelper)
43   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mCurrentTask)
44   NS_IMPL_CYCLE_COLLECTION_TRAVERSE(mSpeechQueue)
45   for (SpeechSynthesisVoice* voice : tmp->mVoiceCache.Values()) {
46     cb.NoteXPCOMChild(voice);
47   }
48 NS_IMPL_CYCLE_COLLECTION_TRAVERSE_END
49 
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis)50 NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechSynthesis)
51   NS_INTERFACE_MAP_ENTRY(nsIObserver)
52   NS_INTERFACE_MAP_ENTRY(nsISupportsWeakReference)
53 NS_INTERFACE_MAP_END_INHERITING(DOMEventTargetHelper)
54 
55 NS_IMPL_ADDREF_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
56 NS_IMPL_RELEASE_INHERITED(SpeechSynthesis, DOMEventTargetHelper)
57 
58 SpeechSynthesis::SpeechSynthesis(nsPIDOMWindowInner* aParent)
59     : DOMEventTargetHelper(aParent),
60       mHoldQueue(false),
61       mInnerID(aParent->WindowID()) {
62   MOZ_ASSERT(NS_IsMainThread());
63 
64   nsCOMPtr<nsIObserverService> obs = mozilla::services::GetObserverService();
65   if (obs) {
66     obs->AddObserver(this, "inner-window-destroyed", true);
67     obs->AddObserver(this, "synth-voices-changed", true);
68   }
69 }
70 
71 SpeechSynthesis::~SpeechSynthesis() = default;
72 
WrapObject(JSContext * aCx,JS::Handle<JSObject * > aGivenProto)73 JSObject* SpeechSynthesis::WrapObject(JSContext* aCx,
74                                       JS::Handle<JSObject*> aGivenProto) {
75   return SpeechSynthesis_Binding::Wrap(aCx, this, aGivenProto);
76 }
77 
Pending() const78 bool SpeechSynthesis::Pending() const {
79   // If we don't have any task, nothing is pending. If we have only one task,
80   // check if that task is currently pending. If we have more than one task,
81   // then the tasks after the first one are definitely pending.
82   return mSpeechQueue.Length() > 1 ||
83          (mSpeechQueue.Length() == 1 &&
84           (!mCurrentTask || mCurrentTask->IsPending()));
85 }
86 
Speaking() const87 bool SpeechSynthesis::Speaking() const {
88   // Check global speaking state if there is no active speaking task.
89   return (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) ||
90          nsSynthVoiceRegistry::GetInstance()->IsSpeaking();
91 }
92 
Paused() const93 bool SpeechSynthesis::Paused() const {
94   return mHoldQueue || (mCurrentTask && mCurrentTask->IsPrePaused()) ||
95          (!mSpeechQueue.IsEmpty() && mSpeechQueue.ElementAt(0)->IsPaused());
96 }
97 
HasEmptyQueue() const98 bool SpeechSynthesis::HasEmptyQueue() const {
99   return mSpeechQueue.Length() == 0;
100 }
101 
HasVoices() const102 bool SpeechSynthesis::HasVoices() const {
103   uint32_t voiceCount = mVoiceCache.Count();
104   if (voiceCount == 0) {
105     nsresult rv =
106         nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
107     if (NS_WARN_IF(NS_FAILED(rv))) {
108       return false;
109     }
110   }
111 
112   return voiceCount != 0;
113 }
114 
Speak(SpeechSynthesisUtterance & aUtterance)115 void SpeechSynthesis::Speak(SpeechSynthesisUtterance& aUtterance) {
116   if (!mInnerID) {
117     return;
118   }
119 
120   mSpeechQueue.AppendElement(&aUtterance);
121 
122   if (mSpeechQueue.Length() == 1) {
123     RefPtr<WindowGlobalChild> wgc =
124         WindowGlobalChild::GetByInnerWindowId(mInnerID);
125     if (wgc) {
126       wgc->BlockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
127     }
128 
129     // If we only have one item in the queue, we aren't pre-paused, and
130     // we have voices available, speak it.
131     if (!mCurrentTask && !mHoldQueue && HasVoices()) {
132       AdvanceQueue();
133     }
134   }
135 }
136 
AdvanceQueue()137 void SpeechSynthesis::AdvanceQueue() {
138   LOG(LogLevel::Debug,
139       ("SpeechSynthesis::AdvanceQueue length=%zu", mSpeechQueue.Length()));
140 
141   if (mSpeechQueue.IsEmpty()) {
142     return;
143   }
144 
145   RefPtr<SpeechSynthesisUtterance> utterance = mSpeechQueue.ElementAt(0);
146 
147   nsAutoString docLang;
148   nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
149   Document* doc = window ? window->GetExtantDoc() : nullptr;
150 
151   if (doc) {
152     Element* elm = doc->GetHtmlElement();
153 
154     if (elm) {
155       elm->GetLang(docLang);
156     }
157   }
158 
159   mCurrentTask =
160       nsSynthVoiceRegistry::GetInstance()->SpeakUtterance(*utterance, docLang);
161 
162   if (mCurrentTask) {
163     mCurrentTask->SetSpeechSynthesis(this);
164   }
165 }
166 
Cancel()167 void SpeechSynthesis::Cancel() {
168   if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
169     // Remove all queued utterances except for current one, we will remove it
170     // in OnEnd
171     mSpeechQueue.RemoveLastElements(mSpeechQueue.Length() - 1);
172   } else {
173     mSpeechQueue.Clear();
174   }
175 
176   if (mCurrentTask) {
177     mCurrentTask->Cancel();
178   }
179 }
180 
Pause()181 void SpeechSynthesis::Pause() {
182   if (Paused()) {
183     return;
184   }
185 
186   if (!mSpeechQueue.IsEmpty() && HasSpeakingTask()) {
187     mCurrentTask->Pause();
188   } else {
189     mHoldQueue = true;
190   }
191 }
192 
Resume()193 void SpeechSynthesis::Resume() {
194   if (!Paused()) {
195     return;
196   }
197 
198   mHoldQueue = false;
199 
200   if (mCurrentTask) {
201     mCurrentTask->Resume();
202   } else {
203     AdvanceQueue();
204   }
205 }
206 
OnEnd(const nsSpeechTask * aTask)207 void SpeechSynthesis::OnEnd(const nsSpeechTask* aTask) {
208   MOZ_ASSERT(mCurrentTask == aTask);
209 
210   if (!mSpeechQueue.IsEmpty()) {
211     mSpeechQueue.RemoveElementAt(0);
212     if (mSpeechQueue.IsEmpty()) {
213       RefPtr<WindowGlobalChild> wgc =
214           WindowGlobalChild::GetByInnerWindowId(mInnerID);
215       if (wgc) {
216         wgc->UnblockBFCacheFor(BFCacheStatus::HAS_ACTIVE_SPEECH_SYNTHESIS);
217       }
218     }
219   }
220 
221   mCurrentTask = nullptr;
222   AdvanceQueue();
223 }
224 
GetVoices(nsTArray<RefPtr<SpeechSynthesisVoice>> & aResult)225 void SpeechSynthesis::GetVoices(
226     nsTArray<RefPtr<SpeechSynthesisVoice> >& aResult) {
227   aResult.Clear();
228   uint32_t voiceCount = 0;
229   nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
230   nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
231 
232   if (nsContentUtils::ShouldResistFingerprinting(docShell)) {
233     return;
234   }
235 
236   nsresult rv = nsSynthVoiceRegistry::GetInstance()->GetVoiceCount(&voiceCount);
237   if (NS_WARN_IF(NS_FAILED(rv))) {
238     return;
239   }
240 
241   nsISupports* voiceParent = NS_ISUPPORTS_CAST(nsIObserver*, this);
242 
243   for (uint32_t i = 0; i < voiceCount; i++) {
244     nsAutoString uri;
245     rv = nsSynthVoiceRegistry::GetInstance()->GetVoice(i, uri);
246 
247     if (NS_FAILED(rv)) {
248       NS_WARNING("Failed to retrieve voice from registry");
249       continue;
250     }
251 
252     SpeechSynthesisVoice* voice = mVoiceCache.GetWeak(uri);
253 
254     if (!voice) {
255       voice = new SpeechSynthesisVoice(voiceParent, uri);
256     }
257 
258     aResult.AppendElement(voice);
259   }
260 
261   mVoiceCache.Clear();
262 
263   for (uint32_t i = 0; i < aResult.Length(); i++) {
264     SpeechSynthesisVoice* voice = aResult[i];
265     mVoiceCache.InsertOrUpdate(voice->mUri, RefPtr{voice});
266   }
267 }
268 
269 // For testing purposes, allows us to cancel the current task that is
270 // misbehaving, and flush the queue.
ForceEnd()271 void SpeechSynthesis::ForceEnd() {
272   if (mCurrentTask) {
273     mCurrentTask->ForceEnd();
274   }
275 }
276 
277 NS_IMETHODIMP
Observe(nsISupports * aSubject,const char * aTopic,const char16_t * aData)278 SpeechSynthesis::Observe(nsISupports* aSubject, const char* aTopic,
279                          const char16_t* aData) {
280   MOZ_ASSERT(NS_IsMainThread());
281 
282   if (strcmp(aTopic, "inner-window-destroyed") == 0) {
283     nsCOMPtr<nsISupportsPRUint64> wrapper = do_QueryInterface(aSubject);
284     NS_ENSURE_TRUE(wrapper, NS_ERROR_FAILURE);
285 
286     uint64_t innerID;
287     nsresult rv = wrapper->GetData(&innerID);
288     NS_ENSURE_SUCCESS(rv, rv);
289 
290     if (innerID == mInnerID) {
291       mInnerID = 0;
292       Cancel();
293 
294       nsCOMPtr<nsIObserverService> obs =
295           mozilla::services::GetObserverService();
296       if (obs) {
297         obs->RemoveObserver(this, "inner-window-destroyed");
298       }
299     }
300   } else if (strcmp(aTopic, "synth-voices-changed") == 0) {
301     LOG(LogLevel::Debug, ("SpeechSynthesis::onvoiceschanged"));
302     nsCOMPtr<nsPIDOMWindowInner> window = GetOwner();
303     nsCOMPtr<nsIDocShell> docShell = window ? window->GetDocShell() : nullptr;
304 
305     if (!nsContentUtils::ShouldResistFingerprinting(docShell)) {
306       DispatchTrustedEvent(u"voiceschanged"_ns);
307       // If we have a pending item, and voices become available, speak it.
308       if (!mCurrentTask && !mHoldQueue && HasVoices()) {
309         AdvanceQueue();
310       }
311     }
312   }
313 
314   return NS_OK;
315 }
316 
317 }  // namespace mozilla::dom
318