1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the Qt Speech module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL3$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPLv3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or later as published by the Free
28 ** Software Foundation and appearing in the file LICENSE.GPL included in
29 ** the packaging of this file. Please review the following information to
30 ** ensure the GNU General Public License version 2.0 requirements will be
31 ** met: http://www.gnu.org/licenses/gpl-2.0.html.
32 **
33 ** $QT_END_LICENSE$
34 **
35 ****************************************************************************/
36
37 #include "qtexttospeech_winrt.h"
38
39 #include <QtCore/QCoreApplication>
40 #include <QtCore/qfunctions_winrt.h>
41 #include <QtCore/QMap>
42 #include <QtCore/QTimer>
43 #include <private/qeventdispatcher_winrt_p.h>
44
45 #include <windows.foundation.h>
46 #include <windows.foundation.collections.h>
47 #include <windows.media.speechsynthesis.h>
48 #include <windows.storage.streams.h>
49 #include <windows.ui.xaml.h>
50 #include <windows.ui.xaml.controls.h>
51 #include <windows.ui.xaml.markup.h>
52
53 #include <functional>
54 #include <wrl.h>
55
56 using namespace ABI::Windows::Foundation;
57 using namespace ABI::Windows::Foundation::Collections;
58 using namespace ABI::Windows::Media::SpeechSynthesis;
59 using namespace ABI::Windows::Storage::Streams;
60 using namespace ABI::Windows::UI::Xaml;
61 using namespace ABI::Windows::UI::Xaml::Controls;
62 using namespace ABI::Windows::UI::Xaml::Markup;
63 using namespace ABI::Windows::UI::Xaml::Media;
64 using namespace Microsoft::WRL;
65 using namespace Microsoft::WRL::Wrappers;
66
67 QT_BEGIN_NAMESPACE
68
// LSTRING turns its argument into a wide string literal: the argument is
// stringized and then concatenated with an empty wide literal. A plain
// "L#str" only produces a wide literal with MSVC's traditional preprocessor;
// a conforming preprocessor treats L and the stringized text as two tokens.
#define LSTRING(str) L"" #str

// XAML markup describing a bare MediaElement. say() passes it to
// XamlReader::Load to create the element used for playback. Despite the
// variable's name, the markup declares a MediaElement, not a web view.
static const wchar_t webviewXaml[] = LSTRING(
<MediaElement xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation" />
);
73
74 class QTextToSpeechEngineWinRTPrivate
75 {
76 public:
77 QTimer timer;
78 ComPtr<IXamlReaderStatics> xamlReader;
79 ComPtr<ISpeechSynthesizer> synth;
80 QVector<QLocale> locales;
81 QVector<QVoice> voices;
82 QVector<ComPtr<IVoiceInformation>> infos;
83 EventRegistrationToken tok;
84
85 ComPtr<IMediaElement> media;
86
87 double rate;
88 double volume;
89
90 QTextToSpeech::State state;
91 };
92
QTextToSpeechEngineWinRT(const QVariantMap &,QObject * parent)93 QTextToSpeechEngineWinRT::QTextToSpeechEngineWinRT(const QVariantMap &, QObject *parent)
94 : QTextToSpeechEngine(parent)
95 , d_ptr(new QTextToSpeechEngineWinRTPrivate)
96 {
97 d_ptr->rate = 0;
98 d_ptr->volume = 1.0;
99 d_ptr->timer.setInterval(100);
100 connect(&d_ptr->timer, &QTimer::timeout, this, &QTextToSpeechEngineWinRT::checkElementState);
101
102 init();
103 }
104
~QTextToSpeechEngineWinRT()105 QTextToSpeechEngineWinRT::~QTextToSpeechEngineWinRT()
106 {
107 }
108
availableLocales() const109 QVector<QLocale> QTextToSpeechEngineWinRT::availableLocales() const
110 {
111 Q_D(const QTextToSpeechEngineWinRT);
112 return d->locales;
113 }
114
availableVoices() const115 QVector<QVoice> QTextToSpeechEngineWinRT::availableVoices() const
116 {
117 Q_D(const QTextToSpeechEngineWinRT);
118 return d->voices;
119 }
120
say(const QString & text)121 void QTextToSpeechEngineWinRT::say(const QString &text)
122 {
123 Q_D(QTextToSpeechEngineWinRT);
124
125 HRESULT hr;
126
127 hr = QEventDispatcherWinRT::runOnXamlThread([text, d]() {
128 HRESULT hr;
129 HStringReference nativeText(reinterpret_cast<LPCWSTR>(text.utf16()), text.length());
130 ComPtr<IAsyncOperation<SpeechSynthesisStream*>> op;
131
132 hr = d->synth->SynthesizeTextToStreamAsync(nativeText.Get(), &op);
133 RETURN_HR_IF_FAILED("Could not synthesize text.");
134
135 ComPtr<ISpeechSynthesisStream> stream;
136 hr = QWinRTFunctions::await(op, stream.GetAddressOf());
137 RETURN_HR_IF_FAILED("Synthesizing failed.");
138
139 ComPtr<IRandomAccessStream> randomStream;
140 hr = stream.As(&randomStream);
141 RETURN_HR_IF_FAILED("Could not cast to RandomAccessStream.");
142
143 // Directly instantiating a MediaElement works, but it throws an exception
144 // when setting the source. Using a XamlReader appears to set it up properly.
145 ComPtr<IInspectable> element;
146 hr = d->xamlReader->Load(HString::MakeReference(webviewXaml).Get(), &element);
147 Q_ASSERT_SUCCEEDED(hr);
148
149 if (d->media)
150 d->media.Reset();
151
152 hr = element.As(&d->media);
153 RETURN_HR_IF_FAILED("Could not create MediaElement for playback.");
154
155 // Volume and Playback Rate cannot be changed for synthesized audio once
156 // it has been created. Hence QTextToSpeechEngineWinRT::setVolume/Rate
157 // only cache the value until playback is started.
158 hr = d->media->put_DefaultPlaybackRate(d->rate + 1);
159 if (FAILED(hr))
160 qWarning("Could not set playback rate.");
161
162 const DOUBLE vol = DOUBLE(d->volume);
163 hr = d->media->put_Volume(vol);
164 if (FAILED(hr))
165 qWarning("Could not set volume.");
166
167 static const HStringReference empty(L"");
168 hr = d->media->SetSource(randomStream.Get(), empty.Get());
169 RETURN_HR_IF_FAILED("Could not set media source.");
170
171 hr = d->media->Play();
172 RETURN_HR_IF_FAILED("Could not initiate playback.");
173
174 return S_OK;
175 });
176 if (SUCCEEDED(hr)) {
177 d->timer.start();
178 d->state = QTextToSpeech::Speaking;
179 } else {
180 d->state = QTextToSpeech::BackendError;
181 }
182 emit stateChanged(d->state);
183 }
184
stop()185 void QTextToSpeechEngineWinRT::stop()
186 {
187 Q_D(QTextToSpeechEngineWinRT);
188
189 if (!d->media)
190 return;
191
192 HRESULT hr;
193 hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
194 HRESULT hr = d->media->Stop();
195 RETURN_HR_IF_FAILED("Could not stop playback.");
196
197 d->media.Reset();
198 return hr;
199 });
200 if (SUCCEEDED(hr)) {
201 d->timer.stop();
202 d->state = QTextToSpeech::Ready;
203 emit stateChanged(d->state);
204 }
205 }
206
pause()207 void QTextToSpeechEngineWinRT::pause()
208 {
209 Q_D(QTextToSpeechEngineWinRT);
210
211 if (!d->media)
212 return;
213
214 // Stop timer first to not have checkElementState being invoked
215 // while context switch to/from Xaml thread happens.
216 d->timer.stop();
217
218 HRESULT hr;
219 hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
220 HRESULT hr = d->media->Pause();
221 RETURN_HR_IF_FAILED("Could not pause playback.");
222 return hr;
223 });
224 if (SUCCEEDED(hr)) {
225 d->state = QTextToSpeech::Paused;
226 emit stateChanged(d->state);
227 }
228 }
229
resume()230 void QTextToSpeechEngineWinRT::resume()
231 {
232 Q_D(QTextToSpeechEngineWinRT);
233
234 if (!d->media)
235 return;
236
237 HRESULT hr;
238 hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
239 HRESULT hr = d->media->Play();
240 RETURN_HR_IF_FAILED("Could not resume playback.");
241 return hr;
242 });
243 if (SUCCEEDED(hr)) {
244 d->timer.start();
245 d->state = QTextToSpeech::Speaking;
246 emit stateChanged(d->state);
247 }
248 }
249
rate() const250 double QTextToSpeechEngineWinRT::rate() const
251 {
252 Q_D(const QTextToSpeechEngineWinRT);
253
254 return d->rate;
255 }
256
setRate(double rate)257 bool QTextToSpeechEngineWinRT::setRate(double rate)
258 {
259 Q_D(QTextToSpeechEngineWinRT);
260
261 d->rate = rate;
262 return true;
263 }
264
pitch() const265 double QTextToSpeechEngineWinRT::pitch() const
266 {
267 // Not supported for WinRT
268 Q_UNIMPLEMENTED();
269 return 1.;
270 }
271
setPitch(double pitch)272 bool QTextToSpeechEngineWinRT::setPitch(double pitch)
273 {
274 // Not supported for WinRT
275 Q_UNUSED(pitch);
276 Q_UNIMPLEMENTED();
277 return false;
278 }
279
locale() const280 QLocale QTextToSpeechEngineWinRT::locale() const
281 {
282 Q_D(const QTextToSpeechEngineWinRT);
283
284 HRESULT hr;
285 ComPtr<IVoiceInformation> info;
286 hr = d->synth->get_Voice(&info);
287
288 HString language;
289 hr = info->get_Language(language.GetAddressOf());
290
291 return QLocale(QString::fromWCharArray(language.GetRawBuffer(0)));
292 }
293
setLocale(const QLocale & locale)294 bool QTextToSpeechEngineWinRT::setLocale(const QLocale &locale)
295 {
296 Q_D(QTextToSpeechEngineWinRT);
297
298 const int index = d->locales.indexOf(locale);
299 if (index == -1)
300 return false;
301
302 return setVoice(d->voices.at(index));
303 }
304
volume() const305 double QTextToSpeechEngineWinRT::volume() const
306 {
307 Q_D(const QTextToSpeechEngineWinRT);
308
309 return d->volume;
310 }
311
setVolume(double volume)312 bool QTextToSpeechEngineWinRT::setVolume(double volume)
313 {
314 Q_D(QTextToSpeechEngineWinRT);
315
316 d->volume = volume;
317 return true;
318 }
319
voice() const320 QVoice QTextToSpeechEngineWinRT::voice() const
321 {
322 Q_D(const QTextToSpeechEngineWinRT);
323
324 HRESULT hr;
325 ComPtr<IVoiceInformation> info;
326 hr = d->synth->get_Voice(&info);
327
328 return createVoiceForInformation(info);
329 }
330
setVoice(const QVoice & voice)331 bool QTextToSpeechEngineWinRT::setVoice(const QVoice &voice)
332 {
333 Q_D(QTextToSpeechEngineWinRT);
334
335 const int index = d->voices.indexOf(voice);
336 if (index == -1)
337 return false;
338
339 HRESULT hr;
340 hr = d->synth->put_Voice(d->infos.at(index).Get());
341 return SUCCEEDED(hr);
342 }
343
state() const344 QTextToSpeech::State QTextToSpeechEngineWinRT::state() const
345 {
346 Q_D(const QTextToSpeechEngineWinRT);
347 return d->state;
348 }
349
checkElementState()350 void QTextToSpeechEngineWinRT::checkElementState()
351 {
352 Q_D(QTextToSpeechEngineWinRT);
353
354 // MediaElement does not move into Stopped or Closed state when it finished
355 // playback of synthesised text. Instead it goes into Pause mode.
356 // Because of this MediaElement::add_MediaEnded() is not invoked and we
357 // cannot add an event listener to the Media Element to properly emit
358 // state changes.
359 // To still be able to capture when it is ready, use a periodic timer and
360 // check if the MediaElement went into Pause state.
361 bool finished = false;
362 HRESULT hr;
363 hr = QEventDispatcherWinRT::runOnXamlThread([d, &finished]() {
364 HRESULT hr;
365 ABI::Windows::UI::Xaml::Media::MediaElementState s;
366 hr = d->media.Get()->get_CurrentState(&s);
367 if (SUCCEEDED(hr) && s == MediaElementState_Paused)
368 finished = true;
369 return hr;
370 });
371
372 if (finished)
373 stop();
374 }
375
init()376 void QTextToSpeechEngineWinRT::init()
377 {
378 Q_D(QTextToSpeechEngineWinRT);
379
380 d->state = QTextToSpeech::BackendError;
381
382 HRESULT hr;
383
384 hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
385 HRESULT hr = RoGetActivationFactory(HString::MakeReference(RuntimeClass_Windows_UI_Xaml_Markup_XamlReader).Get(),
386 IID_PPV_ARGS(&d->xamlReader));
387 Q_ASSERT_SUCCEEDED(hr);
388
389 return hr;
390 });
391
392 ComPtr<IInstalledVoicesStatic> stat;
393 hr = RoGetActivationFactory(HString::MakeReference(RuntimeClass_Windows_Media_SpeechSynthesis_SpeechSynthesizer).Get(),
394 IID_PPV_ARGS(&stat));
395 Q_ASSERT_SUCCEEDED(hr);
396
397 hr = RoActivateInstance(HString::MakeReference(RuntimeClass_Windows_Media_SpeechSynthesis_SpeechSynthesizer).Get(),
398 &d->synth);
399 Q_ASSERT_SUCCEEDED(hr);
400
401 ComPtr<IVectorView<VoiceInformation*>> voices;
402 hr = stat->get_AllVoices(&voices);
403 RETURN_VOID_IF_FAILED("Could not get voice information.");
404
405 quint32 voiceSize;
406 hr = voices->get_Size(&voiceSize);
407 RETURN_VOID_IF_FAILED("Could not access size of voice information.");
408
409 for (quint32 i = 0; i < voiceSize; ++i) {
410 ComPtr<IVoiceInformation> info;
411 hr = voices->GetAt(i, &info);
412 Q_ASSERT_SUCCEEDED(hr);
413
414 HString nativeLanguage;
415 hr = info->get_Language(nativeLanguage.GetAddressOf());
416 Q_ASSERT_SUCCEEDED(hr);
417
418 const QString languageString = QString::fromWCharArray(nativeLanguage.GetRawBuffer(0));
419 QLocale locale(languageString);
420 if (!d->locales.contains(locale))
421 d->locales.append(locale);
422
423 QVoice voice = createVoiceForInformation(info);
424 d->voices.append(voice);
425 d->infos.append(info);
426 }
427
428 d->state = QTextToSpeech::Ready;
429 }
430
createVoiceForInformation(ComPtr<IVoiceInformation> info) const431 QVoice QTextToSpeechEngineWinRT::createVoiceForInformation(ComPtr<IVoiceInformation> info) const
432 {
433 HRESULT hr;
434 HString nativeName;
435 hr = info->get_DisplayName(nativeName.GetAddressOf());
436 Q_ASSERT_SUCCEEDED(hr);
437
438 const QString name = QString::fromWCharArray(nativeName.GetRawBuffer(0));
439
440 VoiceGender gender;
441 hr = info->get_Gender(&gender);
442 Q_ASSERT_SUCCEEDED(hr);
443
444 return QTextToSpeechEngine::createVoice(name, gender == VoiceGender_Male ? QVoice::Male : QVoice::Female,
445 QVoice::Other, QVariant());
446 }
447
448 QT_END_NAMESPACE
449