1 /****************************************************************************
2 **
3 ** Copyright (C) 2015 The Qt Company Ltd.
4 ** Contact: http://www.qt.io/licensing/
5 **
6 ** This file is part of the Qt Speech module of the Qt Toolkit.
7 **
8 ** $QT_BEGIN_LICENSE:LGPL3$
9 ** Commercial License Usage
10 ** Licensees holding valid commercial Qt licenses may use this file in
11 ** accordance with the commercial license agreement provided with the
12 ** Software or, alternatively, in accordance with the terms contained in
13 ** a written agreement between you and The Qt Company. For licensing terms
14 ** and conditions see http://www.qt.io/terms-conditions. For further
15 ** information use the contact form at http://www.qt.io/contact-us.
16 **
17 ** GNU Lesser General Public License Usage
18 ** Alternatively, this file may be used under the terms of the GNU Lesser
19 ** General Public License version 3 as published by the Free Software
20 ** Foundation and appearing in the file LICENSE.LGPLv3 included in the
21 ** packaging of this file. Please review the following information to
22 ** ensure the GNU Lesser General Public License version 3 requirements
23 ** will be met: https://www.gnu.org/licenses/lgpl.html.
24 **
25 ** GNU General Public License Usage
26 ** Alternatively, this file may be used under the terms of the GNU
27 ** General Public License version 2.0 or later as published by the Free
28 ** Software Foundation and appearing in the file LICENSE.GPL included in
29 ** the packaging of this file. Please review the following information to
30 ** ensure the GNU General Public License version 2.0 requirements will be
31 ** met: http://www.gnu.org/licenses/gpl-2.0.html.
32 **
33 ** $QT_END_LICENSE$
34 **
35 ****************************************************************************/
36 
37 #include "qtexttospeech_winrt.h"
38 
39 #include <QtCore/QCoreApplication>
40 #include <QtCore/qfunctions_winrt.h>
41 #include <QtCore/QMap>
42 #include <QtCore/QTimer>
43 #include <private/qeventdispatcher_winrt_p.h>
44 
45 #include <windows.foundation.h>
46 #include <windows.foundation.collections.h>
47 #include <windows.media.speechsynthesis.h>
48 #include <windows.storage.streams.h>
49 #include <windows.ui.xaml.h>
50 #include <windows.ui.xaml.controls.h>
51 #include <windows.ui.xaml.markup.h>
52 
53 #include <functional>
54 #include <wrl.h>
55 
56 using namespace ABI::Windows::Foundation;
57 using namespace ABI::Windows::Foundation::Collections;
58 using namespace ABI::Windows::Media::SpeechSynthesis;
59 using namespace ABI::Windows::Storage::Streams;
60 using namespace ABI::Windows::UI::Xaml;
61 using namespace ABI::Windows::UI::Xaml::Controls;
62 using namespace ABI::Windows::UI::Xaml::Markup;
63 using namespace ABI::Windows::UI::Xaml::Media;
64 using namespace Microsoft::WRL;
65 using namespace Microsoft::WRL::Wrappers;
66 
67 QT_BEGIN_NAMESPACE
68 
// XAML markup describing a bare MediaElement. say() passes it to the
// XamlReader to create the element that plays the synthesized stream.
// It is spelled as a plain wide string literal: producing it by stringizing a
// macro argument behind an L prefix only works with MSVC's traditional
// preprocessor.
static const wchar_t webviewXaml[] =
    L"<MediaElement xmlns=\"http://schemas.microsoft.com/winfx/2006/xaml/presentation\" />";
73 
74 class QTextToSpeechEngineWinRTPrivate
75 {
76 public:
77     QTimer timer;
78     ComPtr<IXamlReaderStatics> xamlReader;
79     ComPtr<ISpeechSynthesizer> synth;
80     QVector<QLocale> locales;
81     QVector<QVoice> voices;
82     QVector<ComPtr<IVoiceInformation>> infos;
83     EventRegistrationToken tok;
84 
85     ComPtr<IMediaElement> media;
86 
87     double rate;
88     double volume;
89 
90     QTextToSpeech::State state;
91 };
92 
QTextToSpeechEngineWinRT(const QVariantMap &,QObject * parent)93 QTextToSpeechEngineWinRT::QTextToSpeechEngineWinRT(const QVariantMap &, QObject *parent)
94     : QTextToSpeechEngine(parent)
95     , d_ptr(new QTextToSpeechEngineWinRTPrivate)
96 {
97     d_ptr->rate = 0;
98     d_ptr->volume = 1.0;
99     d_ptr->timer.setInterval(100);
100     connect(&d_ptr->timer, &QTimer::timeout, this, &QTextToSpeechEngineWinRT::checkElementState);
101 
102     init();
103 }
104 
~QTextToSpeechEngineWinRT()105 QTextToSpeechEngineWinRT::~QTextToSpeechEngineWinRT()
106 {
107 }
108 
availableLocales() const109 QVector<QLocale> QTextToSpeechEngineWinRT::availableLocales() const
110 {
111     Q_D(const QTextToSpeechEngineWinRT);
112     return d->locales;
113 }
114 
availableVoices() const115 QVector<QVoice> QTextToSpeechEngineWinRT::availableVoices() const
116 {
117     Q_D(const QTextToSpeechEngineWinRT);
118     return d->voices;
119 }
120 
say(const QString & text)121 void QTextToSpeechEngineWinRT::say(const QString &text)
122 {
123     Q_D(QTextToSpeechEngineWinRT);
124 
125     HRESULT hr;
126 
127     hr = QEventDispatcherWinRT::runOnXamlThread([text, d]() {
128         HRESULT hr;
129         HStringReference nativeText(reinterpret_cast<LPCWSTR>(text.utf16()), text.length());
130         ComPtr<IAsyncOperation<SpeechSynthesisStream*>> op;
131 
132         hr = d->synth->SynthesizeTextToStreamAsync(nativeText.Get(), &op);
133         RETURN_HR_IF_FAILED("Could not synthesize text.");
134 
135         ComPtr<ISpeechSynthesisStream> stream;
136         hr = QWinRTFunctions::await(op, stream.GetAddressOf());
137         RETURN_HR_IF_FAILED("Synthesizing failed.");
138 
139         ComPtr<IRandomAccessStream> randomStream;
140         hr = stream.As(&randomStream);
141         RETURN_HR_IF_FAILED("Could not cast to RandomAccessStream.");
142 
143         // Directly instantiating a MediaElement works, but it throws an exception
144         // when setting the source. Using a XamlReader appears to set it up properly.
145         ComPtr<IInspectable> element;
146         hr = d->xamlReader->Load(HString::MakeReference(webviewXaml).Get(), &element);
147         Q_ASSERT_SUCCEEDED(hr);
148 
149         if (d->media)
150             d->media.Reset();
151 
152         hr = element.As(&d->media);
153         RETURN_HR_IF_FAILED("Could not create MediaElement for playback.");
154 
155         // Volume and Playback Rate cannot be changed for synthesized audio once
156         // it has been created. Hence QTextToSpeechEngineWinRT::setVolume/Rate
157         // only cache the value until playback is started.
158         hr = d->media->put_DefaultPlaybackRate(d->rate + 1);
159         if (FAILED(hr))
160             qWarning("Could not set playback rate.");
161 
162         const DOUBLE vol = DOUBLE(d->volume);
163         hr = d->media->put_Volume(vol);
164         if (FAILED(hr))
165             qWarning("Could not set volume.");
166 
167         static const HStringReference empty(L"");
168         hr = d->media->SetSource(randomStream.Get(), empty.Get());
169         RETURN_HR_IF_FAILED("Could not set media source.");
170 
171         hr = d->media->Play();
172         RETURN_HR_IF_FAILED("Could not initiate playback.");
173 
174         return S_OK;
175     });
176     if (SUCCEEDED(hr)) {
177         d->timer.start();
178         d->state = QTextToSpeech::Speaking;
179     } else {
180         d->state = QTextToSpeech::BackendError;
181     }
182     emit stateChanged(d->state);
183 }
184 
stop()185 void QTextToSpeechEngineWinRT::stop()
186 {
187     Q_D(QTextToSpeechEngineWinRT);
188 
189     if (!d->media)
190         return;
191 
192     HRESULT hr;
193     hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
194         HRESULT hr = d->media->Stop();
195         RETURN_HR_IF_FAILED("Could not stop playback.");
196 
197         d->media.Reset();
198         return hr;
199     });
200     if (SUCCEEDED(hr)) {
201         d->timer.stop();
202         d->state = QTextToSpeech::Ready;
203         emit stateChanged(d->state);
204     }
205 }
206 
pause()207 void QTextToSpeechEngineWinRT::pause()
208 {
209     Q_D(QTextToSpeechEngineWinRT);
210 
211     if (!d->media)
212         return;
213 
214     // Stop timer first to not have checkElementState being invoked
215     // while context switch to/from Xaml thread happens.
216     d->timer.stop();
217 
218     HRESULT hr;
219     hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
220         HRESULT hr = d->media->Pause();
221         RETURN_HR_IF_FAILED("Could not pause playback.");
222         return hr;
223     });
224     if (SUCCEEDED(hr)) {
225         d->state = QTextToSpeech::Paused;
226         emit stateChanged(d->state);
227     }
228 }
229 
resume()230 void QTextToSpeechEngineWinRT::resume()
231 {
232     Q_D(QTextToSpeechEngineWinRT);
233 
234     if (!d->media)
235         return;
236 
237     HRESULT hr;
238     hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
239         HRESULT hr = d->media->Play();
240         RETURN_HR_IF_FAILED("Could not resume playback.");
241         return hr;
242     });
243     if (SUCCEEDED(hr)) {
244         d->timer.start();
245         d->state = QTextToSpeech::Speaking;
246         emit stateChanged(d->state);
247     }
248 }
249 
rate() const250 double QTextToSpeechEngineWinRT::rate() const
251 {
252     Q_D(const QTextToSpeechEngineWinRT);
253 
254     return d->rate;
255 }
256 
setRate(double rate)257 bool QTextToSpeechEngineWinRT::setRate(double rate)
258 {
259     Q_D(QTextToSpeechEngineWinRT);
260 
261     d->rate = rate;
262     return true;
263 }
264 
pitch() const265 double QTextToSpeechEngineWinRT::pitch() const
266 {
267     // Not supported for WinRT
268     Q_UNIMPLEMENTED();
269     return 1.;
270 }
271 
setPitch(double pitch)272 bool QTextToSpeechEngineWinRT::setPitch(double pitch)
273 {
274     // Not supported for WinRT
275     Q_UNUSED(pitch);
276     Q_UNIMPLEMENTED();
277     return false;
278 }
279 
locale() const280 QLocale QTextToSpeechEngineWinRT::locale() const
281 {
282     Q_D(const QTextToSpeechEngineWinRT);
283 
284     HRESULT hr;
285     ComPtr<IVoiceInformation> info;
286     hr = d->synth->get_Voice(&info);
287 
288     HString language;
289     hr = info->get_Language(language.GetAddressOf());
290 
291     return QLocale(QString::fromWCharArray(language.GetRawBuffer(0)));
292 }
293 
setLocale(const QLocale & locale)294 bool QTextToSpeechEngineWinRT::setLocale(const QLocale &locale)
295 {
296     Q_D(QTextToSpeechEngineWinRT);
297 
298     const int index = d->locales.indexOf(locale);
299     if (index == -1)
300         return false;
301 
302     return setVoice(d->voices.at(index));
303 }
304 
volume() const305 double QTextToSpeechEngineWinRT::volume() const
306 {
307     Q_D(const QTextToSpeechEngineWinRT);
308 
309     return d->volume;
310 }
311 
setVolume(double volume)312 bool QTextToSpeechEngineWinRT::setVolume(double volume)
313 {
314     Q_D(QTextToSpeechEngineWinRT);
315 
316     d->volume = volume;
317     return true;
318 }
319 
voice() const320 QVoice QTextToSpeechEngineWinRT::voice() const
321 {
322     Q_D(const QTextToSpeechEngineWinRT);
323 
324     HRESULT hr;
325     ComPtr<IVoiceInformation> info;
326     hr = d->synth->get_Voice(&info);
327 
328     return createVoiceForInformation(info);
329 }
330 
setVoice(const QVoice & voice)331 bool QTextToSpeechEngineWinRT::setVoice(const QVoice &voice)
332 {
333     Q_D(QTextToSpeechEngineWinRT);
334 
335     const int index = d->voices.indexOf(voice);
336     if (index == -1)
337         return false;
338 
339     HRESULT hr;
340     hr = d->synth->put_Voice(d->infos.at(index).Get());
341     return SUCCEEDED(hr);
342 }
343 
state() const344 QTextToSpeech::State QTextToSpeechEngineWinRT::state() const
345 {
346     Q_D(const QTextToSpeechEngineWinRT);
347     return d->state;
348 }
349 
checkElementState()350 void QTextToSpeechEngineWinRT::checkElementState()
351 {
352     Q_D(QTextToSpeechEngineWinRT);
353 
354     // MediaElement does not move into Stopped or Closed state when it finished
355     // playback of synthesised text. Instead it goes into Pause mode.
356     // Because of this MediaElement::add_MediaEnded() is not invoked and we
357     // cannot add an event listener to the Media Element to properly emit
358     // state changes.
359     // To still be able to capture when it is ready, use a periodic timer and
360     // check if the MediaElement went into Pause state.
361     bool finished = false;
362     HRESULT hr;
363     hr = QEventDispatcherWinRT::runOnXamlThread([d, &finished]() {
364         HRESULT hr;
365         ABI::Windows::UI::Xaml::Media::MediaElementState s;
366         hr = d->media.Get()->get_CurrentState(&s);
367         if (SUCCEEDED(hr) && s == MediaElementState_Paused)
368             finished = true;
369         return hr;
370     });
371 
372     if (finished)
373         stop();
374 }
375 
init()376 void QTextToSpeechEngineWinRT::init()
377 {
378     Q_D(QTextToSpeechEngineWinRT);
379 
380     d->state = QTextToSpeech::BackendError;
381 
382     HRESULT hr;
383 
384     hr = QEventDispatcherWinRT::runOnXamlThread([d]() {
385         HRESULT hr = RoGetActivationFactory(HString::MakeReference(RuntimeClass_Windows_UI_Xaml_Markup_XamlReader).Get(),
386                                             IID_PPV_ARGS(&d->xamlReader));
387         Q_ASSERT_SUCCEEDED(hr);
388 
389         return hr;
390     });
391 
392     ComPtr<IInstalledVoicesStatic> stat;
393     hr = RoGetActivationFactory(HString::MakeReference(RuntimeClass_Windows_Media_SpeechSynthesis_SpeechSynthesizer).Get(),
394                                 IID_PPV_ARGS(&stat));
395     Q_ASSERT_SUCCEEDED(hr);
396 
397     hr = RoActivateInstance(HString::MakeReference(RuntimeClass_Windows_Media_SpeechSynthesis_SpeechSynthesizer).Get(),
398                             &d->synth);
399     Q_ASSERT_SUCCEEDED(hr);
400 
401     ComPtr<IVectorView<VoiceInformation*>> voices;
402     hr = stat->get_AllVoices(&voices);
403     RETURN_VOID_IF_FAILED("Could not get voice information.");
404 
405     quint32 voiceSize;
406     hr = voices->get_Size(&voiceSize);
407     RETURN_VOID_IF_FAILED("Could not access size of voice information.");
408 
409     for (quint32 i = 0; i < voiceSize; ++i) {
410         ComPtr<IVoiceInformation> info;
411         hr = voices->GetAt(i, &info);
412         Q_ASSERT_SUCCEEDED(hr);
413 
414         HString nativeLanguage;
415         hr = info->get_Language(nativeLanguage.GetAddressOf());
416         Q_ASSERT_SUCCEEDED(hr);
417 
418         const QString languageString = QString::fromWCharArray(nativeLanguage.GetRawBuffer(0));
419         QLocale locale(languageString);
420         if (!d->locales.contains(locale))
421             d->locales.append(locale);
422 
423         QVoice voice = createVoiceForInformation(info);
424         d->voices.append(voice);
425         d->infos.append(info);
426     }
427 
428     d->state = QTextToSpeech::Ready;
429 }
430 
createVoiceForInformation(ComPtr<IVoiceInformation> info) const431 QVoice QTextToSpeechEngineWinRT::createVoiceForInformation(ComPtr<IVoiceInformation> info) const
432 {
433     HRESULT hr;
434     HString nativeName;
435     hr = info->get_DisplayName(nativeName.GetAddressOf());
436     Q_ASSERT_SUCCEEDED(hr);
437 
438     const QString name = QString::fromWCharArray(nativeName.GetRawBuffer(0));
439 
440     VoiceGender gender;
441     hr = info->get_Gender(&gender);
442     Q_ASSERT_SUCCEEDED(hr);
443 
444     return QTextToSpeechEngine::createVoice(name, gender == VoiceGender_Male ? QVoice::Male : QVoice::Female,
445                                             QVoice::Other, QVariant());
446 }
447 
448 QT_END_NAMESPACE
449