1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef mozilla_dom_OnlineRecognitionService_h
8 #define mozilla_dom_OnlineRecognitionService_h
9 
10 #include "nsCOMPtr.h"
11 #include "nsTArray.h"
12 #include "nsISpeechRecognitionService.h"
13 #include "speex/speex_resampler.h"
14 #include "nsIStreamListener.h"
15 #include "OpusTrackEncoder.h"
16 #include "ContainerWriter.h"
17 
// Class ID (CID) of the online speech recognition service, spelled as a
// brace-enclosed initializer: {0x0ff5ce56, 0x5b09, 0x4db8, {8 bytes}}.
//
// The expansion intentionally carries no trailing ';'. A semicolon baked into
// the macro makes it unusable in expression or argument position (for
// example when handed to another macro); callers add their own terminator.
#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
  {0x0ff5ce56,                                   \
   0x5b09,                                       \
   0x4db8,                                       \
   {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}
23 
24 namespace mozilla {
25 
namespace ipc {
// Forward declaration only: the type is named here but not defined in this
// header.
class PrincipalInfo;
}  // namespace ipc
29 
30 /**
31  * Online implementation of the nsISpeechRecognitionService interface
32  */
33 class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
34                                        public nsIStreamListener {
35  public:
36   // Add XPCOM glue code
37   NS_DECL_THREADSAFE_ISUPPORTS
38   NS_DECL_NSISPEECHRECOGNITIONSERVICE
39   NS_DECL_NSIREQUESTOBSERVER
40   NS_DECL_NSISTREAMLISTENER
41 
42   /**
43    * Listener responsible for handling the events raised by the TrackEncoder
44    */
45   class SpeechEncoderListener : public TrackEncoderListener {
46    public:
SpeechEncoderListener(OnlineSpeechRecognitionService * aService)47     explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
48         : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}
49 
Initialized(TrackEncoder * aEncoder)50     void Initialized(TrackEncoder* aEncoder) override {
51       MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
52       mService->EncoderInitialized();
53     }
54 
DataAvailable(TrackEncoder * aEncoder)55     void DataAvailable(TrackEncoder* aEncoder) override {
56       MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
57       mService->EncoderDataAvailable();
58     }
59 
Error(TrackEncoder * aEncoder)60     void Error(TrackEncoder* aEncoder) override {
61       MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
62       mService->EncoderError();
63     }
64 
65    private:
66     const RefPtr<OnlineSpeechRecognitionService> mService;
67     const RefPtr<AbstractThread> mOwningThread;
68   };
69 
70   /**
71    * Default constructs a OnlineSpeechRecognitionService
72    */
73   OnlineSpeechRecognitionService();
74 
75   /**
76    * Called by SpeechEncoderListener when the AudioTrackEncoder has been
77    * initialized.
78    */
79   void EncoderInitialized();
80 
81   /**
82    * Called by SpeechEncoderListener when the AudioTrackEncoder has encoded
83    * some data for us to pass along.
84    */
85   void EncoderDataAvailable();
86 
87   /**
88    * Called by SpeechEncoderListener when the AudioTrackEncoder has
89    * encountered an error.
90    */
91   void EncoderError();
92 
93  private:
94   /**
95    * Private destructor to prevent bypassing of reference counting
96    */
97   virtual ~OnlineSpeechRecognitionService();
98 
99   /** The associated SpeechRecognition */
100   nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;
101 
102   /**
103    * Builds a mock SpeechRecognitionResultList
104    */
105   dom::SpeechRecognitionResultList* BuildMockResultList();
106 
107   /**
108    * Method responsible for uploading the audio to the remote endpoint
109    */
110   void DoSTT();
111 
112   // Encoded and packaged ogg audio data
113   nsTArray<nsTArray<uint8_t>> mEncodedData;
114   // Member responsible for holding a reference to the TrackEncoderListener
115   RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
116   // Encoder responsible for encoding the frames from pcm to opus which is the
117   // format supported by our backend
118   RefPtr<AudioTrackEncoder> mAudioEncoder;
119   // Object responsible for wrapping the opus frames into an ogg container
120   UniquePtr<ContainerWriter> mWriter;
121   // Member responsible for storing the json string returned by the endpoint
122   nsCString mBuf;
123   // Used to calculate a ceiling on the time spent listening.
124   TimeStamp mFirstIteration;
125   // flag responsible to control if the user choose to abort
126   bool mAborted = false;
127   //  reference to the audio encoder queue
128   RefPtr<TaskQueue> mEncodeTaskQueue;
129 };
130 
131 }  // namespace mozilla
132 
133 #endif
134