/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #ifndef mozilla_dom_OnlineRecognitionService_h
8 #define mozilla_dom_OnlineRecognitionService_h
9 
10 #include "nsCOMPtr.h"
11 #include "nsTArray.h"
12 #include "nsISpeechRecognitionService.h"
13 #include "speex/speex_resampler.h"
14 #include "nsIStreamListener.h"
15 #include "OpusTrackEncoder.h"
16 #include "ContainerWriter.h"
17 
// Identifier of the online speech recognition service, spelled as a nested
// brace-enclosed initializer: {32-bit, 16-bit, 16-bit, {8 bytes}}.
// NOTE(review): presumably used to initialize an nsCID/nsID — confirm against
// the component registration code.
// The expansion deliberately carries no trailing semicolon so that it can be
// used inside a larger initializer list; callers terminate the statement.
#define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \
  {0x0ff5ce56,                                   \
   0x5b09,                                       \
   0x4db8,                                       \
   {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}
23 
24 namespace mozilla {
25 
namespace ipc {
// Forward declaration only; the full definition is not needed by this header.
class PrincipalInfo;
}  // namespace ipc
29 
30 /**
31  * Online implementation of the nsISpeechRecognitionService interface
32  */
33 class OnlineSpeechRecognitionService : public nsISpeechRecognitionService,
34                                        public nsIStreamListener {
35  public:
36   // Add XPCOM glue code
37   NS_DECL_THREADSAFE_ISUPPORTS
38   NS_DECL_NSISPEECHRECOGNITIONSERVICE
39   NS_DECL_NSIREQUESTOBSERVER
40   NS_DECL_NSISTREAMLISTENER
41 
42   /**
43    * Listener responsible for handling the events raised by the TrackEncoder
44    */
45   class SpeechEncoderListener : public TrackEncoderListener {
46    public:
SpeechEncoderListener(OnlineSpeechRecognitionService * aService)47     explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService)
48         : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {}
49 
Started(TrackEncoder * aEncoder)50     void Started(TrackEncoder* aEncoder) override {}
51 
Initialized(TrackEncoder * aEncoder)52     void Initialized(TrackEncoder* aEncoder) override {
53       MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
54       mService->EncoderInitialized();
55     }
56 
Error(TrackEncoder * aEncoder)57     void Error(TrackEncoder* aEncoder) override {
58       MOZ_ASSERT(mOwningThread->IsCurrentThreadIn());
59       mService->EncoderError();
60     }
61 
62    private:
63     const RefPtr<OnlineSpeechRecognitionService> mService;
64     const RefPtr<AbstractThread> mOwningThread;
65   };
66 
67   /**
68    * Default constructs a OnlineSpeechRecognitionService
69    */
70   OnlineSpeechRecognitionService();
71 
72   /**
73    * Called by SpeechEncoderListener when the AudioTrackEncoder has been
74    * initialized.
75    */
76   void EncoderInitialized();
77 
78   /**
79    * Called after the AudioTrackEncoder has encoded all data for us to wrap in a
80    * container and pass along.
81    */
82   void EncoderFinished();
83 
84   /**
85    * Called by SpeechEncoderListener when the AudioTrackEncoder has
86    * encountered an error.
87    */
88   void EncoderError();
89 
90  private:
91   /**
92    * Private destructor to prevent bypassing of reference counting
93    */
94   virtual ~OnlineSpeechRecognitionService();
95 
96   /** The associated SpeechRecognition */
97   nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition;
98 
99   /**
100    * Builds a mock SpeechRecognitionResultList
101    */
102   dom::SpeechRecognitionResultList* BuildMockResultList();
103 
104   /**
105    * Method responsible for uploading the audio to the remote endpoint
106    */
107   void DoSTT();
108 
109   // Encoded and packaged ogg audio data
110   nsTArray<nsTArray<uint8_t>> mEncodedData;
111   // Member responsible for holding a reference to the TrackEncoderListener
112   RefPtr<SpeechEncoderListener> mSpeechEncoderListener;
113   // MediaQueue fed encoded data by mAudioEncoder
114   MediaQueue<EncodedFrame> mEncodedAudioQueue;
115   // Encoder responsible for encoding the frames from pcm to opus which is the
116   // format supported by our backend
117   UniquePtr<AudioTrackEncoder> mAudioEncoder;
118   // Object responsible for wrapping the opus frames into an ogg container
119   UniquePtr<ContainerWriter> mWriter;
120   // Member responsible for storing the json string returned by the endpoint
121   nsCString mBuf;
122   // Used to calculate a ceiling on the time spent listening.
123   TimeStamp mFirstIteration;
124   // flag responsible to control if the user choose to abort
125   bool mAborted = false;
126   //  reference to the audio encoder queue
127   RefPtr<TaskQueue> mEncodeTaskQueue;
128 };
129 
130 }  // namespace mozilla
131 
132 #endif
133