1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim:set ts=2 sw=2 sts=2 et cindent: */ 3 /* This Source Code Form is subject to the terms of the Mozilla Public 4 * License, v. 2.0. If a copy of the MPL was not distributed with this 5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ 6 7 #ifndef mozilla_dom_OnlineRecognitionService_h 8 #define mozilla_dom_OnlineRecognitionService_h 9 10 #include "nsCOMPtr.h" 11 #include "nsTArray.h" 12 #include "nsISpeechRecognitionService.h" 13 #include "speex/speex_resampler.h" 14 #include "nsIStreamListener.h" 15 #include "OpusTrackEncoder.h" 16 #include "ContainerWriter.h" 17 18 #define NS_ONLINE_SPEECH_RECOGNITION_SERVICE_CID \ 19 {0x0ff5ce56, \ 20 0x5b09, \ 21 0x4db8, \ 22 {0xad, 0xc6, 0x82, 0x66, 0xaf, 0x95, 0xf8, 0x64}}; 23 24 namespace mozilla { 25 26 namespace ipc { 27 class PrincipalInfo; 28 } // namespace ipc 29 30 /** 31 * Online implementation of the nsISpeechRecognitionService interface 32 */ 33 class OnlineSpeechRecognitionService : public nsISpeechRecognitionService, 34 public nsIStreamListener { 35 public: 36 // Add XPCOM glue code 37 NS_DECL_THREADSAFE_ISUPPORTS 38 NS_DECL_NSISPEECHRECOGNITIONSERVICE 39 NS_DECL_NSIREQUESTOBSERVER 40 NS_DECL_NSISTREAMLISTENER 41 42 /** 43 * Listener responsible for handling the events raised by the TrackEncoder 44 */ 45 class SpeechEncoderListener : public TrackEncoderListener { 46 public: SpeechEncoderListener(OnlineSpeechRecognitionService * aService)47 explicit SpeechEncoderListener(OnlineSpeechRecognitionService* aService) 48 : mService(aService), mOwningThread(AbstractThread::GetCurrent()) {} 49 Started(TrackEncoder * aEncoder)50 void Started(TrackEncoder* aEncoder) override {} 51 Initialized(TrackEncoder * aEncoder)52 void Initialized(TrackEncoder* aEncoder) override { 53 MOZ_ASSERT(mOwningThread->IsCurrentThreadIn()); 54 mService->EncoderInitialized(); 55 } 56 Error(TrackEncoder * aEncoder)57 void Error(TrackEncoder* aEncoder) override { 58 MOZ_ASSERT(mOwningThread->IsCurrentThreadIn()); 59 mService->EncoderError(); 60 } 61 62 private: 63 const RefPtr<OnlineSpeechRecognitionService> mService; 64 const RefPtr<AbstractThread> mOwningThread; 65 }; 66 67 /** 68 * Default constructs a OnlineSpeechRecognitionService 69 */ 70 OnlineSpeechRecognitionService(); 71 72 /** 73 * Called by SpeechEncoderListener when the AudioTrackEncoder has been 74 * initialized. 75 */ 76 void EncoderInitialized(); 77 78 /** 79 * Called after the AudioTrackEncoder has encoded all data for us to wrap in a 80 * container and pass along. 81 */ 82 void EncoderFinished(); 83 84 /** 85 * Called by SpeechEncoderListener when the AudioTrackEncoder has 86 * encountered an error. 87 */ 88 void EncoderError(); 89 90 private: 91 /** 92 * Private destructor to prevent bypassing of reference counting 93 */ 94 virtual ~OnlineSpeechRecognitionService(); 95 96 /** The associated SpeechRecognition */ 97 nsMainThreadPtrHandle<dom::SpeechRecognition> mRecognition; 98 99 /** 100 * Builds a mock SpeechRecognitionResultList 101 */ 102 dom::SpeechRecognitionResultList* BuildMockResultList(); 103 104 /** 105 * Method responsible for uploading the audio to the remote endpoint 106 */ 107 void DoSTT(); 108 109 // Encoded and packaged ogg audio data 110 nsTArray<nsTArray<uint8_t>> mEncodedData; 111 // Member responsible for holding a reference to the TrackEncoderListener 112 RefPtr<SpeechEncoderListener> mSpeechEncoderListener; 113 // MediaQueue fed encoded data by mAudioEncoder 114 MediaQueue<EncodedFrame> mEncodedAudioQueue; 115 // Encoder responsible for encoding the frames from pcm to opus which is the 116 // format supported by our backend 117 UniquePtr<AudioTrackEncoder> mAudioEncoder; 118 // Object responsible for wrapping the opus frames into an ogg container 119 UniquePtr<ContainerWriter> mWriter; 120 // Member responsible for storing the json string returned by the endpoint 121 nsCString mBuf; 122 // Used to calculate a ceiling on the time spent listening. 123 TimeStamp mFirstIteration; 124 // flag responsible to control if the user choose to abort 125 bool mAborted = false; 126 // reference to the audio encoder queue 127 RefPtr<TaskQueue> mEncodeTaskQueue; 128 }; 129 130 } // namespace mozilla 131 132 #endif 133