1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef MediaEngineWebRTCAudio_h
7 #define MediaEngineWebRTCAudio_h
8 
9 #include "AudioPacketizer.h"
10 #include "AudioSegment.h"
11 #include "AudioDeviceInfo.h"
12 #include "MediaEngineWebRTC.h"
13 #include "MediaTrackListener.h"
14 #include "webrtc/modules/audio_processing/include/audio_processing.h"
15 
16 namespace mozilla {
17 
18 class AudioInputProcessing;
19 class AudioInputTrack;
20 
// This class is created and used exclusively on the Media Manager thread, with
// exactly two exceptions:
// - Pull is always called on the MTG thread. It only ever uses
//   mInputProcessing. mInputProcessing is set, then a message is sent first to
//   the main thread and then the MTG thread so that it can be used as part of
//   the graph processing. On destruction, similarly, a message is sent to the
//   graph so that it stops using it, and then it is deleted.
// - mSettings is created on the MediaManager thread and is only ever accessed
//   on the Main Thread afterwards. It is const.
class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource {
 public:
  MediaEngineWebRTCMicrophoneSource(RefPtr<AudioDeviceInfo> aInfo,
                                    const nsString& aDeviceName,
                                    const nsCString& aDeviceUUID,
                                    const nsString& aDeviceGroup,
                                    uint32_t aMaxChannelCount,
                                    bool aDelayAgnostic, bool aExtendedFilter);

  nsString GetName() const override;
  nsCString GetUUID() const override;
  nsString GetGroupId() const override;

  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override;
  nsresult Deallocate() override;
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  /**
   * Assigns the current settings of the capture to aOutSettings.
   * Main thread only.
   */
  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

  // This source always represents a microphone device.
  dom::MediaSourceEnum GetMediaSource() const override {
    return dom::MediaSourceEnum::Microphone;
  }

  // Photo capture does not apply to audio sources.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

 protected:
  ~MediaEngineWebRTCMicrophoneSource() = default;

 private:
  /**
   * From a set of constraints and about:config preferences, output the correct
   * set of preferences that can be sent to AudioInputProcessing.
   *
   * This can fail if the number of channels requested is zero, negative, or
   * more than the device supports.
   */
  nsresult EvaluateSettings(const NormalizedConstraints& aConstraintsUpdate,
                            const MediaEnginePrefs& aInPrefs,
                            MediaEnginePrefs* aOutPrefs,
                            const char** aOutBadConstraint);
  /**
   * From settings output by EvaluateSettings, send those settings to the
   * AudioInputProcessing instance and the main thread (for use in GetSettings).
   */
  void ApplySettings(const MediaEnginePrefs& aPrefs);

  /**
   * Set the AudioProcessingModule parameters for a given processing algorithm
   * (AEC, AGC, high-pass filter, noise suppression, extra APM options).
   */
  void UpdateAECSettings(bool aEnable, bool aUseAecMobile,
                         webrtc::EchoCancellation::SuppressionLevel aLevel,
                         webrtc::EchoControlMobile::RoutingMode aRoutingMode);
  void UpdateAGCSettings(bool aEnable, webrtc::GainControl::Mode aMode);
  void UpdateHPFSettings(bool aEnable);
  void UpdateNSSettings(bool aEnable, webrtc::NoiseSuppression::Level aLevel);
  void UpdateAPMExtraOptions(bool aExtendedFilter, bool aDelayAgnostic);

  // Principal the captured data is attributed to. Set in SetTrack.
  PrincipalHandle mPrincipal = PRINCIPAL_HANDLE_NONE;

  // Immutable identity of the underlying device, fixed at construction.
  const RefPtr<AudioDeviceInfo> mDeviceInfo;
  const bool mDelayAgnostic;
  const bool mExtendedFilter;
  const nsString mDeviceName;
  const nsCString mDeviceUUID;
  const nsString mDeviceGroup;

  // The maximum number of channels that this device supports.
  const uint32_t mDeviceMaxChannelCount;
  // The current settings for the underlying device.
  // Constructed on the MediaManager thread, and then only ever accessed on the
  // main thread.
  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>>
      mSettings;

  // Current state of the resource for this source.
  MediaEngineSourceState mState;

  // The current preferences for the APM's various processing stages.
  MediaEnginePrefs mCurrentPrefs;

  // The AudioInputTrack used to interface with the MediaTrackGraph. Set in
  // SetTrack as part of the initialization, and nulled in ::Deallocate.
  RefPtr<AudioInputTrack> mTrack;

  // See note at the top of this class.
  RefPtr<AudioInputProcessing> mInputProcessing;
};
131 
// This class is created on the MediaManager thread, and then exclusively used
// on the MTG thread.
// All communication is done via message passing using MTG ControlMessages.
class AudioInputProcessing : public AudioDataListener {
 public:
  AudioInputProcessing(uint32_t aMaxChannelCount,
                       const PrincipalHandle& aPrincipalHandle);

  // Appends buffered (possibly processed) input data to aSegment to satisfy
  // the graph's pull for the interval [aFrom, aTo).
  void Pull(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
            GraphTime aTrackEnd, AudioSegment* aSegment,
            bool aLastPullThisIteration, bool* aEnded);

  void NotifyOutputData(MediaTrackGraphImpl* aGraph, BufferInfo aInfo) override;
  void NotifyInputStopped(MediaTrackGraphImpl* aGraph) override;
  void NotifyInputData(MediaTrackGraphImpl* aGraph, const BufferInfo aInfo,
                       uint32_t aAlreadyBuffered) override;
  bool IsVoiceInput(MediaTrackGraphImpl* aGraph) const override {
    // If we're passing data directly without AEC or any other process, this
    // means that all voice-processing has been disabled intentionally. In this
    // case, consider that the device is not used for voice input.
    return !PassThrough(aGraph);
  }

  void Start();
  void Stop();

  void DeviceChanged(MediaTrackGraphImpl* aGraph) override;

  // Number of input channels we want the device to deliver; see
  // mRequestedInputChannelCount.
  uint32_t RequestedInputChannelCount(MediaTrackGraphImpl*) override {
    return GetRequestedInputChannelCount();
  }

  void Disconnect(MediaTrackGraphImpl* aGraph) override;

  // aSegment stores the unprocessed non-interleaved audio input data from mic
  void ProcessInput(MediaTrackGraphImpl* aGraph, const AudioSegment* aSegment);

  void PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
                           const AudioDataValue* aBuffer, size_t aFrames,
                           TrackRate aRate, uint32_t aChannels);

  void SetPassThrough(MediaTrackGraphImpl* aGraph, bool aPassThrough);
  uint32_t GetRequestedInputChannelCount();
  void SetRequestedInputChannelCount(MediaTrackGraphImpl* aGraph,
                                     uint32_t aRequestedInputChannelCount);
  // This is true when all processing is disabled, so we can skip
  // packetization, resampling and other processing passes.
  bool PassThrough(MediaTrackGraphImpl* aGraphImpl) const;

  // These allow changing the APM options, enabling or disabling processing
  // steps.
  void UpdateAECSettings(bool aEnable, bool aUseAecMobile,
                         webrtc::EchoCancellation::SuppressionLevel aLevel,
                         webrtc::EchoControlMobile::RoutingMode aRoutingMode);
  void UpdateAGCSettings(bool aEnable, webrtc::GainControl::Mode aMode);
  void UpdateHPFSettings(bool aEnable);
  void UpdateNSSettings(bool aEnable, webrtc::NoiseSuppression::Level aLevel);
  void UpdateAPMExtraOptions(bool aExtendedFilter, bool aDelayAgnostic);

  void End();

  TrackTime NumBufferedFrames(MediaTrackGraphImpl* aGraph) const;

 private:
  ~AudioInputProcessing() = default;
  // This implements the processing algorithm to apply to the input (e.g. a
  // microphone). If all algorithms are disabled, this class is not used. This
  // class only accepts audio chunks of 10ms. It has two inputs and one output:
  // it is fed the speaker data and the microphone data. It outputs processed
  // input data.
  const UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
  // Packetizer to be able to feed 10ms packets to the input side of
  // mAudioProcessing. Not used if the processing is bypassed.
  UniquePtr<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
  // Packetizer to be able to feed 10ms packets to the output side of
  // mAudioProcessing. Not used if the processing is bypassed.
  UniquePtr<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
  // The number of channels asked for by content, after clamping to the range of
  // legal channel count for this particular device. This is the number of
  // channels of the input buffer passed as parameter in NotifyInputData.
  uint32_t mRequestedInputChannelCount;
  // mSkipProcessing is true if none of the processing passes are enabled,
  // because of prefs or constraints. This allows simply copying the audio into
  // the MTG, skipping resampling and the whole webrtc.org code.
  bool mSkipProcessing;
  // Stores the mixed audio output for the reverse-stream of the AEC (the
  // speaker data).
  AlignedFloatBuffer mOutputBuffer;
  // Stores the input audio, to be processed by the APM.
  AlignedFloatBuffer mInputBuffer;
  // Stores the deinterleaved microphone audio.
  AlignedFloatBuffer mDeinterleavedBuffer;
  // Stores the mixed down input audio.
  AlignedFloatBuffer mInputDownmixBuffer;
  // Stores data waiting to be pulled.
  AudioSegment mSegment;
  // Set to false by Start(). Becomes true after the first time we append real
  // audio frames from the audio callback.
  bool mLiveFramesAppended;
  // Once live frames have been appended, this is the number of frames appended
  // as pre-buffer for that data, to avoid underruns. Buffering in the track
  // might be needed because of the AUDIO_BLOCK interval at which we run the
  // graph, the packetizer keeping some input data. Care must be taken when
  // turning on and off the packetizer.
  TrackTime mLiveBufferingAppended;
  // Principal for the data that flows through this class.
  const PrincipalHandle mPrincipal;
  // Whether or not this MediaEngine is enabled. If it's not enabled, it
  // operates in "pull" mode, and we append silence only, releasing the audio
  // input track.
  bool mEnabled;
  // Whether or not we've ended and removed the AudioInputTrack.
  bool mEnded;
  // Store the unprocessed interleaved audio input data.
  Maybe<BufferInfo> mInputData;
};
248 
// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
class AudioInputTrack : public ProcessedMediaTrack {
  // Only accessed on the graph thread.
  RefPtr<AudioInputProcessing> mInputProcessing;

  // Only accessed on the main thread. Link to the track producing raw audio
  // input data. Graph thread should use mInputs to get the source.
  RefPtr<MediaInputPort> mPort;

  // Only accessed on the main thread. Used for bookkeeping on main thread, such
  // that CloseAudioInput can be idempotent.
  // XXX Should really be a CubebUtils::AudioDeviceID, but they aren't
  // copyable (opaque pointers)
  RefPtr<AudioDataListener> mInputListener;

  // Only accessed on the main thread.
  Maybe<CubebUtils::AudioDeviceID> mDeviceId;

  // Private: instances are created via the static Create() factory below.
  explicit AudioInputTrack(TrackRate aSampleRate)
      : ProcessedMediaTrack(aSampleRate, MediaSegment::AUDIO,
                            new AudioSegment()) {}

  ~AudioInputTrack() = default;

 public:
  // Main Thread API
  // Users of audio inputs go through the track so it can track when the
  // last track referencing an input goes away, so it can close the cubeb
  // input. Main thread only.
  nsresult OpenAudioInput(CubebUtils::AudioDeviceID aId,
                          AudioDataListener* aListener);
  void CloseAudioInput();
  Maybe<CubebUtils::AudioDeviceID> DeviceId() const;
  void Destroy() override;
  void SetInputProcessing(RefPtr<AudioInputProcessing> aInputProcessing);
  static AudioInputTrack* Create(MediaTrackGraph* aGraph);

  // Graph Thread API
  void DestroyImpl() override;
  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
  uint32_t NumberOfChannels() const override {
    MOZ_DIAGNOSTIC_ASSERT(
        mInputProcessing,
        "Must set mInputProcessing before exposing to content");
    return mInputProcessing->GetRequestedInputChannelCount();
  }

  // Any thread
  AudioInputTrack* AsAudioInputTrack() override { return this; }

 private:
  // Graph thread API
  void SetInputProcessingImpl(RefPtr<AudioInputProcessing> aInputProcessing);
};
303 
// Source that captures the audio output of the page itself rather than a
// physical input device. Allocation/deallocation is handled by MediaManager.
class MediaEngineWebRTCAudioCaptureSource : public MediaEngineSource {
 public:
  // aUuid is intentionally unused: this source has no per-device identity.
  explicit MediaEngineWebRTCAudioCaptureSource(const char* aUuid) {}
  nsString GetName() const override;
  nsCString GetUUID() const override;
  nsString GetGroupId() const override;
  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  nsresult Deallocate() override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  // This source always represents page audio capture.
  dom::MediaSourceEnum GetMediaSource() const override {
    return dom::MediaSourceEnum::AudioCapture;
  }

  // Photo capture does not apply to audio sources.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

 protected:
  virtual ~MediaEngineWebRTCAudioCaptureSource() = default;
};
341 
342 }  // end namespace mozilla
343 
344 #endif  // MediaEngineWebRTCAudio_h
345