1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* This Source Code Form is subject to the terms of the Mozilla Public
3  * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4  * You can obtain one at http://mozilla.org/MPL/2.0/. */
5 
6 #ifndef MediaEngineWebRTCAudio_h
7 #define MediaEngineWebRTCAudio_h
8 
9 #include "AudioPacketizer.h"
10 #include "AudioSegment.h"
11 #include "AudioDeviceInfo.h"
12 #include "MediaEngineWebRTC.h"
13 #include "MediaTrackListener.h"
14 #include "modules/audio_processing/include/audio_processing.h"
15 
16 namespace mozilla {
17 
18 class AudioInputProcessing;
19 class AudioProcessingTrack;
20 
// This class is created and used exclusively on the Media Manager thread, with
// exactly two exceptions:
// - Pull is always called on the MTG thread. It only ever uses
//   mInputProcessing. mInputProcessing is set, then a message is sent first to
//   the main thread and then the MTG thread so that it can be used as part of
//   the graph processing. On destruction, similarly, a message is sent to the
//   graph so that it stops using it, and then it is deleted.
// - mSettings is created on the MediaManager thread and is only ever accessed
//   on the Main Thread. It is const.
class MediaEngineWebRTCMicrophoneSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCMicrophoneSource(const MediaDevice* aMediaDevice);

  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override;
  nsresult Deallocate() override;
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  /**
   * Assigns the current settings of the capture to aOutSettings.
   * Main thread only.
   */
  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

  // Photo capture is not supported for a microphone source.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

 protected:
  ~MediaEngineWebRTCMicrophoneSource() = default;

 private:
  /**
   * From a set of constraints and about:config preferences, output the correct
   * set of preferences that can be sent to AudioInputProcessing.
   *
   * This can fail if the number of channels requested is zero, negative, or
   * more than the device supports.
   */
  nsresult EvaluateSettings(const NormalizedConstraints& aConstraintsUpdate,
                            const MediaEnginePrefs& aInPrefs,
                            MediaEnginePrefs* aOutPrefs,
                            const char** aOutBadConstraint);
  /**
   * From settings output by EvaluateSettings, send those settings to the
   * AudioInputProcessing instance and the main thread (for use in GetSettings).
   */
  void ApplySettings(const MediaEnginePrefs& aPrefs);

  PrincipalHandle mPrincipal = PRINCIPAL_HANDLE_NONE;

  const RefPtr<AudioDeviceInfo> mDeviceInfo;

  // The maximum number of channels that this device supports.
  const uint32_t mDeviceMaxChannelCount;
  // The current settings for the underlying device.
  // Constructed on the MediaManager thread, and then only ever accessed on the
  // main thread.
  const nsMainThreadPtrHandle<media::Refcountable<dom::MediaTrackSettings>>
      mSettings;

  // Current state of the resource for this source.
  MediaEngineSourceState mState;

  // The current preferences that will be forwarded to mAudioProcessingConfig
  // below.
  MediaEnginePrefs mCurrentPrefs;

  // The AudioProcessingTrack used to interface with the MediaTrackGraph. Set in
  // SetTrack as part of the initialization, and nulled in ::Deallocate.
  RefPtr<AudioProcessingTrack> mTrack;

  // See note at the top of this class.
  RefPtr<AudioInputProcessing> mInputProcessing;

  // Copy of the config currently applied to AudioProcessing through
  // mInputProcessing.
  webrtc::AudioProcessing::Config mAudioProcessingConfig;
};
107 
// This class is created on the MediaManager thread, and then exclusively used
// on the MTG thread.
// All communication is done via message passing using MTG ControlMessages.
class AudioInputProcessing : public AudioDataListener {
 public:
  explicit AudioInputProcessing(uint32_t aMaxChannelCount);
  void Process(MediaTrackGraphImpl* aGraph, GraphTime aFrom, GraphTime aTo,
               AudioSegment* aInput, AudioSegment* aOutput);

  void ProcessOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
                         size_t aFrames, TrackRate aRate, uint32_t aChannels);
  bool IsVoiceInput(MediaTrackGraphImpl* aGraph) const override {
    // If we're passing data directly without AEC or any other process, this
    // means that all voice-processing has been disabled intentionally. In this
    // case, consider that the device is not used for voice input.
    return !PassThrough(aGraph);
  }

  void Start(MediaTrackGraphImpl* aGraph);
  void Stop(MediaTrackGraphImpl* aGraph);

  void DeviceChanged(MediaTrackGraphImpl* aGraph) override;

  uint32_t RequestedInputChannelCount(MediaTrackGraphImpl*) override {
    return GetRequestedInputChannelCount();
  }

  void Disconnect(MediaTrackGraphImpl* aGraph) override;

  void PacketizeAndProcess(MediaTrackGraphImpl* aGraph,
                           const AudioSegment& aSegment);

  void SetPassThrough(MediaTrackGraphImpl* aGraph, bool aPassThrough);
  uint32_t GetRequestedInputChannelCount();
  void SetRequestedInputChannelCount(MediaTrackGraphImpl* aGraph,
                                     uint32_t aRequestedInputChannelCount);
  // This is true when all processing is disabled, and we can skip
  // packetization, resampling and other processing passes.
  bool PassThrough(MediaTrackGraphImpl* aGraphImpl) const;

  // This allows changing the APM options, enabling or disabling processing
  // steps. The config gets applied the next time we're about to process input
  // data.
  void ApplyConfig(MediaTrackGraphImpl* aGraph,
                   const webrtc::AudioProcessing::Config& aConfig);

  void End();

  TrackTime NumBufferedFrames(MediaTrackGraphImpl* aGraph) const;

  // The packet size contains the samples in 10ms. The unit of aRate is Hz.
  constexpr static uint32_t GetPacketSize(TrackRate aRate) {
    return static_cast<uint32_t>(aRate) / 100u;
  }

  bool IsEnded() const { return mEnded; }

 private:
  ~AudioInputProcessing() = default;
  void EnsureAudioProcessing(MediaTrackGraphImpl* aGraph, uint32_t aChannels);
  void ResetAudioProcessing(MediaTrackGraphImpl* aGraph);
  PrincipalHandle GetCheckedPrincipal(const AudioSegment& aSegment);
  // This implements the processing algorithm to apply to the input (e.g. a
  // microphone). If all algorithms are disabled, this class is not used. This
  // class only accepts audio chunks of 10ms. It has two inputs and one output:
  // it is fed the speaker data and the microphone data. It outputs processed
  // input data.
  const UniquePtr<webrtc::AudioProcessing> mAudioProcessing;
  // Packetizer to be able to feed 10ms packets to the input side of
  // mAudioProcessing. Not used if the processing is bypassed.
  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerInput;
  // Packetizer to be able to feed 10ms packets to the output side of
  // mAudioProcessing. Not used if the processing is bypassed.
  Maybe<AudioPacketizer<AudioDataValue, float>> mPacketizerOutput;
  // The number of channels asked for by content, after clamping to the range of
  // legal channel count for this particular device.
  uint32_t mRequestedInputChannelCount;
  // mSkipProcessing is true if none of the processing passes are enabled,
  // because of prefs or constraints. This allows simply copying the audio into
  // the MTG, skipping resampling and the whole webrtc.org code.
  bool mSkipProcessing;
  // Stores the mixed audio output for the reverse-stream of the AEC (the
  // speaker data).
  AlignedFloatBuffer mOutputBuffer;
  // Stores the input audio, to be processed by the APM.
  AlignedFloatBuffer mInputBuffer;
  // Stores the deinterleaved microphone audio.
  AlignedFloatBuffer mDeinterleavedBuffer;
  // Stores the mixed-down input audio.
  AlignedFloatBuffer mInputDownmixBuffer;
  // Stores data waiting to be pulled.
  AudioSegment mSegment;
  // Whether or not this MediaEngine is enabled. If it's not enabled, it
  // operates in "pull" mode, and we append silence only, releasing the audio
  // input track.
  bool mEnabled;
  // Whether or not we've ended and removed the AudioProcessingTrack.
  bool mEnded;
  // When processing is enabled, the number of packets received by this
  // instance, to implement periodic logging.
  uint64_t mPacketCount;
  // A storage holding the interleaved audio data converted from the
  // AudioSegment. This will be used as an input parameter for
  // PacketizeAndProcess. This should be removed once bug 1729041 is done.
  AutoTArray<AudioDataValue,
             SilentChannel::AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS>
      mInterleavedBuffer;
  // Tracks the pending frames with paired principals piled up in packetizer.
  std::deque<std::pair<TrackTime, PrincipalHandle>> mChunksInPacketizer;
};
218 
// MediaTrack subclass tailored for MediaEngineWebRTCMicrophoneSource.
class AudioProcessingTrack : public ProcessedMediaTrack {
  // Only accessed on the graph thread.
  RefPtr<AudioInputProcessing> mInputProcessing;

  // Only accessed on the main thread. Link to the track producing raw audio
  // input data. Graph thread should use mInputs to get the source.
  RefPtr<MediaInputPort> mPort;

  // Only accessed on the main thread. Used for bookkeeping on main thread, such
  // that DisconnectDeviceInput can be idempotent.
  // XXX Should really be a CubebUtils::AudioDeviceID, but they aren't
  // copyable (opaque pointers)
  RefPtr<AudioDataListener> mInputListener;

  // Only accessed on the main thread.
  Maybe<CubebUtils::AudioDeviceID> mDeviceId;

  explicit AudioProcessingTrack(TrackRate aSampleRate)
      : ProcessedMediaTrack(aSampleRate, MediaSegment::AUDIO,
                            new AudioSegment()) {}

  ~AudioProcessingTrack() = default;

 public:
  // Main Thread API
  // Users of audio inputs go through the track so it can track when the
  // last track referencing an input goes away, so it can close the cubeb
  // input. Main thread only.
  nsresult ConnectDeviceInput(CubebUtils::AudioDeviceID aId,
                              AudioDataListener* aListener,
                              const PrincipalHandle& aPrincipal);
  void DisconnectDeviceInput();
  Maybe<CubebUtils::AudioDeviceID> DeviceId() const;
  void Destroy() override;
  void SetInputProcessing(RefPtr<AudioInputProcessing> aInputProcessing);
  static AudioProcessingTrack* Create(MediaTrackGraph* aGraph);

  // Graph Thread API
  void DestroyImpl() override;
  void ProcessInput(GraphTime aFrom, GraphTime aTo, uint32_t aFlags) override;
  uint32_t NumberOfChannels() const override {
    MOZ_DIAGNOSTIC_ASSERT(
        mInputProcessing,
        "Must set mInputProcessing before exposing to content");
    return mInputProcessing->GetRequestedInputChannelCount();
  }
  // Get the data in [aFrom, aTo) from aPort->GetSource() to aOutput. aOutput
  // needs to be empty.
  void GetInputSourceData(AudioSegment& aOutput, const MediaInputPort* aPort,
                          GraphTime aFrom, GraphTime aTo) const;
  // Pass the graph's mixed audio output to mInputProcessing for processing as
  // the reverse stream.
  void NotifyOutputData(MediaTrackGraphImpl* aGraph, AudioDataValue* aBuffer,
                        size_t aFrames, TrackRate aRate, uint32_t aChannels);

  // Any thread
  AudioProcessingTrack* AsAudioProcessingTrack() override { return this; }

 private:
  // Graph thread API
  void SetInputProcessingImpl(RefPtr<AudioInputProcessing> aInputProcessing);
};
282 
// Audio-capture source. Allocation and deallocation are managed in
// MediaManager.cpp, which is why the corresponding overrides below are no-ops.
class MediaEngineWebRTCAudioCaptureSource : public MediaEngineSource {
 public:
  explicit MediaEngineWebRTCAudioCaptureSource(const MediaDevice* aMediaDevice);
  static nsString GetUUID();
  static nsString GetGroupId();
  nsresult Allocate(const dom::MediaTrackConstraints& aConstraints,
                    const MediaEnginePrefs& aPrefs, uint64_t aWindowID,
                    const char** aOutBadConstraint) override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  nsresult Deallocate() override {
    // Nothing to do here, everything is managed in MediaManager.cpp
    return NS_OK;
  }
  void SetTrack(const RefPtr<MediaTrack>& aTrack,
                const PrincipalHandle& aPrincipal) override;
  nsresult Start() override;
  nsresult Stop() override;
  nsresult Reconfigure(const dom::MediaTrackConstraints& aConstraints,
                       const MediaEnginePrefs& aPrefs,
                       const char** aOutBadConstraint) override;

  // Photo capture is not supported for an audio capture source.
  nsresult TakePhoto(MediaEnginePhotoCallback* aCallback) override {
    return NS_ERROR_NOT_IMPLEMENTED;
  }

  void GetSettings(dom::MediaTrackSettings& aOutSettings) const override;

 protected:
  virtual ~MediaEngineWebRTCAudioCaptureSource() = default;
};
315 
316 }  // end namespace mozilla
317 
318 #endif  // MediaEngineWebRTCAudio_h
319