1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "AudioSink.h"
8 #include "AudioConverter.h"
9 #include "AudioDeviceInfo.h"
10 #include "MediaQueue.h"
11 #include "VideoUtils.h"
12 #include "mozilla/CheckedInt.h"
13 #include "mozilla/DebugOnly.h"
14 #include "mozilla/IntegerPrintfMacros.h"
15 #include "mozilla/ProfilerMarkerTypes.h"
16 #include "mozilla/StaticPrefs_media.h"
17 #include "mozilla/StaticPrefs_dom.h"
18 #include "nsPrintfCString.h"
19 
20 namespace mozilla {
21 
22 extern LazyLogModule gMediaDecoderLog;
23 #define SINK_LOG(msg, ...)                   \
24   MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
25           ("AudioSink=%p " msg, this, ##__VA_ARGS__))
26 #define SINK_LOG_V(msg, ...)                   \
27   MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, \
28           ("AudioSink=%p " msg, this, ##__VA_ARGS__))
29 
30 // The amount of audio frames that is used to fuzz rounding errors.
31 static const int64_t AUDIO_FUZZ_FRAMES = 1;
32 
33 // Amount of audio frames we will be processing ahead of use
34 static const int32_t LOW_AUDIO_USECS = 300000;
35 
36 using media::TimeUnit;
37 
// Construct an AudioSink that consumes decoded audio from aAudioQueue and
// feeds it to an AudioStream (see InitializeAudioStream / PopFrames).
//
// @param aThread      Owner thread; control methods assert they run on it.
// @param aAudioQueue  Queue of decoded AudioData this sink drains.
// @param aStartTime   Time origin: GetPosition()/GetEndTime() report
//                     mStartTime plus the played/written duration.
// @param aInfo        Decoded-audio metadata used to pick the output
//                     sample rate and channel count.
// @param aAudioDevice Output device handed to AudioStream::Init().
//                     NOTE(review): presumably may be null for the default
//                     device — confirm against AudioStream::Init.
AudioSink::AudioSink(AbstractThread* aThread,
                     MediaQueue<AudioData>& aAudioQueue,
                     const TimeUnit& aStartTime, const AudioInfo& aInfo,
                     AudioDeviceInfo* aAudioDevice)
    : mStartTime(aStartTime),
      mInfo(aInfo),
      mAudioDevice(aAudioDevice),
      mPlaying(true),
      mMonitor("AudioSink"),
      mWritten(0),
      mErrored(false),
      mOwnerThread(aThread),
      mProcessedQueueLength(0),
      mFramesParsed(0),
      // Output format may differ from the input; mConverter (created lazily
      // in NotifyAudioNeeded) bridges the two.
      mOutputRate(DecideAudioPlaybackSampleRate(aInfo)),
      mOutputChannels(DecideAudioPlaybackChannels(aInfo)),
      mAudibilityMonitor(
          mOutputRate,
          StaticPrefs::dom_media_silence_duration_for_audibility()),
      mIsAudioDataAudible(false),
      mAudioQueue(aAudioQueue) {}

AudioSink::~AudioSink() = default;
61 
// Hook up queue listeners, prime the processed queue, and start the
// underlying AudioStream.
//
// @param aParams  Initial volume / playback-rate / pitch settings.
// @return the AudioStream's ended promise on success, or the nsresult from
//         InitializeAudioStream() on failure.
Result<already_AddRefed<MediaSink::EndedPromise>, nsresult> AudioSink::Start(
    const PlaybackParams& aParams) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  // Connect listeners before the initial NotifyAudioNeeded() so subsequent
  // push/pop/finish events keep refilling mProcessedQueue.
  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPushed);
  mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
      mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
  mProcessedQueueListener = mProcessedQueue.PopFrontEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPopped);

  // To ensure at least one audio packet will be popped from AudioQueue and
  // ready to be played.
  NotifyAudioNeeded();
  nsresult rv = InitializeAudioStream(aParams);
  if (NS_FAILED(rv)) {
    return Err(rv);
  }
  return mAudioStream->Start();
}
82 
GetPosition()83 TimeUnit AudioSink::GetPosition() {
84   int64_t tmp;
85   if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) {
86     TimeUnit pos = TimeUnit::FromMicroseconds(tmp);
87     NS_ASSERTION(pos >= mLastGoodPosition,
88                  "AudioStream position shouldn't go backward");
89     TimeUnit tmp = mStartTime + pos;
90     if (!tmp.IsValid()) {
91       mErrored = true;
92       return mStartTime + mLastGoodPosition;
93     }
94     // Update the last good position when we got a good one.
95     if (pos >= mLastGoodPosition) {
96       mLastGoodPosition = pos;
97     }
98   }
99 
100   return mStartTime + mLastGoodPosition;
101 }
102 
HasUnplayedFrames()103 bool AudioSink::HasUnplayedFrames() {
104   // Experimentation suggests that GetPositionInFrames() is zero-indexed,
105   // so we need to add 1 here before comparing it to mWritten.
106   int64_t total;
107   {
108     MonitorAutoLock mon(mMonitor);
109     total = mWritten + (mCursor.get() ? mCursor->Available() : 0);
110   }
111   return mProcessedQueue.GetSize() ||
112          (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < total);
113 }
114 
// Tear down the sink: disconnect listeners, shut down the AudioStream, and
// push all unplayed processed packets back onto mAudioQueue so a successor
// sink can resume playback where we stopped.
void AudioSink::Shutdown() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  mAudioQueueListener.Disconnect();
  mAudioQueueFinishListener.Disconnect();
  mProcessedQueueListener.Disconnect();

  if (mAudioStream) {
    mAudioStream->Shutdown();
    mAudioStream = nullptr;
  }
  // Shutdown audio sink doesn't mean the playback is going to stop, so if we
  // simply discard these data, then we will no longer be able to play them.
  // Eg. we change to sink to capture-based sink that will need to continue play
  // remaining data from the audio queue.
  {
    MonitorAutoLock mon(mMonitor);
    // Return packets newest-first (PopBack + PushFront) so mAudioQueue ends
    // up in the original order. mCurrentData, if any, is handled separately
    // below because it may be partially consumed.
    while (mProcessedQueue.GetSize() > 0) {
      RefPtr<AudioData> audio = mProcessedQueue.PopBack();
      if (audio == mCurrentData) {
        break;
      }
      mAudioQueue.PushFront(audio);
    }
    if (mCurrentData) {
      uint32_t unplayedFrames = mCursor->Available();
      // If we've consumed some partial content from the first audio data, then
      // we have to adjust its data offset and frames number in order not to
      // play the same content again.
      if (unplayedFrames > 0 && unplayedFrames < mCurrentData->Frames()) {
        const uint32_t orginalFrames = mCurrentData->Frames();
        const uint32_t offsetFrames = mCurrentData->Frames() - unplayedFrames;
        Unused << mCurrentData->SetTrimWindow(
            {mCurrentData->mTime + FramesToTimeUnit(offsetFrames, mOutputRate),
             mCurrentData->GetEndTime()});
        SINK_LOG_V("After adjustment, audio frame from %u to %u", orginalFrames,
                   mCurrentData->Frames());
      }
      mAudioQueue.PushFront(mCurrentData);
    }
    MOZ_ASSERT(mProcessedQueue.GetSize() == 0);
  }
  mProcessedQueue.Finish();
}
159 
SetVolume(double aVolume)160 void AudioSink::SetVolume(double aVolume) {
161   if (mAudioStream) {
162     mAudioStream->SetVolume(aVolume);
163   }
164 }
165 
SetStreamName(const nsAString & aStreamName)166 void AudioSink::SetStreamName(const nsAString& aStreamName) {
167   if (mAudioStream) {
168     mAudioStream->SetStreamName(aStreamName);
169   }
170 }
171 
SetPlaybackRate(double aPlaybackRate)172 void AudioSink::SetPlaybackRate(double aPlaybackRate) {
173   MOZ_ASSERT(aPlaybackRate != 0,
174              "Don't set the playbackRate to 0 on AudioStream");
175   if (mAudioStream) {
176     mAudioStream->SetPlaybackRate(aPlaybackRate);
177   }
178 }
179 
SetPreservesPitch(bool aPreservesPitch)180 void AudioSink::SetPreservesPitch(bool aPreservesPitch) {
181   if (mAudioStream) {
182     mAudioStream->SetPreservesPitch(aPreservesPitch);
183   }
184 }
185 
SetPlaying(bool aPlaying)186 void AudioSink::SetPlaying(bool aPlaying) {
187   if (!mAudioStream || mAudioStream->IsPlaybackCompleted() ||
188       mPlaying == aPlaying) {
189     return;
190   }
191   // pause/resume AudioStream as necessary.
192   if (!aPlaying) {
193     mAudioStream->Pause();
194   } else if (aPlaying) {
195     mAudioStream->Resume();
196   }
197   mPlaying = aPlaying;
198 }
199 
InitializeAudioStream(const PlaybackParams & aParams)200 nsresult AudioSink::InitializeAudioStream(const PlaybackParams& aParams) {
201   mAudioStream = new AudioStream(*this);
202   // When AudioQueue is empty, there is no way to know the channel layout of
203   // the coming audio data, so we use the predefined channel map instead.
204   AudioConfig::ChannelLayout::ChannelMap channelMap =
205       mConverter ? mConverter->OutputConfig().Layout().Map()
206                  : AudioConfig::ChannelLayout(mOutputChannels).Map();
207   // The layout map used here is already processed by mConverter with
208   // mOutputChannels into SMPTE format, so there is no need to worry if
209   // StaticPrefs::accessibility_monoaudio_enable() or
210   // StaticPrefs::media_forcestereo_enabled() is applied.
211   nsresult rv = mAudioStream->Init(mOutputChannels, channelMap, mOutputRate,
212                                    mAudioDevice);
213   if (NS_FAILED(rv)) {
214     mAudioStream->Shutdown();
215     mAudioStream = nullptr;
216     return rv;
217   }
218 
219   // Set playback params before calling Start() so they can take effect
220   // as soon as the 1st DataCallback of the AudioStream fires.
221   mAudioStream->SetVolume(aParams.mVolume);
222   mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
223   mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
224   return NS_OK;
225 }
226 
GetEndTime() const227 TimeUnit AudioSink::GetEndTime() const {
228   int64_t written;
229   {
230     MonitorAutoLock mon(mMonitor);
231     written = mWritten;
232   }
233   TimeUnit played = FramesToTimeUnit(written, mOutputRate) + mStartTime;
234   if (!played.IsValid()) {
235     NS_WARNING("Int overflow calculating audio end time");
236     return TimeUnit::Zero();
237   }
238   // As we may be resampling, rounding errors may occur. Ensure we never get
239   // past the original end time.
240   return std::min(mLastEndTime, played);
241 }
242 
// Hand the AudioStream up to aFrames frames of processed audio.
// Returns a Chunk viewing mCurrentData at the cursor position (an empty
// Chunk when no data is available). Updates mWritten/mCursor under the
// monitor; pops the packet from mProcessedQueue only after those updates
// (see the ordering comments below).
UniquePtr<AudioStream::Chunk> AudioSink::PopFrames(uint32_t aFrames) {
  // Non-owning view over a slice of an AudioData buffer; keeps the backing
  // AudioData alive via the RefPtr member.
  class Chunk : public AudioStream::Chunk {
   public:
    Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
        : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {}
    // Empty chunk: no buffer, zero frames.
    Chunk() : mFrames(0), mData(nullptr) {}
    const AudioDataValue* Data() const override { return mData; }
    uint32_t Frames() const override { return mFrames; }
    uint32_t Channels() const override {
      return mBuffer ? mBuffer->mChannels : 0;
    }
    uint32_t Rate() const override { return mBuffer ? mBuffer->mRate : 0; }
    AudioDataValue* GetWritable() const override { return mData; }

   private:
    const RefPtr<AudioData> mBuffer;
    const uint32_t mFrames;
    AudioDataValue* const mData;
  };

  bool needPopping = false;
  if (!mCurrentData) {
    // No data in the queue. Return an empty chunk.
    if (!mProcessedQueue.GetSize()) {
      return MakeUnique<Chunk>();
    }

    // We need to update our values prior popping the processed queue in
    // order to prevent the pop event to fire too early (prior
    // mProcessedQueueLength being updated) or prevent HasUnplayedFrames
    // to incorrectly return true during the time interval betweeen the
    // when mProcessedQueue is read and mWritten is updated.
    needPopping = true;
    {
      MonitorAutoLock mon(mMonitor);
      // Peek (not pop): the packet stays in the queue until the end of this
      // function so HasUnplayedFrames stays consistent.
      mCurrentData = mProcessedQueue.PeekFront();
      mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->Data(),
                                              mCurrentData->mChannels,
                                              mCurrentData->Frames());
    }
    MOZ_ASSERT(mCurrentData->Frames() > 0);
    mProcessedQueueLength -=
        FramesToUsecs(mCurrentData->Frames(), mOutputRate).value();
  }

  auto framesToPop = std::min(aFrames, mCursor->Available());

  SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u",
             mCurrentData->mTime.ToMicroseconds(),
             mCurrentData->Frames() - mCursor->Available(), framesToPop);

#ifdef MOZ_GECKO_PROFILER
  // Profiler marker is added from the owner thread, off the audio callback.
  mOwnerThread->Dispatch(NS_NewRunnableFunction(
      "AudioSink:AddMarker",
      [startTime = mCurrentData->mTime.ToMicroseconds(),
       endTime = mCurrentData->GetEndTime().ToMicroseconds()] {
        PROFILER_MARKER("PlayAudio", MEDIA_PLAYBACK, {}, MediaSampleMarker,
                        startTime, endTime);
      }));
#endif  // MOZ_GECKO_PROFILER

  UniquePtr<AudioStream::Chunk> chunk =
      MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());

  {
    MonitorAutoLock mon(mMonitor);
    mWritten += framesToPop;
    mCursor->Advance(framesToPop);
    // All frames are popped. Reset mCurrentData so we can pop new elements from
    // the audio queue in next calls to PopFrames().
    if (!mCursor->Available()) {
      mCurrentData = nullptr;
    }
  }

  if (needPopping) {
    // We can now safely pop the audio packet from the processed queue.
    // This will fire the popped event, triggering a call to NotifyAudioNeeded.
    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
    CheckIsAudible(releaseMe);
  }

  return chunk;
}
327 
Ended() const328 bool AudioSink::Ended() const {
329   // Return true when error encountered so AudioStream can start draining.
330   return mProcessedQueue.IsFinished() || mErrored;
331 }
332 
CheckIsAudible(const AudioData * aData)333 void AudioSink::CheckIsAudible(const AudioData* aData) {
334   MOZ_ASSERT(aData);
335 
336   mAudibilityMonitor.Process(aData);
337   bool isAudible = mAudibilityMonitor.RecentlyAudible();
338 
339   if (isAudible != mIsAudioDataAudible) {
340     mIsAudioDataAudible = isAudible;
341     mAudibleEvent.Notify(mIsAudioDataAudible);
342   }
343 }
344 
// Listener for mProcessedQueue's pop-front event: top the processed queue
// back up. aSample itself is unused.
void AudioSink::OnAudioPopped(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("AudioStream has used an audio packet.");
  NotifyAudioNeeded();
}
349 
// Listener for mAudioQueue's push event: process the newly decoded packet
// into mProcessedQueue. aSample itself is unused.
void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("One new audio packet available.");
  NotifyAudioNeeded();
}
354 
// Drain mAudioQueue into mProcessedQueue: (re)build the format converter on
// input-format changes, insert silence across timestamp gaps, convert each
// packet to the output format, and finish the processed queue once the
// input queue is finished. Runs on the owner thread (from Start() and the
// queue push/pop/finish listeners). Sets mErrored and bails on overflow,
// OOM, or an unconvertible format.
void AudioSink::NotifyAudioNeeded() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
             "Not called from the owner's thread");

  // Always ensure we have two processed frames pending to allow for processing
  // latency.
  while (mAudioQueue.GetSize() &&
         (mAudioQueue.IsFinished() || mProcessedQueueLength < LOW_AUDIO_USECS ||
          mProcessedQueue.GetSize() < 2)) {
    RefPtr<AudioData> data = mAudioQueue.PopFront();

    // Ignore the element with 0 frames and try next.
    if (!data->Frames()) {
      continue;
    }

    // (Re)create the converter whenever the input rate or channel count
    // changes.
    if (!mConverter ||
        (data->mRate != mConverter->InputConfig().Rate() ||
         data->mChannels != mConverter->InputConfig().Channels())) {
      SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                 mConverter ? mConverter->InputConfig().Channels() : 0,
                 mConverter ? mConverter->InputConfig().Rate() : 0,
                 data->mChannels, data->mRate);

      // Flush whatever the old converter still buffers before reconfiguring.
      DrainConverter();

      // mFramesParsed indicates the current playtime in frames at the current
      // input sampling rate. Recalculate it per the new sampling rate.
      if (mFramesParsed) {
        // We minimize overflow.
        // NOTE: mConverter is non-null here in practice, as mFramesParsed
        // only becomes non-zero after a converter has been created below.
        uint32_t oldRate = mConverter->InputConfig().Rate();
        uint32_t newRate = data->mRate;
        CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
        if (!result.isValid()) {
          NS_WARNING("Int overflow in AudioSink");
          mErrored = true;
          return;
        }
        mFramesParsed = result.value();
      }

      // Prefer the packet's own channel map (SMPTE order); fall back to the
      // default layout for its channel count.
      const AudioConfig::ChannelLayout inputLayout =
          data->mChannelMap
              ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap)
              : AudioConfig::ChannelLayout(data->mChannels);
      const AudioConfig::ChannelLayout outputLayout =
          mOutputChannels == data->mChannels
              ? inputLayout
              : AudioConfig::ChannelLayout(mOutputChannels);
      AudioConfig inConfig =
          AudioConfig(inputLayout, data->mChannels, data->mRate);
      AudioConfig outConfig =
          AudioConfig(outputLayout, mOutputChannels, mOutputRate);
      if (!AudioConverter::CanConvert(inConfig, outConfig)) {
        mErrored = true;
        return;
      }
      mConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
    }

    // See if there's a gap in the audio. If there is, push silence into the
    // audio hardware, so we can play across the gap.
    // Calculate the timestamp of the next chunk of audio in numbers of
    // samples.
    CheckedInt64 sampleTime =
        TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
    // Calculate the number of frames that have been pushed onto the audio
    // hardware.
    CheckedInt64 missingFrames = sampleTime - mFramesParsed;

    if (!missingFrames.isValid() || !sampleTime.isValid()) {
      NS_WARNING("Int overflow in AudioSink");
      mErrored = true;
      return;
    }

    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
      // The next audio packet begins some time after the end of the last packet
      // we pushed to the audio hardware. We must push silence into the audio
      // hardware so that the next audio packet begins playback at the correct
      // time.
      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
      mFramesParsed += missingFrames.value();

      RefPtr<AudioData> silenceData;
      // AlignedAudioBuffer zero-fills, so this buffer is already silence.
      AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels);
      if (!silenceBuffer) {
        NS_WARNING("OOM in AudioSink");
        mErrored = true;
        return;
      }
      if (mConverter->InputConfig() != mConverter->OutputConfig()) {
        AlignedAudioBuffer convertedData =
            mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer)))
                .Forget();
        silenceData = CreateAudioFromBuffer(std::move(convertedData), data);
      } else {
        silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data);
      }
      PushProcessedAudio(silenceData);
    }

    mLastEndTime = data->GetEndTime();
    mFramesParsed += data->Frames();

    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
      AlignedAudioBuffer buffer(data->MoveableData());
      AlignedAudioBuffer convertedData =
          mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget();
      data = CreateAudioFromBuffer(std::move(convertedData), data);
    }
    if (PushProcessedAudio(data)) {
      // Remember the last pushed packet so DrainConverter() can reference it
      // when flushing the converter's tail.
      mLastProcessedPacket = Some(data);
    }
  }

  if (mAudioQueue.IsFinished()) {
    // We have reached the end of the data, drain the resampler.
    DrainConverter();
    mProcessedQueue.Finish();
  }
}
477 
PushProcessedAudio(AudioData * aData)478 uint32_t AudioSink::PushProcessedAudio(AudioData* aData) {
479   if (!aData || !aData->Frames()) {
480     return 0;
481   }
482   mProcessedQueue.Push(aData);
483   mProcessedQueueLength += FramesToUsecs(aData->Frames(), mOutputRate).value();
484   return aData->Frames();
485 }
486 
CreateAudioFromBuffer(AlignedAudioBuffer && aBuffer,AudioData * aReference)487 already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer(
488     AlignedAudioBuffer&& aBuffer, AudioData* aReference) {
489   uint32_t frames = aBuffer.Length() / mOutputChannels;
490   if (!frames) {
491     return nullptr;
492   }
493   auto duration = FramesToTimeUnit(frames, mOutputRate);
494   if (!duration.IsValid()) {
495     NS_WARNING("Int overflow in AudioSink");
496     mErrored = true;
497     return nullptr;
498   }
499   RefPtr<AudioData> data =
500       new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer),
501                     mOutputChannels, mOutputRate);
502   MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
503   return data.forget();
504 }
505 
// Flush any audio still buffered inside mConverter (e.g. resampler tail) by
// feeding it an empty packet, and queue the result (capped at aMaxFrames
// frames) as a new AudioData derived from the last processed packet.
// Returns the number of frames queued; 0 when there is nothing to drain.
// Must run on the owner thread.
uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
    // nothing to drain.
    return 0;
  }

  // Consume mLastProcessedPacket so a second drain without new input is a
  // no-op; it only serves as the time/offset reference for the drained data.
  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
  mLastProcessedPacket.reset();

  // To drain we simply provide an empty packet to the audio converter.
  AlignedAudioBuffer convertedData =
      mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();

  uint32_t frames = convertedData.Length() / mOutputChannels;
  if (!convertedData.SetLength(std::min(frames, aMaxFrames) *
                               mOutputChannels)) {
    // This can never happen as we were reducing the length of convertData.
    mErrored = true;
    return 0;
  }

  RefPtr<AudioData> data =
      CreateAudioFromBuffer(std::move(convertedData), lastPacket);
  if (!data) {
    return 0;
  }
  mProcessedQueue.Push(data);
  return data->Frames();
}
537 
GetDebugInfo(dom::MediaSinkDebugInfo & aInfo)538 void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
539   MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
540   aInfo.mAudioSinkWrapper.mAudioSink.mStartTime = mStartTime.ToMicroseconds();
541   aInfo.mAudioSinkWrapper.mAudioSink.mLastGoodPosition =
542       mLastGoodPosition.ToMicroseconds();
543   aInfo.mAudioSinkWrapper.mAudioSink.mIsPlaying = mPlaying;
544   aInfo.mAudioSinkWrapper.mAudioSink.mOutputRate = mOutputRate;
545   aInfo.mAudioSinkWrapper.mAudioSink.mWritten = mWritten;
546   aInfo.mAudioSinkWrapper.mAudioSink.mHasErrored = bool(mErrored);
547   aInfo.mAudioSinkWrapper.mAudioSink.mPlaybackComplete =
548       mAudioStream ? mAudioStream->IsPlaybackCompleted() : false;
549 }
550 
551 }  // namespace mozilla
552