1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim:set ts=2 sw=2 sts=2 et cindent: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "AudioSink.h"
8 #include "AudioConverter.h"
9 #include "AudioDeviceInfo.h"
10 #include "MediaQueue.h"
11 #include "VideoUtils.h"
12 #include "mozilla/CheckedInt.h"
13 #include "mozilla/DebugOnly.h"
14 #include "mozilla/IntegerPrintfMacros.h"
15 #include "mozilla/ProfilerMarkerTypes.h"
16 #include "mozilla/StaticPrefs_media.h"
17 #include "mozilla/StaticPrefs_dom.h"
18 #include "nsPrintfCString.h"
19
20 namespace mozilla {
21
22 extern LazyLogModule gMediaDecoderLog;
23 #define SINK_LOG(msg, ...) \
24 MOZ_LOG(gMediaDecoderLog, LogLevel::Debug, \
25 ("AudioSink=%p " msg, this, ##__VA_ARGS__))
26 #define SINK_LOG_V(msg, ...) \
27 MOZ_LOG(gMediaDecoderLog, LogLevel::Verbose, \
28 ("AudioSink=%p " msg, this, ##__VA_ARGS__))
29
30 // The amount of audio frames that is used to fuzz rounding errors.
31 static const int64_t AUDIO_FUZZ_FRAMES = 1;
32
// Amount of audio, in microseconds, that we buffer ahead of use
34 static const int32_t LOW_AUDIO_USECS = 300000;
35
36 using media::TimeUnit;
37
// Constructs the sink. No audio resources are acquired here; the actual
// AudioStream is created later in Start(). aStartTime anchors all reported
// positions; aAudioQueue is the (externally owned) source of decoded audio.
AudioSink::AudioSink(AbstractThread* aThread,
                     MediaQueue<AudioData>& aAudioQueue,
                     const TimeUnit& aStartTime, const AudioInfo& aInfo,
                     AudioDeviceInfo* aAudioDevice)
    : mStartTime(aStartTime),
      mInfo(aInfo),
      mAudioDevice(aAudioDevice),
      mPlaying(true),
      mMonitor("AudioSink"),
      mWritten(0),
      mErrored(false),
      mOwnerThread(aThread),
      mProcessedQueueLength(0),
      mFramesParsed(0),
      // Output rate/channel count are decided up front from the stream info
      // (and prefs) so the converter and AudioStream agree on one format.
      mOutputRate(DecideAudioPlaybackSampleRate(aInfo)),
      mOutputChannels(DecideAudioPlaybackChannels(aInfo)),
      mAudibilityMonitor(
          mOutputRate,
          StaticPrefs::dom_media_silence_duration_for_audibility()),
      mIsAudioDataAudible(false),
      mAudioQueue(aAudioQueue) {}
59
60 AudioSink::~AudioSink() = default;
61
// Connects the queue listeners, primes the processed queue, and creates and
// starts the underlying AudioStream. Returns the stream's ended promise on
// success, or the stream-initialization error. Owner thread only.
Result<already_AddRefed<MediaSink::EndedPromise>, nsresult> AudioSink::Start(
    const PlaybackParams& aParams) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  // React to new decoded packets, end of input, and consumption of
  // processed packets (all dispatched to the owner thread).
  mAudioQueueListener = mAudioQueue.PushEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPushed);
  mAudioQueueFinishListener = mAudioQueue.FinishEvent().Connect(
      mOwnerThread, this, &AudioSink::NotifyAudioNeeded);
  mProcessedQueueListener = mProcessedQueue.PopFrontEvent().Connect(
      mOwnerThread, this, &AudioSink::OnAudioPopped);

  // To ensure at least one audio packet will be popped from AudioQueue and
  // ready to be played.
  NotifyAudioNeeded();
  nsresult rv = InitializeAudioStream(aParams);
  if (NS_FAILED(rv)) {
    return Err(rv);
  }
  return mAudioStream->Start();
}
82
GetPosition()83 TimeUnit AudioSink::GetPosition() {
84 int64_t tmp;
85 if (mAudioStream && (tmp = mAudioStream->GetPosition()) >= 0) {
86 TimeUnit pos = TimeUnit::FromMicroseconds(tmp);
87 NS_ASSERTION(pos >= mLastGoodPosition,
88 "AudioStream position shouldn't go backward");
89 TimeUnit tmp = mStartTime + pos;
90 if (!tmp.IsValid()) {
91 mErrored = true;
92 return mStartTime + mLastGoodPosition;
93 }
94 // Update the last good position when we got a good one.
95 if (pos >= mLastGoodPosition) {
96 mLastGoodPosition = pos;
97 }
98 }
99
100 return mStartTime + mLastGoodPosition;
101 }
102
HasUnplayedFrames()103 bool AudioSink::HasUnplayedFrames() {
104 // Experimentation suggests that GetPositionInFrames() is zero-indexed,
105 // so we need to add 1 here before comparing it to mWritten.
106 int64_t total;
107 {
108 MonitorAutoLock mon(mMonitor);
109 total = mWritten + (mCursor.get() ? mCursor->Available() : 0);
110 }
111 return mProcessedQueue.GetSize() ||
112 (mAudioStream && mAudioStream->GetPositionInFrames() + 1 < total);
113 }
114
// Tears down the sink: disconnects listeners, shuts the stream down, and
// pushes all unplayed processed packets back onto the input AudioQueue so a
// replacement sink (e.g. a capture-based one) can resume playback from the
// exact point we stopped. Owner thread only.
void AudioSink::Shutdown() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  mAudioQueueListener.Disconnect();
  mAudioQueueFinishListener.Disconnect();
  mProcessedQueueListener.Disconnect();

  if (mAudioStream) {
    mAudioStream->Shutdown();
    mAudioStream = nullptr;
  }
  // Shutdown audio sink doesn't mean the playback is going to stop, so if we
  // simply discard these data, then we will no longer be able to play them.
  // Eg. we change to sink to capture-based sink that will need to continue play
  // remaining data from the audio queue.
  {
    MonitorAutoLock mon(mMonitor);
    // Return packets back-to-front so PushFront restores original order.
    // Stop at mCurrentData: it is handled separately below because it may be
    // partially consumed.
    while (mProcessedQueue.GetSize() > 0) {
      RefPtr<AudioData> audio = mProcessedQueue.PopBack();
      if (audio == mCurrentData) {
        break;
      }
      mAudioQueue.PushFront(audio);
    }
    if (mCurrentData) {
      uint32_t unplayedFrames = mCursor->Available();
      // If we've consumed some partial content from the first audio data, then
      // we have to adjust its data offset and frames number in order not to
      // play the same content again.
      if (unplayedFrames > 0 && unplayedFrames < mCurrentData->Frames()) {
        const uint32_t orginalFrames = mCurrentData->Frames();
        const uint32_t offsetFrames = mCurrentData->Frames() - unplayedFrames;
        Unused << mCurrentData->SetTrimWindow(
            {mCurrentData->mTime + FramesToTimeUnit(offsetFrames, mOutputRate),
             mCurrentData->GetEndTime()});
        SINK_LOG_V("After adjustment, audio frame from %u to %u", orginalFrames,
                   mCurrentData->Frames());
      }
      mAudioQueue.PushFront(mCurrentData);
    }
    MOZ_ASSERT(mProcessedQueue.GetSize() == 0);
  }
  mProcessedQueue.Finish();
}
159
SetVolume(double aVolume)160 void AudioSink::SetVolume(double aVolume) {
161 if (mAudioStream) {
162 mAudioStream->SetVolume(aVolume);
163 }
164 }
165
SetStreamName(const nsAString & aStreamName)166 void AudioSink::SetStreamName(const nsAString& aStreamName) {
167 if (mAudioStream) {
168 mAudioStream->SetStreamName(aStreamName);
169 }
170 }
171
SetPlaybackRate(double aPlaybackRate)172 void AudioSink::SetPlaybackRate(double aPlaybackRate) {
173 MOZ_ASSERT(aPlaybackRate != 0,
174 "Don't set the playbackRate to 0 on AudioStream");
175 if (mAudioStream) {
176 mAudioStream->SetPlaybackRate(aPlaybackRate);
177 }
178 }
179
SetPreservesPitch(bool aPreservesPitch)180 void AudioSink::SetPreservesPitch(bool aPreservesPitch) {
181 if (mAudioStream) {
182 mAudioStream->SetPreservesPitch(aPreservesPitch);
183 }
184 }
185
SetPlaying(bool aPlaying)186 void AudioSink::SetPlaying(bool aPlaying) {
187 if (!mAudioStream || mAudioStream->IsPlaybackCompleted() ||
188 mPlaying == aPlaying) {
189 return;
190 }
191 // pause/resume AudioStream as necessary.
192 if (!aPlaying) {
193 mAudioStream->Pause();
194 } else if (aPlaying) {
195 mAudioStream->Resume();
196 }
197 mPlaying = aPlaying;
198 }
199
InitializeAudioStream(const PlaybackParams & aParams)200 nsresult AudioSink::InitializeAudioStream(const PlaybackParams& aParams) {
201 mAudioStream = new AudioStream(*this);
202 // When AudioQueue is empty, there is no way to know the channel layout of
203 // the coming audio data, so we use the predefined channel map instead.
204 AudioConfig::ChannelLayout::ChannelMap channelMap =
205 mConverter ? mConverter->OutputConfig().Layout().Map()
206 : AudioConfig::ChannelLayout(mOutputChannels).Map();
207 // The layout map used here is already processed by mConverter with
208 // mOutputChannels into SMPTE format, so there is no need to worry if
209 // StaticPrefs::accessibility_monoaudio_enable() or
210 // StaticPrefs::media_forcestereo_enabled() is applied.
211 nsresult rv = mAudioStream->Init(mOutputChannels, channelMap, mOutputRate,
212 mAudioDevice);
213 if (NS_FAILED(rv)) {
214 mAudioStream->Shutdown();
215 mAudioStream = nullptr;
216 return rv;
217 }
218
219 // Set playback params before calling Start() so they can take effect
220 // as soon as the 1st DataCallback of the AudioStream fires.
221 mAudioStream->SetVolume(aParams.mVolume);
222 mAudioStream->SetPlaybackRate(aParams.mPlaybackRate);
223 mAudioStream->SetPreservesPitch(aParams.mPreservesPitch);
224 return NS_OK;
225 }
226
GetEndTime() const227 TimeUnit AudioSink::GetEndTime() const {
228 int64_t written;
229 {
230 MonitorAutoLock mon(mMonitor);
231 written = mWritten;
232 }
233 TimeUnit played = FramesToTimeUnit(written, mOutputRate) + mStartTime;
234 if (!played.IsValid()) {
235 NS_WARNING("Int overflow calculating audio end time");
236 return TimeUnit::Zero();
237 }
238 // As we may be resampling, rounding errors may occur. Ensure we never get
239 // past the original end time.
240 return std::min(mLastEndTime, played);
241 }
242
// Hands up to aFrames of audio to the AudioStream data callback as a Chunk.
// Advances through the current packet via mCursor, pulling the next packet
// from mProcessedQueue when the current one is exhausted, and returns an
// empty chunk when no data is available. Shared state (mCurrentData, mCursor,
// mWritten) is guarded by mMonitor; the pop from mProcessedQueue is
// deliberately deferred to the end (see comment below).
UniquePtr<AudioStream::Chunk> AudioSink::PopFrames(uint32_t aFrames) {
  // Adapter exposing a slice of an AudioData packet to AudioStream. It keeps
  // a strong reference to the packet so mData stays valid for its lifetime;
  // a default-constructed Chunk is the "no data" sentinel.
  class Chunk : public AudioStream::Chunk {
   public:
    Chunk(AudioData* aBuffer, uint32_t aFrames, AudioDataValue* aData)
        : mBuffer(aBuffer), mFrames(aFrames), mData(aData) {}
    Chunk() : mFrames(0), mData(nullptr) {}
    const AudioDataValue* Data() const override { return mData; }
    uint32_t Frames() const override { return mFrames; }
    uint32_t Channels() const override {
      return mBuffer ? mBuffer->mChannels : 0;
    }
    uint32_t Rate() const override { return mBuffer ? mBuffer->mRate : 0; }
    AudioDataValue* GetWritable() const override { return mData; }

   private:
    const RefPtr<AudioData> mBuffer;
    const uint32_t mFrames;
    AudioDataValue* const mData;
  };

  bool needPopping = false;
  if (!mCurrentData) {
    // No data in the queue. Return an empty chunk.
    if (!mProcessedQueue.GetSize()) {
      return MakeUnique<Chunk>();
    }

    // We need to update our values prior popping the processed queue in
    // order to prevent the pop event to fire too early (prior
    // mProcessedQueueLength being updated) or prevent HasUnplayedFrames
    // to incorrectly return true during the time interval betweeen the
    // when mProcessedQueue is read and mWritten is updated.
    needPopping = true;
    {
      MonitorAutoLock mon(mMonitor);
      // Peek (not pop) so the packet stays queued until we are done with it.
      mCurrentData = mProcessedQueue.PeekFront();
      mCursor = MakeUnique<AudioBufferCursor>(mCurrentData->Data(),
                                              mCurrentData->mChannels,
                                              mCurrentData->Frames());
    }
    MOZ_ASSERT(mCurrentData->Frames() > 0);
    mProcessedQueueLength -=
        FramesToUsecs(mCurrentData->Frames(), mOutputRate).value();
  }

  // Serve the smaller of what was requested and what remains in the packet.
  auto framesToPop = std::min(aFrames, mCursor->Available());

  SINK_LOG_V("playing audio at time=%" PRId64 " offset=%u length=%u",
             mCurrentData->mTime.ToMicroseconds(),
             mCurrentData->Frames() - mCursor->Available(), framesToPop);

#ifdef MOZ_GECKO_PROFILER
  // Marker emission is dispatched to the owner thread to keep this
  // (audio-callback) path lightweight.
  mOwnerThread->Dispatch(NS_NewRunnableFunction(
      "AudioSink:AddMarker",
      [startTime = mCurrentData->mTime.ToMicroseconds(),
       endTime = mCurrentData->GetEndTime().ToMicroseconds()] {
        PROFILER_MARKER("PlayAudio", MEDIA_PLAYBACK, {}, MediaSampleMarker,
                        startTime, endTime);
      }));
#endif  // MOZ_GECKO_PROFILER

  UniquePtr<AudioStream::Chunk> chunk =
      MakeUnique<Chunk>(mCurrentData, framesToPop, mCursor->Ptr());

  {
    MonitorAutoLock mon(mMonitor);
    mWritten += framesToPop;
    mCursor->Advance(framesToPop);
    // All frames are popped. Reset mCurrentData so we can pop new elements from
    // the audio queue in next calls to PopFrames().
    if (!mCursor->Available()) {
      mCurrentData = nullptr;
    }
  }

  if (needPopping) {
    // We can now safely pop the audio packet from the processed queue.
    // This will fire the popped event, triggering a call to NotifyAudioNeeded.
    RefPtr<AudioData> releaseMe = mProcessedQueue.PopFront();
    CheckIsAudible(releaseMe);
  }

  return chunk;
}
327
Ended() const328 bool AudioSink::Ended() const {
329 // Return true when error encountered so AudioStream can start draining.
330 return mProcessedQueue.IsFinished() || mErrored;
331 }
332
CheckIsAudible(const AudioData * aData)333 void AudioSink::CheckIsAudible(const AudioData* aData) {
334 MOZ_ASSERT(aData);
335
336 mAudibilityMonitor.Process(aData);
337 bool isAudible = mAudibilityMonitor.RecentlyAudible();
338
339 if (isAudible != mIsAudioDataAudible) {
340 mIsAudioDataAudible = isAudible;
341 mAudibleEvent.Notify(mIsAudioDataAudible);
342 }
343 }
344
// Listener for mProcessedQueue's pop-front event: a packet was consumed, so
// see whether more input should be processed.
void AudioSink::OnAudioPopped(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("AudioStream has used an audio packet.");
  NotifyAudioNeeded();
}
349
// Listener for mAudioQueue's push event: new decoded audio arrived, so see
// whether it should be processed now.
void AudioSink::OnAudioPushed(const RefPtr<AudioData>& aSample) {
  SINK_LOG_V("One new audio packet available.");
  NotifyAudioNeeded();
}
354
// Moves decoded packets from mAudioQueue into mProcessedQueue, converting
// them to the sink's output rate/channel layout and inserting silence across
// timestamp gaps, until enough audio is buffered (LOW_AUDIO_USECS worth, and
// at least two packets). Sets mErrored on overflow/conversion failure.
// Owner thread only.
void AudioSink::NotifyAudioNeeded() {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn(),
             "Not called from the owner's thread");

  // Always ensure we have two processed frames pending to allow for processing
  // latency.
  while (mAudioQueue.GetSize() &&
         (mAudioQueue.IsFinished() || mProcessedQueueLength < LOW_AUDIO_USECS ||
          mProcessedQueue.GetSize() < 2)) {
    RefPtr<AudioData> data = mAudioQueue.PopFront();

    // Ignore the element with 0 frames and try next.
    if (!data->Frames()) {
      continue;
    }

    // (Re)create the converter whenever the input format changes (or on the
    // very first packet).
    if (!mConverter ||
        (data->mRate != mConverter->InputConfig().Rate() ||
         data->mChannels != mConverter->InputConfig().Channels())) {
      SINK_LOG_V("Audio format changed from %u@%uHz to %u@%uHz",
                 mConverter ? mConverter->InputConfig().Channels() : 0,
                 mConverter ? mConverter->InputConfig().Rate() : 0,
                 data->mChannels, data->mRate);

      // Flush whatever the old converter still holds before replacing it.
      DrainConverter();

      // mFramesParsed indicates the current playtime in frames at the current
      // input sampling rate. Recalculate it per the new sampling rate.
      if (mFramesParsed) {
        // We minimize overflow.
        uint32_t oldRate = mConverter->InputConfig().Rate();
        uint32_t newRate = data->mRate;
        CheckedInt64 result = SaferMultDiv(mFramesParsed, newRate, oldRate);
        if (!result.isValid()) {
          NS_WARNING("Int overflow in AudioSink");
          mErrored = true;
          return;
        }
        mFramesParsed = result.value();
      }

      const AudioConfig::ChannelLayout inputLayout =
          data->mChannelMap
              ? AudioConfig::ChannelLayout::SMPTEDefault(data->mChannelMap)
              : AudioConfig::ChannelLayout(data->mChannels);
      const AudioConfig::ChannelLayout outputLayout =
          mOutputChannels == data->mChannels
              ? inputLayout
              : AudioConfig::ChannelLayout(mOutputChannels);
      AudioConfig inConfig =
          AudioConfig(inputLayout, data->mChannels, data->mRate);
      AudioConfig outConfig =
          AudioConfig(outputLayout, mOutputChannels, mOutputRate);
      if (!AudioConverter::CanConvert(inConfig, outConfig)) {
        mErrored = true;
        return;
      }
      mConverter = MakeUnique<AudioConverter>(inConfig, outConfig);
    }

    // See if there's a gap in the audio. If there is, push silence into the
    // audio hardware, so we can play across the gap.
    // Calculate the timestamp of the next chunk of audio in numbers of
    // samples.
    CheckedInt64 sampleTime =
        TimeUnitToFrames(data->mTime - mStartTime, data->mRate);
    // Calculate the number of frames that have been pushed onto the audio
    // hardware.
    CheckedInt64 missingFrames = sampleTime - mFramesParsed;

    if (!missingFrames.isValid() || !sampleTime.isValid()) {
      NS_WARNING("Int overflow in AudioSink");
      mErrored = true;
      return;
    }

    if (missingFrames.value() > AUDIO_FUZZ_FRAMES) {
      // The next audio packet begins some time after the end of the last packet
      // we pushed to the audio hardware. We must push silence into the audio
      // hardware so that the next audio packet begins playback at the correct
      // time.
      missingFrames = std::min<int64_t>(INT32_MAX, missingFrames.value());
      mFramesParsed += missingFrames.value();

      RefPtr<AudioData> silenceData;
      // AlignedAudioBuffer zero-fills, so this is silence at the input format.
      AlignedAudioBuffer silenceBuffer(missingFrames.value() * data->mChannels);
      if (!silenceBuffer) {
        NS_WARNING("OOM in AudioSink");
        mErrored = true;
        return;
      }
      if (mConverter->InputConfig() != mConverter->OutputConfig()) {
        AlignedAudioBuffer convertedData =
            mConverter->Process(AudioSampleBuffer(std::move(silenceBuffer)))
                .Forget();
        silenceData = CreateAudioFromBuffer(std::move(convertedData), data);
      } else {
        silenceData = CreateAudioFromBuffer(std::move(silenceBuffer), data);
      }
      PushProcessedAudio(silenceData);
    }

    mLastEndTime = data->GetEndTime();
    mFramesParsed += data->Frames();

    // Convert the packet itself if the formats differ, then queue it.
    if (mConverter->InputConfig() != mConverter->OutputConfig()) {
      AlignedAudioBuffer buffer(data->MoveableData());
      AlignedAudioBuffer convertedData =
          mConverter->Process(AudioSampleBuffer(std::move(buffer))).Forget();
      data = CreateAudioFromBuffer(std::move(convertedData), data);
    }
    // Remember the last packet actually queued so DrainConverter() can use it
    // as a reference for the drained tail.
    if (PushProcessedAudio(data)) {
      mLastProcessedPacket = Some(data);
    }
  }

  if (mAudioQueue.IsFinished()) {
    // We have reached the end of the data, drain the resampler.
    DrainConverter();
    mProcessedQueue.Finish();
  }
}
477
PushProcessedAudio(AudioData * aData)478 uint32_t AudioSink::PushProcessedAudio(AudioData* aData) {
479 if (!aData || !aData->Frames()) {
480 return 0;
481 }
482 mProcessedQueue.Push(aData);
483 mProcessedQueueLength += FramesToUsecs(aData->Frames(), mOutputRate).value();
484 return aData->Frames();
485 }
486
CreateAudioFromBuffer(AlignedAudioBuffer && aBuffer,AudioData * aReference)487 already_AddRefed<AudioData> AudioSink::CreateAudioFromBuffer(
488 AlignedAudioBuffer&& aBuffer, AudioData* aReference) {
489 uint32_t frames = aBuffer.Length() / mOutputChannels;
490 if (!frames) {
491 return nullptr;
492 }
493 auto duration = FramesToTimeUnit(frames, mOutputRate);
494 if (!duration.IsValid()) {
495 NS_WARNING("Int overflow in AudioSink");
496 mErrored = true;
497 return nullptr;
498 }
499 RefPtr<AudioData> data =
500 new AudioData(aReference->mOffset, aReference->mTime, std::move(aBuffer),
501 mOutputChannels, mOutputRate);
502 MOZ_DIAGNOSTIC_ASSERT(duration == data->mDuration, "must be equal");
503 return data.forget();
504 }
505
// Flushes any audio the converter is still holding (e.g. resampler tail) into
// mProcessedQueue, capped at aMaxFrames. Returns the number of frames
// produced; 0 when there is nothing to drain. Owner thread only.
uint32_t AudioSink::DrainConverter(uint32_t aMaxFrames) {
  MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());

  if (!mConverter || !mLastProcessedPacket || !aMaxFrames) {
    // nothing to drain.
    return 0;
  }

  // Consume the reference packet; draining is a one-shot operation per
  // converted run.
  RefPtr<AudioData> lastPacket = mLastProcessedPacket.ref();
  mLastProcessedPacket.reset();

  // To drain we simply provide an empty packet to the audio converter.
  AlignedAudioBuffer convertedData =
      mConverter->Process(AudioSampleBuffer(AlignedAudioBuffer())).Forget();

  uint32_t frames = convertedData.Length() / mOutputChannels;
  if (!convertedData.SetLength(std::min(frames, aMaxFrames) *
                               mOutputChannels)) {
    // This can never happen as we were reducing the length of convertData.
    mErrored = true;
    return 0;
  }

  // Wrap the drained samples in a packet stamped from the last real packet.
  RefPtr<AudioData> data =
      CreateAudioFromBuffer(std::move(convertedData), lastPacket);
  if (!data) {
    return 0;
  }
  mProcessedQueue.Push(data);
  return data->Frames();
}
537
GetDebugInfo(dom::MediaSinkDebugInfo & aInfo)538 void AudioSink::GetDebugInfo(dom::MediaSinkDebugInfo& aInfo) {
539 MOZ_ASSERT(mOwnerThread->IsCurrentThreadIn());
540 aInfo.mAudioSinkWrapper.mAudioSink.mStartTime = mStartTime.ToMicroseconds();
541 aInfo.mAudioSinkWrapper.mAudioSink.mLastGoodPosition =
542 mLastGoodPosition.ToMicroseconds();
543 aInfo.mAudioSinkWrapper.mAudioSink.mIsPlaying = mPlaying;
544 aInfo.mAudioSinkWrapper.mAudioSink.mOutputRate = mOutputRate;
545 aInfo.mAudioSinkWrapper.mAudioSink.mWritten = mWritten;
546 aInfo.mAudioSinkWrapper.mAudioSink.mHasErrored = bool(mErrored);
547 aInfo.mAudioSinkWrapper.mAudioSink.mPlaybackComplete =
548 mAudioStream ? mAudioStream->IsPlaybackCompleted() : false;
549 }
550
551 } // namespace mozilla
552