1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <string.h>
12
13 #include <limits>
14 #include <memory>
15 #include <sstream>
16 #include <string>
17 #include <utility>
18
19 #include "api/audio/audio_mixer.h"
20 #include "modules/audio_mixer/audio_mixer_impl.h"
21 #include "modules/audio_mixer/default_output_rate_calculator.h"
22 #include "rtc_base/bind.h"
23 #include "rtc_base/checks.h"
24 #include "rtc_base/event.h"
25 #include "rtc_base/task_queue.h"
26 #include "test/gmock.h"
27
28 using testing::_;
29 using testing::Exactly;
30 using testing::Invoke;
31 using testing::Return;
32
33 namespace webrtc {
34
35 namespace {
36
// Sample rate, in Hz, used for the fake frames built by ResetFrame() and
// returned by default from MockMixerAudioSource::PreferredSampleRate().
constexpr int kDefaultSampleRateHz = 48000;
38
39 // Utility function that resets the frame member variables with
40 // sensible defaults.
ResetFrame(AudioFrame * frame)41 void ResetFrame(AudioFrame* frame) {
42 frame->sample_rate_hz_ = kDefaultSampleRateHz;
43 frame->num_channels_ = 1;
44
45 // Frame duration 10ms.
46 frame->samples_per_channel_ = kDefaultSampleRateHz / 100;
47 frame->vad_activity_ = AudioFrame::kVadActive;
48 frame->speech_type_ = AudioFrame::kNormalSpeech;
49 }
50
// Builds a single-line, human-readable description of a mixer configuration
// (sample rate, channel count and source count), e.g. for SCOPED_TRACE.
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  std::ostringstream description;
  description << "Sample rate: " << sample_rate_hz << " "
              << "Number of channels: " << number_of_channels << " "
              << "Number of sources: " << number_of_sources;
  return description.str();
}
60
// Frame passed as the output argument to AudioMixer::Mix() by the helpers and
// by several of the tests in this file. It is shared between them.
AudioFrame frame_for_mixing;
62
63 } // namespace
64
65 class MockMixerAudioSource : public AudioMixer::Source {
66 public:
MockMixerAudioSource()67 MockMixerAudioSource()
68 : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
69 ON_CALL(*this, GetAudioFrameWithInfo(_, _))
70 .WillByDefault(
71 Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
72 ON_CALL(*this, PreferredSampleRate())
73 .WillByDefault(Return(kDefaultSampleRateHz));
74 }
75
76 MOCK_METHOD2(GetAudioFrameWithInfo,
77 AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
78
79 MOCK_CONST_METHOD0(PreferredSampleRate, int());
80 MOCK_CONST_METHOD0(Ssrc, int());
81
fake_frame()82 AudioFrame* fake_frame() { return &fake_frame_; }
fake_info()83 AudioFrameInfo fake_info() { return fake_audio_frame_info_; }
set_fake_info(const AudioFrameInfo audio_frame_info)84 void set_fake_info(const AudioFrameInfo audio_frame_info) {
85 fake_audio_frame_info_ = audio_frame_info;
86 }
87
88 private:
FakeAudioFrameWithInfo(int sample_rate_hz,AudioFrame * audio_frame)89 AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
90 AudioFrame* audio_frame) {
91 audio_frame->CopyFrom(fake_frame_);
92 audio_frame->sample_rate_hz_ = sample_rate_hz;
93 audio_frame->samples_per_channel_ =
94 rtc::CheckedDivExact(sample_rate_hz, 100);
95 return fake_info();
96 }
97
98 AudioFrame fake_frame_;
99 AudioFrameInfo fake_audio_frame_info_;
100 };
101
102 class CustomRateCalculator : public OutputRateCalculator {
103 public:
CustomRateCalculator(int rate)104 explicit CustomRateCalculator(int rate) : rate_(rate) {}
CalculateOutputRate(const std::vector<int> & preferred_rates)105 int CalculateOutputRate(const std::vector<int>& preferred_rates) override {
106 return rate_;
107 }
108
109 private:
110 const int rate_;
111 };
112
113 // Creates participants from |frames| and |frame_info| and adds them
114 // to the mixer. Compares mixed status with |expected_status|
MixAndCompare(const std::vector<AudioFrame> & frames,const std::vector<AudioMixer::Source::AudioFrameInfo> & frame_info,const std::vector<bool> & expected_status)115 void MixAndCompare(
116 const std::vector<AudioFrame>& frames,
117 const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
118 const std::vector<bool>& expected_status) {
119 const size_t num_audio_sources = frames.size();
120 RTC_DCHECK(frames.size() == frame_info.size());
121 RTC_DCHECK(frame_info.size() == expected_status.size());
122
123 const auto mixer = AudioMixerImpl::Create();
124 std::vector<MockMixerAudioSource> participants(num_audio_sources);
125
126 for (size_t i = 0; i < num_audio_sources; ++i) {
127 participants[i].fake_frame()->CopyFrom(frames[i]);
128 participants[i].set_fake_info(frame_info[i]);
129 }
130
131 for (size_t i = 0; i < num_audio_sources; ++i) {
132 EXPECT_TRUE(mixer->AddSource(&participants[i]));
133 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
134 .Times(Exactly(1));
135 }
136
137 mixer->Mix(1, &frame_for_mixing);
138
139 for (size_t i = 0; i < num_audio_sources; ++i) {
140 EXPECT_EQ(expected_status[i],
141 mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
142 << "Mixed status of AudioSource #" << i << " wrong.";
143 }
144 }
145
MixMonoAtGivenNativeRate(int native_sample_rate,AudioFrame * mix_frame,rtc::scoped_refptr<AudioMixer> mixer,MockMixerAudioSource * audio_source)146 void MixMonoAtGivenNativeRate(int native_sample_rate,
147 AudioFrame* mix_frame,
148 rtc::scoped_refptr<AudioMixer> mixer,
149 MockMixerAudioSource* audio_source) {
150 ON_CALL(*audio_source, PreferredSampleRate())
151 .WillByDefault(Return(native_sample_rate));
152 audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
153 audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
154
155 mixer->Mix(1, mix_frame);
156 }
157
// Adds three more sources than the mixer mixes at once, with energy growing
// with the source index. The last (loudest) source is VAD-passive. Expects
// only the loudest VAD-active sources, up to the mixer limit, to be mixed.
TEST(AudioMixer, LargestEnergyVadActiveMixed) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3;
  constexpr int kLastSource = kAudioSources - 1;
  // Sources with a lower index than this are too quiet to be selected.
  constexpr int kFirstMixedSource =
      kLastSource - AudioMixerImpl::kMaximumAmountOfMixedAudioSources;

  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participants[kAudioSources];

  for (int i = 0; i < kAudioSources; ++i) {
    MockMixerAudioSource& participant = participants[i];
    ResetFrame(participant.fake_frame());

    // We set the 80-th sample value since the first 80 samples may be
    // modified by a ramped-in window.
    participant.fake_frame()->mutable_data()[80] = i;

    EXPECT_TRUE(mixer->AddSource(&participant));
    EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
  }

  // Last participant gives audio frame with passive VAD, although it has the
  // largest energy.
  participants[kLastSource].fake_frame()->vad_activity_ =
      AudioFrame::kVadPassive;

  AudioFrame audio_frame;
  mixer->Mix(1,  // number of channels
             &audio_frame);

  for (int i = 0; i < kAudioSources; ++i) {
    const bool is_mixed =
        mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
    const bool should_be_mixed = i != kLastSource && i >= kFirstMixedSource;
    if (should_be_mixed) {
      EXPECT_TRUE(is_mixed) << "Mixing status of AudioSource #" << i
                            << " wrong.";
    } else {
      EXPECT_FALSE(is_mixed) << "Mixing status of AudioSource #" << i
                             << " wrong.";
    }
  }
}
200
// Mixes a single participant twice and checks that, once the ramp-up of the
// first iteration is over, the mixed output equals the participant's frame
// sample for sample.
TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;

  ResetFrame(participant.fake_frame());
  const size_t n_samples = participant.fake_frame()->samples_per_channel_;

  // Modify the frame so that it's not zero.
  int16_t* fake_frame_data = participant.fake_frame()->mutable_data();
  for (size_t j = 0; j < n_samples; ++j) {
    fake_frame_data[j] = static_cast<int16_t>(j);
  }

  EXPECT_TRUE(mixer->AddSource(&participant));
  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));

  AudioFrame audio_frame;
  // Two mix iteration to compare after the ramp-up step.
  for (int i = 0; i < 2; ++i) {
    mixer->Mix(1,  // number of channels
               &audio_frame);
  }

  // memcmp() takes a length in bytes, so scale the sample count by the sample
  // width. Passing the bare sample count would compare only the first half of
  // the frame.
  EXPECT_EQ(0, memcmp(participant.fake_frame()->data(), audio_frame.data(),
                      n_samples * sizeof(int16_t)));
}
229
// For each of the listed rates, a source that prefers that rate must be asked
// for audio at exactly that rate.
TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  mixer->AddSource(&audio_source);

  const int kNativeRatesHz[] = {8000, 16000, 32000, 48000};
  for (const int rate_hz : kNativeRatesHz) {
    EXPECT_CALL(audio_source, GetAudioFrameWithInfo(rate_hz, _))
        .Times(Exactly(1));

    MixMonoAtGivenNativeRate(rate_hz, &frame_for_mixing, mixer, &audio_source);
  }
}
246
// For each of the listed rates, the mixed output frame must carry the rate
// preferred by the single source.
TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  mixer->AddSource(&audio_source);

  const int kNativeRatesHz[] = {8000, 16000, 32000, 48000};
  for (const int rate_hz : kNativeRatesHz) {
    MixMonoAtGivenNativeRate(rate_hz, &frame_for_mixing, mixer, &audio_source);

    EXPECT_EQ(rate_hz, frame_for_mixing.sample_rate_hz_);
  }
}
262
// A source preferring 44100 Hz must be polled, and the output produced, at
// 48000 Hz.
TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());
  mixer->AddSource(&participant);

  constexpr int kPreferredRateHz = 44100;
  ON_CALL(participant, PreferredSampleRate())
      .WillByDefault(Return(kPreferredRateHz));

  // We expect mixing frequency to be native and >= the preferred rate.
  constexpr int kExpectedMixRateHz = 48000;
  EXPECT_CALL(participant, GetAudioFrameWithInfo(kExpectedMixRateHz, _))
      .Times(Exactly(1));

  AudioFrame* const source_frame = participant.fake_frame();
  source_frame->sample_rate_hz_ = kExpectedMixRateHz;
  source_frame->samples_per_channel_ = kExpectedMixRateHz / 100;

  mixer->Mix(1, &frame_for_mixing);

  EXPECT_EQ(48000, frame_for_mixing.sample_rate_hz_);
}
285
286 // Check that the mixing rate is always >= participants preferred rate.
TEST(AudioMixer,ShouldNotCauseQualityLossForMultipleSources)287 TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
288 const auto mixer = AudioMixerImpl::Create();
289
290 std::vector<MockMixerAudioSource> audio_sources(2);
291 const std::vector<int> source_sample_rates = {8000, 16000};
292 for (int i = 0; i < 2; ++i) {
293 auto& source = audio_sources[i];
294 ResetFrame(source.fake_frame());
295 mixer->AddSource(&source);
296 const auto sample_rate = source_sample_rates[i];
297 EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
298
299 EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _));
300 }
301 mixer->Mix(1, &frame_for_mixing);
302 }
303
// The mixed frame must have the channel count requested from Mix(), for both
// mono and stereo output.
TEST(AudioMixer, ParticipantNumberOfChannels) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());

  EXPECT_TRUE(mixer->AddSource(&participant));

  const size_t kChannelCounts[] = {1, 2};
  for (const size_t number_of_channels : kChannelCounts) {
    // The source is polled once per mix iteration.
    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
        .Times(Exactly(1));

    mixer->Mix(number_of_channels, &frame_for_mixing);

    EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
  }
}
318
319 // Maximal amount of participants are mixed one iteration, then
320 // another participant with higher energy is added.
TEST(AudioMixer,RampedOutSourcesShouldNotBeMarkedMixed)321 TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
322 constexpr int kAudioSources =
323 AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
324
325 const auto mixer = AudioMixerImpl::Create();
326 MockMixerAudioSource participants[kAudioSources];
327
328 for (int i = 0; i < kAudioSources; ++i) {
329 ResetFrame(participants[i].fake_frame());
330 // Set the participant audio energy to increase with the index
331 // |i|.
332 participants[i].fake_frame()->mutable_data()[0] = 100 * i;
333 }
334
335 // Add all participants but the loudest for mixing.
336 for (int i = 0; i < kAudioSources - 1; ++i) {
337 EXPECT_TRUE(mixer->AddSource(&participants[i]));
338 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
339 .Times(Exactly(1));
340 }
341
342 // First mixer iteration
343 mixer->Mix(1, &frame_for_mixing);
344
345 // All participants but the loudest should have been mixed.
346 for (int i = 0; i < kAudioSources - 1; ++i) {
347 EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
348 << "Mixed status of AudioSource #" << i << " wrong.";
349 }
350
351 // Add new participant with higher energy.
352 EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
353 for (int i = 0; i < kAudioSources; ++i) {
354 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
355 .Times(Exactly(1));
356 }
357
358 mixer->Mix(1, &frame_for_mixing);
359
360 // The most quiet participant should not have been mixed.
361 EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0]))
362 << "Mixed status of AudioSource #0 wrong.";
363
364 // The loudest participants should have been mixed.
365 for (int i = 1; i < kAudioSources; ++i) {
366 EXPECT_EQ(true,
367 mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
368 << "Mixed status of AudioSource #" << i << " wrong.";
369 }
370 }
371
372 // This test checks that the initialization and participant addition
373 // can be done on a different thread.
TEST(AudioMixer,ConstructFromOtherThread)374 TEST(AudioMixer, ConstructFromOtherThread) {
375 rtc::TaskQueue init_queue("init");
376 rtc::scoped_refptr<AudioMixer> mixer;
377 rtc::Event event(false, false);
378 init_queue.PostTask([&mixer, &event]() {
379 mixer = AudioMixerImpl::Create();
380 event.Set();
381 });
382 event.Wait(rtc::Event::kForever);
383
384 MockMixerAudioSource participant;
385 EXPECT_CALL(participant, PreferredSampleRate())
386 .WillRepeatedly(Return(kDefaultSampleRateHz));
387
388 ResetFrame(participant.fake_frame());
389
390 rtc::TaskQueue participant_queue("participant");
391 participant_queue.PostTask([&mixer, &event, &participant]() {
392 mixer->AddSource(&participant);
393 event.Set();
394 });
395 event.Wait(rtc::Event::kForever);
396
397 EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
398 .Times(Exactly(1));
399
400 // Do one mixer iteration
401 mixer->Mix(1, &frame_for_mixing);
402 }
403
// With one source more than the mixer mixes at once, the single muted source
// (index 0) is the one expected to be left out.
TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  // Every source is normal except the first one, which is muted.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
421
// With one source more than the mixer mixes at once, the single VAD-passive
// source (index 0) is the one expected to be left out.
TEST(AudioMixer, PassiveShouldMixAfterNormal) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Every frame is VAD-active except the first one.
  frames.front().vad_activity_ = AudioFrame::kVadPassive;

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
439
// A VAD-passive source is expected to be left out even when it is the
// loudest one: source 0 is passive but filled with maximum-amplitude samples.
TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Make the first frame passive and as loud as possible over 10 ms.
  frames.front().vad_activity_ = AudioFrame::kVadPassive;
  int16_t* const loud_begin = frames.front().mutable_data();
  int16_t* const loud_end = loud_begin + kDefaultSampleRateHz / 100;
  std::fill(loud_begin, loud_end, std::numeric_limits<int16_t>::max());

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
460
// A muted source is expected to be left out even when it is the loudest one:
// source 0 is muted but filled with maximum-amplitude samples.
TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  // Mute the first source and make its frame as loud as possible over 10 ms.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;
  int16_t* const loud_begin = frames.front().mutable_data();
  int16_t* const loud_end = loud_begin + kDefaultSampleRateHz / 100;
  std::fill(loud_begin, loud_end, std::numeric_limits<int16_t>::max());

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
481
// A mixer built with a CustomRateCalculator must poll its source at the rate
// dictated by that calculator.
TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 22000;

  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer = AudioMixerImpl::Create(std::move(rate_calculator), true);

  MockMixerAudioSource audio_source;
  mixer->AddSource(&audio_source);
  ResetFrame(audio_source.fake_frame());

  EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _))
      .Times(Exactly(1));

  mixer->Mix(1, &frame_for_mixing);
}
497
// Even with no sources added, the mixed frame must carry the rate dictated by
// the rate calculator.
TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 8000;

  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer = AudioMixerImpl::Create(std::move(rate_calculator), true);

  mixer->Mix(1, &frame_for_mixing);

  EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
}
509
// Smoke test: a mixer created with the limiter flag set to false can run a
// mix iteration without any sources.
TEST(AudioMixer, NoLimiterBasicApiCalls) {
  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new DefaultOutputRateCalculator());
  const auto mixer = AudioMixerImpl::Create(std::move(rate_calculator), false);

  mixer->Mix(1, &frame_for_mixing);
}
516
// For every combination of output rate, channel count and number of sources
// listed below, a mixer without limiter must produce a frame with exactly the
// requested rate and channel count.
TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
  // No APM limiter means no AudioProcessing::NativeRate restriction
  // on mixing rate. The rate has to be divisible by 100 since we use
  // 10 ms frames, though.
  for (const auto rate : {8000, 20000, 24000, 32000, 44100}) {
    for (const size_t number_of_channels : {1, 2}) {
      for (const auto number_of_sources : {0, 1, 2, 3, 4}) {
        // Report the channel count (not the source count twice) so that a
        // failure trace identifies the actual configuration.
        SCOPED_TRACE(ProduceDebugText(rate,
                                      static_cast<int>(number_of_channels),
                                      number_of_sources));
        const auto mixer =
            AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
                                       new CustomRateCalculator(rate)),
                                   false);

        std::vector<MockMixerAudioSource> sources(number_of_sources);
        for (auto& source : sources) {
          mixer->AddSource(&source);
        }

        mixer->Mix(number_of_channels, &frame_for_mixing);
        EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_);
        EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
      }
    }
  }
}
543 } // namespace webrtc
544