1 /*
2 * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include <string.h>
12
13 #include <limits>
14 #include <memory>
15 #include <utility>
16
17 #include "webrtc/api/audio/audio_mixer.h"
18 #include "webrtc/base/bind.h"
19 #include "webrtc/base/thread.h"
20 #include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
21 #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h"
22 #include "webrtc/test/gmock.h"
23
24 using testing::_;
25 using testing::Exactly;
26 using testing::Invoke;
27 using testing::Return;
28
29 namespace webrtc {
30
31 namespace {
32
33 constexpr int kDefaultSampleRateHz = 48000;
34 constexpr int kId = 1;
35
36 // Utility function that resets the frame member variables with
37 // sensible defaults.
ResetFrame(AudioFrame * frame)38 void ResetFrame(AudioFrame* frame) {
39 frame->id_ = kId;
40 frame->sample_rate_hz_ = kDefaultSampleRateHz;
41 frame->num_channels_ = 1;
42
43 // Frame duration 10ms.
44 frame->samples_per_channel_ = kDefaultSampleRateHz / 100;
45 frame->vad_activity_ = AudioFrame::kVadActive;
46 frame->speech_type_ = AudioFrame::kNormalSpeech;
47 }
48
49 AudioFrame frame_for_mixing;
50
51 } // namespace
52
53 class MockMixerAudioSource : public AudioMixer::Source {
54 public:
MockMixerAudioSource()55 MockMixerAudioSource()
56 : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
57 ON_CALL(*this, GetAudioFrameWithInfo(_, _))
58 .WillByDefault(
59 Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
60 ON_CALL(*this, PreferredSampleRate())
61 .WillByDefault(Return(kDefaultSampleRateHz));
62 }
63
64 MOCK_METHOD2(GetAudioFrameWithInfo,
65 AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
66
67 MOCK_CONST_METHOD0(PreferredSampleRate, int());
68 MOCK_CONST_METHOD0(Ssrc, int());
69
fake_frame()70 AudioFrame* fake_frame() { return &fake_frame_; }
fake_info()71 AudioFrameInfo fake_info() { return fake_audio_frame_info_; }
set_fake_info(const AudioFrameInfo audio_frame_info)72 void set_fake_info(const AudioFrameInfo audio_frame_info) {
73 fake_audio_frame_info_ = audio_frame_info;
74 }
75
76 private:
FakeAudioFrameWithInfo(int sample_rate_hz,AudioFrame * audio_frame)77 AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
78 AudioFrame* audio_frame) {
79 audio_frame->CopyFrom(fake_frame_);
80 audio_frame->sample_rate_hz_ = sample_rate_hz;
81 audio_frame->samples_per_channel_ = sample_rate_hz / 100;
82 return fake_info();
83 }
84
85 AudioFrame fake_frame_;
86 AudioFrameInfo fake_audio_frame_info_;
87 };
88
89 class CustomRateCalculator : public OutputRateCalculator {
90 public:
CustomRateCalculator(int rate)91 explicit CustomRateCalculator(int rate) : rate_(rate) {}
CalculateOutputRate(const std::vector<int> & preferred_rates)92 int CalculateOutputRate(const std::vector<int>& preferred_rates) {
93 return rate_;
94 }
95
96 private:
97 const int rate_;
98 };
99
100 // Creates participants from |frames| and |frame_info| and adds them
101 // to the mixer. Compares mixed status with |expected_status|
MixAndCompare(const std::vector<AudioFrame> & frames,const std::vector<AudioMixer::Source::AudioFrameInfo> & frame_info,const std::vector<bool> & expected_status)102 void MixAndCompare(
103 const std::vector<AudioFrame>& frames,
104 const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
105 const std::vector<bool>& expected_status) {
106 int num_audio_sources = frames.size();
107 RTC_DCHECK(frames.size() == frame_info.size());
108 RTC_DCHECK(frame_info.size() == expected_status.size());
109
110 const auto mixer = AudioMixerImpl::Create();
111 std::vector<MockMixerAudioSource> participants(num_audio_sources);
112
113 for (int i = 0; i < num_audio_sources; i++) {
114 participants[i].fake_frame()->CopyFrom(frames[i]);
115 participants[i].set_fake_info(frame_info[i]);
116 }
117
118 for (int i = 0; i < num_audio_sources; i++) {
119 EXPECT_TRUE(mixer->AddSource(&participants[i]));
120 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
121 .Times(Exactly(1));
122 }
123
124 mixer->Mix(1, &frame_for_mixing);
125
126 for (int i = 0; i < num_audio_sources; i++) {
127 EXPECT_EQ(expected_status[i],
128 mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
129 << "Mixed status of AudioSource #" << i << " wrong.";
130 }
131 }
132
MixMonoAtGivenNativeRate(int native_sample_rate,AudioFrame * mix_frame,rtc::scoped_refptr<AudioMixer> mixer,MockMixerAudioSource * audio_source)133 void MixMonoAtGivenNativeRate(int native_sample_rate,
134 AudioFrame* mix_frame,
135 rtc::scoped_refptr<AudioMixer> mixer,
136 MockMixerAudioSource* audio_source) {
137 ON_CALL(*audio_source, PreferredSampleRate())
138 .WillByDefault(Return(native_sample_rate));
139 audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
140 audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
141
142 mixer->Mix(1, mix_frame);
143 }
144
// Adds three more sources than the mixer mixes at once. The marker sample of
// each source grows with its index, and the last (loudest) source is
// VAD-passive. Only the loudest VAD-active sources are expected to be mixed.
TEST(AudioMixer, LargestEnergyVadActiveMixed) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3;
  constexpr int kLastSource = kAudioSources - 1;
  // Sources with a lower index than this are expected to be left out.
  constexpr int kFirstMixedSource =
      kLastSource - AudioMixerImpl::kMaximumAmountOfMixedAudioSources;

  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participants[kAudioSources];

  for (int i = 0; i < kAudioSources; ++i) {
    AudioFrame* const fake = participants[i].fake_frame();
    ResetFrame(fake);

    // The marker goes at index 80, since the first 80 samples may be
    // modified by a ramped-in window.
    fake->data_[80] = i;

    EXPECT_TRUE(mixer->AddSource(&participants[i]));
    EXPECT_CALL(participants[i], GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
  }

  // The last participant has the largest energy, but delivers its frame with
  // passive VAD.
  participants[kLastSource].fake_frame()->vad_activity_ =
      AudioFrame::kVadPassive;

  AudioFrame audio_frame;
  mixer->Mix(1,  // number of channels
             &audio_frame);

  for (int i = 0; i < kAudioSources; ++i) {
    const bool is_mixed =
        mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
    const bool should_be_mixed = i != kLastSource && i >= kFirstMixedSource;
    if (should_be_mixed) {
      EXPECT_TRUE(is_mixed) << "Mixing status of AudioSource #" << i
                            << " wrong.";
    } else {
      EXPECT_FALSE(is_mixed) << "Mixing status of AudioSource #" << i
                             << " wrong.";
    }
  }
}
187
// With a single participant, the mixed output is expected to equal the
// participant's frame once the initial ramp-up iteration is over.
TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;

  ResetFrame(participant.fake_frame());
  const int n_samples = participant.fake_frame()->samples_per_channel_;

  // Modify the frame so that it's not zero.
  for (int j = 0; j < n_samples; j++) {
    participant.fake_frame()->data_[j] = j;
  }

  EXPECT_TRUE(mixer->AddSource(&participant));
  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));

  AudioFrame audio_frame;
  // Two mix iterations, so that the comparison is made after the ramp-up
  // step.
  for (int i = 0; i < 2; i++) {
    mixer->Mix(1,  // number of channels
               &audio_frame);
  }

  // memcmp() takes a byte count, not a sample count: scale by the sample
  // size so that every sample of the frame is compared rather than only a
  // prefix.
  const size_t n_bytes =
      n_samples * sizeof(participant.fake_frame()->data_[0]);
  EXPECT_EQ(
      0, memcmp(participant.fake_frame()->data_, audio_frame.data_, n_bytes));
}
214
// For each native rate in turn, the source reports that rate as preferred and
// the mixer is expected to request audio at exactly that rate, once.
TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource source;
  ResetFrame(source.fake_frame());

  mixer->AddSource(&source);

  for (const int native_rate : {8000, 16000, 32000, 48000}) {
    EXPECT_CALL(source, GetAudioFrameWithInfo(native_rate, _))
        .Times(Exactly(1));

    MixMonoAtGivenNativeRate(native_rate, &frame_for_mixing, mixer, &source);
  }
}
231
// For each native rate in turn, the source reports that rate as preferred and
// the mixed output frame is expected to carry the same rate.
TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource source;
  ResetFrame(source.fake_frame());

  mixer->AddSource(&source);

  for (const int native_rate : {8000, 16000, 32000, 48000}) {
    MixMonoAtGivenNativeRate(native_rate, &frame_for_mixing, mixer, &source);

    EXPECT_EQ(native_rate, frame_for_mixing.sample_rate_hz_);
  }
}
247
// A source preferring 44100 Hz, which is not one of the native rates, is
// expected to be mixed at 48000 Hz.
TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());
  mixer->AddSource(&participant);

  constexpr int kPreferredRateHz = 44100;
  ON_CALL(participant, PreferredSampleRate())
      .WillByDefault(Return(kPreferredRateHz));

  // The mixing rate should be a native rate that is >= the preferred one.
  constexpr int kExpectedMixRateHz = 48000;
  EXPECT_CALL(participant, GetAudioFrameWithInfo(kExpectedMixRateHz, _))
      .Times(Exactly(1));

  AudioFrame* const fake = participant.fake_frame();
  fake->sample_rate_hz_ = kExpectedMixRateHz;
  fake->samples_per_channel_ = kExpectedMixRateHz / 100;

  mixer->Mix(1, &frame_for_mixing);

  EXPECT_EQ(kExpectedMixRateHz, frame_for_mixing.sample_rate_hz_);
}
270
271 // Check that the mixing rate is always >= participants preferred rate.
TEST(AudioMixer,ShouldNotCauseQualityLossForMultipleSources)272 TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
273 const auto mixer = AudioMixerImpl::Create();
274
275 std::vector<MockMixerAudioSource> audio_sources(2);
276 const std::vector<int> source_sample_rates = {8000, 16000};
277 for (int i = 0; i < 2; ++i) {
278 auto& source = audio_sources[i];
279 ResetFrame(source.fake_frame());
280 mixer->AddSource(&source);
281 const auto sample_rate = source_sample_rates[i];
282 EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
283
284 EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _));
285 }
286 mixer->Mix(1, &frame_for_mixing);
287 }
288
// The mixed frame should have the number of channels that was requested from
// Mix(), for both mono and stereo.
TEST(AudioMixer, ParticipantNumberOfChannels) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());

  EXPECT_TRUE(mixer->AddSource(&participant));

  for (const size_t requested_channels : {1, 2}) {
    // One poll of the source per mix iteration.
    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
        .Times(Exactly(1));

    mixer->Mix(requested_channels, &frame_for_mixing);

    EXPECT_EQ(requested_channels, frame_for_mixing.num_channels_);
  }
}
303
304 // Maximal amount of participants are mixed one iteration, then
305 // another participant with higher energy is added.
TEST(AudioMixer,RampedOutSourcesShouldNotBeMarkedMixed)306 TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
307 constexpr int kAudioSources =
308 AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
309
310 const auto mixer = AudioMixerImpl::Create();
311 MockMixerAudioSource participants[kAudioSources];
312
313 for (int i = 0; i < kAudioSources; i++) {
314 ResetFrame(participants[i].fake_frame());
315 // Set the participant audio energy to increase with the index
316 // |i|.
317 participants[i].fake_frame()->data_[0] = 100 * i;
318 }
319
320 // Add all participants but the loudest for mixing.
321 for (int i = 0; i < kAudioSources - 1; i++) {
322 EXPECT_TRUE(mixer->AddSource(&participants[i]));
323 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
324 .Times(Exactly(1));
325 }
326
327 // First mixer iteration
328 mixer->Mix(1, &frame_for_mixing);
329
330 // All participants but the loudest should have been mixed.
331 for (int i = 0; i < kAudioSources - 1; i++) {
332 EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
333 << "Mixed status of AudioSource #" << i << " wrong.";
334 }
335
336 // Add new participant with higher energy.
337 EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
338 for (int i = 0; i < kAudioSources; i++) {
339 EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
340 .Times(Exactly(1));
341 }
342
343 mixer->Mix(1, &frame_for_mixing);
344
345 // The most quiet participant should not have been mixed.
346 EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0]))
347 << "Mixed status of AudioSource #0 wrong.";
348
349 // The loudest participants should have been mixed.
350 for (int i = 1; i < kAudioSources; i++) {
351 EXPECT_EQ(true,
352 mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
353 << "Mixed status of AudioSource #" << i << " wrong.";
354 }
355 }
356
357 // This test checks that the initialization and participant addition
358 // can be done on a different thread.
TEST(AudioMixer,ConstructFromOtherThread)359 TEST(AudioMixer, ConstructFromOtherThread) {
360 std::unique_ptr<rtc::Thread> init_thread = rtc::Thread::Create();
361 std::unique_ptr<rtc::Thread> participant_thread = rtc::Thread::Create();
362 init_thread->Start();
363 const auto mixer = init_thread->Invoke<rtc::scoped_refptr<AudioMixer>>(
364 RTC_FROM_HERE, &AudioMixerImpl::Create);
365 MockMixerAudioSource participant;
366
367 ResetFrame(participant.fake_frame());
368
369 participant_thread->Start();
370 EXPECT_TRUE(participant_thread->Invoke<int>(
371 RTC_FROM_HERE,
372 rtc::Bind(&AudioMixer::AddSource, mixer.get(), &participant)));
373
374 EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
375 .Times(Exactly(1));
376
377 // Do one mixer iteration
378 mixer->Mix(1, &frame_for_mixing);
379 }
380
// One source more than can be mixed at once, all with default frames. The
// first source reports kMuted and is the one expected to be left out.
TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;

  std::vector<AudioFrame> frames(kAudioSources);
  for (int i = 0; i < kAudioSources; ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
398
// One source more than can be mixed at once. The first source delivers a
// VAD-passive frame and is the one expected to be left out.
TEST(AudioMixer, PassiveShouldMixAfterNormal) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;

  std::vector<AudioFrame> frames(kAudioSources);
  for (int i = 0; i < kAudioSources; ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frames.front().vad_activity_ = AudioFrame::kVadPassive;

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
416
// One source more than can be mixed at once. The first source is VAD-passive
// but has all of its samples at the maximum value; it is still the one
// expected to be left out.
TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;

  std::vector<AudioFrame> frames(kAudioSources);
  for (int i = 0; i < kAudioSources; ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Make the first frame passive, and as loud as possible over its 10 ms.
  AudioFrame& loud_frame = frames.front();
  loud_frame.vad_activity_ = AudioFrame::kVadPassive;
  std::fill(loud_frame.data_, loud_frame.data_ + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
436
// One source more than can be mixed at once. The first source reports kMuted
// but has all of its samples at the maximum value; it is still the one
// expected to be left out.
TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;

  std::vector<AudioFrame> frames(kAudioSources);
  for (int i = 0; i < kAudioSources; ++i) {
    ResetFrame(&frames[i]);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;

  // Make the muted source's frame as loud as possible over its 10 ms.
  AudioFrame& loud_frame = frames.front();
  std::fill(loud_frame.data_, loud_frame.data_ + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());

  std::vector<bool> expected_status(kAudioSources, true);
  expected_status.front() = false;

  MixAndCompare(frames, frame_info, expected_status);
}
456
// A mixer created with a custom rate calculator is expected to request audio
// from its source at the rate that the calculator returns.
TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 22000;

  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator(
      std::move(rate_calculator));

  MockMixerAudioSource audio_source;
  mixer->AddSource(&audio_source);
  ResetFrame(audio_source.fake_frame());

  EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _))
      .Times(Exactly(1));

  mixer->Mix(1, &frame_for_mixing);
}
471
// With no sources added, the mixed frame is still expected to carry the rate
// that the custom rate calculator returns.
TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 8000;

  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer = AudioMixerImpl::CreateWithOutputRateCalculator(
      std::move(rate_calculator));

  // Mix without ever adding a source.
  mixer->Mix(1, &frame_for_mixing);

  EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
}
482 } // namespace webrtc
483