1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <string.h>
12 
13 #include <limits>
14 #include <memory>
15 #include <utility>
16 
17 #include "webrtc/api/audio/audio_mixer.h"
18 #include "webrtc/base/bind.h"
19 #include "webrtc/base/thread.h"
20 #include "webrtc/modules/audio_mixer/audio_mixer_impl.h"
21 #include "webrtc/modules/audio_mixer/default_output_rate_calculator.h"
22 #include "webrtc/test/gmock.h"
23 
24 using testing::_;
25 using testing::Exactly;
26 using testing::Invoke;
27 using testing::Return;
28 
29 namespace webrtc {
30 
31 namespace {
32 
33 constexpr int kDefaultSampleRateHz = 48000;
34 constexpr int kId = 1;
35 
36 // Utility function that resets the frame member variables with
37 // sensible defaults.
ResetFrame(AudioFrame * frame)38 void ResetFrame(AudioFrame* frame) {
39   frame->id_ = kId;
40   frame->sample_rate_hz_ = kDefaultSampleRateHz;
41   frame->num_channels_ = 1;
42 
43   // Frame duration 10ms.
44   frame->samples_per_channel_ = kDefaultSampleRateHz / 100;
45   frame->vad_activity_ = AudioFrame::kVadActive;
46   frame->speech_type_ = AudioFrame::kNormalSpeech;
47 }
48 
49 AudioFrame frame_for_mixing;
50 
51 }  // namespace
52 
53 class MockMixerAudioSource : public AudioMixer::Source {
54  public:
MockMixerAudioSource()55   MockMixerAudioSource()
56       : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
57     ON_CALL(*this, GetAudioFrameWithInfo(_, _))
58         .WillByDefault(
59             Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
60     ON_CALL(*this, PreferredSampleRate())
61         .WillByDefault(Return(kDefaultSampleRateHz));
62   }
63 
64   MOCK_METHOD2(GetAudioFrameWithInfo,
65                AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
66 
67   MOCK_CONST_METHOD0(PreferredSampleRate, int());
68   MOCK_CONST_METHOD0(Ssrc, int());
69 
fake_frame()70   AudioFrame* fake_frame() { return &fake_frame_; }
fake_info()71   AudioFrameInfo fake_info() { return fake_audio_frame_info_; }
set_fake_info(const AudioFrameInfo audio_frame_info)72   void set_fake_info(const AudioFrameInfo audio_frame_info) {
73     fake_audio_frame_info_ = audio_frame_info;
74   }
75 
76  private:
FakeAudioFrameWithInfo(int sample_rate_hz,AudioFrame * audio_frame)77   AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
78                                         AudioFrame* audio_frame) {
79     audio_frame->CopyFrom(fake_frame_);
80     audio_frame->sample_rate_hz_ = sample_rate_hz;
81     audio_frame->samples_per_channel_ = sample_rate_hz / 100;
82     return fake_info();
83   }
84 
85   AudioFrame fake_frame_;
86   AudioFrameInfo fake_audio_frame_info_;
87 };
88 
89 class CustomRateCalculator : public OutputRateCalculator {
90  public:
CustomRateCalculator(int rate)91   explicit CustomRateCalculator(int rate) : rate_(rate) {}
CalculateOutputRate(const std::vector<int> & preferred_rates)92   int CalculateOutputRate(const std::vector<int>& preferred_rates) {
93     return rate_;
94   }
95 
96  private:
97   const int rate_;
98 };
99 
100 // Creates participants from |frames| and |frame_info| and adds them
101 // to the mixer. Compares mixed status with |expected_status|
MixAndCompare(const std::vector<AudioFrame> & frames,const std::vector<AudioMixer::Source::AudioFrameInfo> & frame_info,const std::vector<bool> & expected_status)102 void MixAndCompare(
103     const std::vector<AudioFrame>& frames,
104     const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
105     const std::vector<bool>& expected_status) {
106   int num_audio_sources = frames.size();
107   RTC_DCHECK(frames.size() == frame_info.size());
108   RTC_DCHECK(frame_info.size() == expected_status.size());
109 
110   const auto mixer = AudioMixerImpl::Create();
111   std::vector<MockMixerAudioSource> participants(num_audio_sources);
112 
113   for (int i = 0; i < num_audio_sources; i++) {
114     participants[i].fake_frame()->CopyFrom(frames[i]);
115     participants[i].set_fake_info(frame_info[i]);
116   }
117 
118   for (int i = 0; i < num_audio_sources; i++) {
119     EXPECT_TRUE(mixer->AddSource(&participants[i]));
120     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
121         .Times(Exactly(1));
122   }
123 
124   mixer->Mix(1, &frame_for_mixing);
125 
126   for (int i = 0; i < num_audio_sources; i++) {
127     EXPECT_EQ(expected_status[i],
128               mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
129         << "Mixed status of AudioSource #" << i << " wrong.";
130   }
131 }
132 
MixMonoAtGivenNativeRate(int native_sample_rate,AudioFrame * mix_frame,rtc::scoped_refptr<AudioMixer> mixer,MockMixerAudioSource * audio_source)133 void MixMonoAtGivenNativeRate(int native_sample_rate,
134                               AudioFrame* mix_frame,
135                               rtc::scoped_refptr<AudioMixer> mixer,
136                               MockMixerAudioSource* audio_source) {
137   ON_CALL(*audio_source, PreferredSampleRate())
138       .WillByDefault(Return(native_sample_rate));
139   audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
140   audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
141 
142   mixer->Mix(1, mix_frame);
143 }
144 
// Adds three more sources than the mixer can mix. Source i carries the
// value i in one sample, and the last source (largest value) reports
// passive VAD. Expects the passive source and the sources with the
// smallest values to be left out of the mix.
TEST(AudioMixer, LargestEnergyVadActiveMixed) {
  constexpr int kMaxMixed = AudioMixerImpl::kMaximumAmountOfMixedAudioSources;
  constexpr int kAudioSources = kMaxMixed + 3;
  constexpr int kPassiveIndex = kAudioSources - 1;

  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participants[kAudioSources];

  for (int i = 0; i < kAudioSources; ++i) {
    MockMixerAudioSource& source = participants[i];
    ResetFrame(source.fake_frame());

    // Index 80 is used because the first 80 samples may be modified by
    // a ramped-in window.
    source.fake_frame()->data_[80] = i;

    EXPECT_TRUE(mixer->AddSource(&source));
    EXPECT_CALL(source, GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
  }

  // The last participant has the largest energy but reports passive VAD.
  participants[kPassiveIndex].fake_frame()->vad_activity_ =
      AudioFrame::kVadPassive;

  AudioFrame audio_frame;
  mixer->Mix(1,  // number of channels
             &audio_frame);

  for (int i = 0; i < kAudioSources; ++i) {
    const bool is_mixed =
        mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
    const bool below_cutoff = i < kPassiveIndex - kMaxMixed;
    const bool should_be_mixed = !(i == kPassiveIndex || below_cutoff);
    if (should_be_mixed) {
      EXPECT_TRUE(is_mixed) << "Mixing status of AudioSource #" << i
                            << " wrong.";
    } else {
      EXPECT_FALSE(is_mixed) << "Mixing status of AudioSource #" << i
                             << " wrong.";
    }
  }
}
187 
// With a single source, the mixed output after the second mix iteration
// is expected to contain exactly the samples of the source's frame.
TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;

  ResetFrame(participant.fake_frame());
  const int n_samples = participant.fake_frame()->samples_per_channel_;

  // Modify the frame so that it's not zero.
  for (int j = 0; j < n_samples; j++) {
    participant.fake_frame()->data_[j] = j;
  }

  EXPECT_TRUE(mixer->AddSource(&participant));
  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));

  AudioFrame audio_frame;
  // Two mix iterations, so the comparison happens after the ramp-up step.
  for (int i = 0; i < 2; i++) {
    mixer->Mix(1,  // number of channels
               &audio_frame);
  }

  // memcmp() takes a byte count, not an element count. Scale by the
  // sample size so every sample of the (mono) frame is compared rather
  // than only the leading n_samples bytes.
  const size_t bytes_to_compare =
      n_samples * sizeof(participant.fake_frame()->data_[0]);
  EXPECT_EQ(0, memcmp(participant.fake_frame()->data_, audio_frame.data_,
                      bytes_to_compare));
}
214 
// For each listed rate, a source preferring that rate is expected to be
// asked for a frame at exactly that rate.
TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  mixer->AddSource(&audio_source);

  const int kNativeRates[] = {8000, 16000, 32000, 48000};
  for (const int rate : kNativeRates) {
    EXPECT_CALL(audio_source, GetAudioFrameWithInfo(rate, _))
        .Times(Exactly(1));

    MixMonoAtGivenNativeRate(rate, &frame_for_mixing, mixer, &audio_source);
  }
}
231 
// For each listed rate, the mixed frame is expected to carry the rate
// preferred by the single source.
TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  mixer->AddSource(&audio_source);

  const int kNativeRates[] = {8000, 16000, 32000, 48000};
  for (const int rate : kNativeRates) {
    MixMonoAtGivenNativeRate(rate, &frame_for_mixing, mixer, &audio_source);

    EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_);
  }
}
247 
// A source preferring 44100 Hz is expected to be mixed at 48000 Hz.
TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());
  mixer->AddSource(&participant);

  const int needed_frequency = 44100;
  // We expect mixing frequency to be native and >= needed_frequency.
  const int expected_mix_frequency = 48000;

  AudioFrame* const source_frame = participant.fake_frame();
  source_frame->sample_rate_hz_ = expected_mix_frequency;
  source_frame->samples_per_channel_ = expected_mix_frequency / 100;

  ON_CALL(participant, PreferredSampleRate())
      .WillByDefault(Return(needed_frequency));
  EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _))
      .Times(Exactly(1));

  mixer->Mix(1,  // number of channels
             &frame_for_mixing);

  EXPECT_EQ(expected_mix_frequency, frame_for_mixing.sample_rate_hz_);
}
270 
271 // Check that the mixing rate is always >= participants preferred rate.
TEST(AudioMixer,ShouldNotCauseQualityLossForMultipleSources)272 TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
273   const auto mixer = AudioMixerImpl::Create();
274 
275   std::vector<MockMixerAudioSource> audio_sources(2);
276   const std::vector<int> source_sample_rates = {8000, 16000};
277   for (int i = 0; i < 2; ++i) {
278     auto& source = audio_sources[i];
279     ResetFrame(source.fake_frame());
280     mixer->AddSource(&source);
281     const auto sample_rate = source_sample_rates[i];
282     EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
283 
284     EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _));
285   }
286   mixer->Mix(1, &frame_for_mixing);
287 }
288 
// The mixed frame is expected to have the channel count that was
// requested from Mix(), for both mono and stereo.
TEST(AudioMixer, ParticipantNumberOfChannels) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());

  EXPECT_TRUE(mixer->AddSource(&participant));

  const size_t kChannelCounts[] = {1, 2};
  for (const size_t number_of_channels : kChannelCounts) {
    // One frame is requested from the source per mix iteration.
    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
        .Times(Exactly(1));

    mixer->Mix(number_of_channels, &frame_for_mixing);

    EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
  }
}
303 
304 // Maximal amount of participants are mixed one iteration, then
305 // another participant with higher energy is added.
TEST(AudioMixer,RampedOutSourcesShouldNotBeMarkedMixed)306 TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
307   constexpr int kAudioSources =
308       AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
309 
310   const auto mixer = AudioMixerImpl::Create();
311   MockMixerAudioSource participants[kAudioSources];
312 
313   for (int i = 0; i < kAudioSources; i++) {
314     ResetFrame(participants[i].fake_frame());
315     // Set the participant audio energy to increase with the index
316     // |i|.
317     participants[i].fake_frame()->data_[0] = 100 * i;
318   }
319 
320   // Add all participants but the loudest for mixing.
321   for (int i = 0; i < kAudioSources - 1; i++) {
322     EXPECT_TRUE(mixer->AddSource(&participants[i]));
323     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
324         .Times(Exactly(1));
325   }
326 
327   // First mixer iteration
328   mixer->Mix(1, &frame_for_mixing);
329 
330   // All participants but the loudest should have been mixed.
331   for (int i = 0; i < kAudioSources - 1; i++) {
332     EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
333         << "Mixed status of AudioSource #" << i << " wrong.";
334   }
335 
336   // Add new participant with higher energy.
337   EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
338   for (int i = 0; i < kAudioSources; i++) {
339     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
340         .Times(Exactly(1));
341   }
342 
343   mixer->Mix(1, &frame_for_mixing);
344 
345   // The most quiet participant should not have been mixed.
346   EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0]))
347       << "Mixed status of AudioSource #0 wrong.";
348 
349   // The loudest participants should have been mixed.
350   for (int i = 1; i < kAudioSources; i++) {
351     EXPECT_EQ(true,
352               mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
353         << "Mixed status of AudioSource #" << i << " wrong.";
354   }
355 }
356 
357 // This test checks that the initialization and participant addition
358 // can be done on a different thread.
TEST(AudioMixer,ConstructFromOtherThread)359 TEST(AudioMixer, ConstructFromOtherThread) {
360   std::unique_ptr<rtc::Thread> init_thread = rtc::Thread::Create();
361   std::unique_ptr<rtc::Thread> participant_thread = rtc::Thread::Create();
362   init_thread->Start();
363   const auto mixer = init_thread->Invoke<rtc::scoped_refptr<AudioMixer>>(
364       RTC_FROM_HERE, &AudioMixerImpl::Create);
365   MockMixerAudioSource participant;
366 
367   ResetFrame(participant.fake_frame());
368 
369   participant_thread->Start();
370   EXPECT_TRUE(participant_thread->Invoke<int>(
371       RTC_FROM_HERE,
372       rtc::Bind(&AudioMixer::AddSource, mixer.get(), &participant)));
373 
374   EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
375       .Times(Exactly(1));
376 
377   // Do one mixer iteration
378   mixer->Mix(1, &frame_for_mixing);
379 }
380 
// With one source more than can be mixed, the source reporting kMuted
// is expected to be the one left out.
TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (AudioFrame& frame : frames) {
    ResetFrame(&frame);
  }

  // Only the first source is muted.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info[0] = FrameInfo::kMuted;

  // Every source except the first is expected to be mixed.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
398 
// With one source more than can be mixed, the source whose frame has
// passive VAD is expected to be the one left out.
TEST(AudioMixer, PassiveShouldMixAfterNormal) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (AudioFrame& frame : frames) {
    ResetFrame(&frame);
  }

  // All sources report kNormal; only the first frame has passive VAD.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frames[0].vad_activity_ = AudioFrame::kVadPassive;

  // Every source except the first is expected to be mixed.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
416 
// With one source more than can be mixed, a VAD-passive source is
// expected to be left out even though its first 10 ms of samples are
// all set to the maximum int16_t value.
TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (AudioFrame& frame : frames) {
    ResetFrame(&frame);
  }

  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Make the first frame passive but as loud as possible.
  AudioFrame& loud_frame = frames[0];
  loud_frame.vad_activity_ = AudioFrame::kVadPassive;
  std::fill(loud_frame.data_, loud_frame.data_ + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());

  // Every source except the first is expected to be mixed.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
436 
// With one source more than can be mixed, a source reporting kMuted is
// expected to be left out even though its first 10 ms of samples are
// all set to the maximum int16_t value.
TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (AudioFrame& frame : frames) {
    ResetFrame(&frame);
  }

  // Only the first source is muted.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info[0] = FrameInfo::kMuted;

  // Make the muted source's frame as loud as possible.
  AudioFrame& loud_frame = frames[0];
  std::fill(loud_frame.data_, loud_frame.data_ + kDefaultSampleRateHz / 100,
            std::numeric_limits<int16_t>::max());

  // Every source except the first is expected to be mixed.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
456 
// A mixer built with a calculator that always reports kOutputRate is
// expected to request audio from its source at kOutputRate.
TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 22000;

  std::unique_ptr<OutputRateCalculator> calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer =
      AudioMixerImpl::CreateWithOutputRateCalculator(std::move(calculator));

  MockMixerAudioSource audio_source;
  mixer->AddSource(&audio_source);
  ResetFrame(audio_source.fake_frame());

  EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _))
      .Times(Exactly(1));

  mixer->Mix(1,  // number of channels
             &frame_for_mixing);
}
471 
// With no sources added, the mixed frame is still expected to carry the
// rate reported by the custom calculator.
TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 8000;

  std::unique_ptr<OutputRateCalculator> calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer =
      AudioMixerImpl::CreateWithOutputRateCalculator(std::move(calculator));

  mixer->Mix(1,  // number of channels
             &frame_for_mixing);

  EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
}
482 }  // namespace webrtc
483