1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <string.h>
12 
13 #include <limits>
14 #include <memory>
15 #include <sstream>
16 #include <string>
17 #include <utility>
18 
19 #include "api/audio/audio_mixer.h"
20 #include "modules/audio_mixer/audio_mixer_impl.h"
21 #include "modules/audio_mixer/default_output_rate_calculator.h"
22 #include "rtc_base/bind.h"
23 #include "rtc_base/checks.h"
24 #include "rtc_base/event.h"
25 #include "rtc_base/task_queue.h"
26 #include "test/gmock.h"
27 
28 using testing::_;
29 using testing::Exactly;
30 using testing::Invoke;
31 using testing::Return;
32 
33 namespace webrtc {
34 
35 namespace {
36 
37 constexpr int kDefaultSampleRateHz = 48000;
38 
39 // Utility function that resets the frame member variables with
40 // sensible defaults.
ResetFrame(AudioFrame * frame)41 void ResetFrame(AudioFrame* frame) {
42   frame->sample_rate_hz_ = kDefaultSampleRateHz;
43   frame->num_channels_ = 1;
44 
45   // Frame duration 10ms.
46   frame->samples_per_channel_ = kDefaultSampleRateHz / 100;
47   frame->vad_activity_ = AudioFrame::kVadActive;
48   frame->speech_type_ = AudioFrame::kNormalSpeech;
49 }
50 
// Formats the given mixer configuration as a single human-readable line of
// the form
// "Sample rate: R Number of channels: C Number of sources: S".
std::string ProduceDebugText(int sample_rate_hz,
                             int number_of_channels,
                             int number_of_sources) {
  std::ostringstream description;
  description << "Sample rate: " << sample_rate_hz << " "
              << "Number of channels: " << number_of_channels << " "
              << "Number of sources: " << number_of_sources;
  return description.str();
}
60 
// Output frame that the tests in this file pass to AudioMixer::Mix(). It is
// a single file-level object, so it is shared by every test that uses it.
AudioFrame frame_for_mixing;
62 
63 }  // namespace
64 
65 class MockMixerAudioSource : public AudioMixer::Source {
66  public:
MockMixerAudioSource()67   MockMixerAudioSource()
68       : fake_audio_frame_info_(AudioMixer::Source::AudioFrameInfo::kNormal) {
69     ON_CALL(*this, GetAudioFrameWithInfo(_, _))
70         .WillByDefault(
71             Invoke(this, &MockMixerAudioSource::FakeAudioFrameWithInfo));
72     ON_CALL(*this, PreferredSampleRate())
73         .WillByDefault(Return(kDefaultSampleRateHz));
74   }
75 
76   MOCK_METHOD2(GetAudioFrameWithInfo,
77                AudioFrameInfo(int sample_rate_hz, AudioFrame* audio_frame));
78 
79   MOCK_CONST_METHOD0(PreferredSampleRate, int());
80   MOCK_CONST_METHOD0(Ssrc, int());
81 
fake_frame()82   AudioFrame* fake_frame() { return &fake_frame_; }
fake_info()83   AudioFrameInfo fake_info() { return fake_audio_frame_info_; }
set_fake_info(const AudioFrameInfo audio_frame_info)84   void set_fake_info(const AudioFrameInfo audio_frame_info) {
85     fake_audio_frame_info_ = audio_frame_info;
86   }
87 
88  private:
FakeAudioFrameWithInfo(int sample_rate_hz,AudioFrame * audio_frame)89   AudioFrameInfo FakeAudioFrameWithInfo(int sample_rate_hz,
90                                         AudioFrame* audio_frame) {
91     audio_frame->CopyFrom(fake_frame_);
92     audio_frame->sample_rate_hz_ = sample_rate_hz;
93     audio_frame->samples_per_channel_ =
94         rtc::CheckedDivExact(sample_rate_hz, 100);
95     return fake_info();
96   }
97 
98   AudioFrame fake_frame_;
99   AudioFrameInfo fake_audio_frame_info_;
100 };
101 
102 class CustomRateCalculator : public OutputRateCalculator {
103  public:
CustomRateCalculator(int rate)104   explicit CustomRateCalculator(int rate) : rate_(rate) {}
CalculateOutputRate(const std::vector<int> & preferred_rates)105   int CalculateOutputRate(const std::vector<int>& preferred_rates) override {
106     return rate_;
107   }
108 
109  private:
110   const int rate_;
111 };
112 
113 // Creates participants from |frames| and |frame_info| and adds them
114 // to the mixer. Compares mixed status with |expected_status|
MixAndCompare(const std::vector<AudioFrame> & frames,const std::vector<AudioMixer::Source::AudioFrameInfo> & frame_info,const std::vector<bool> & expected_status)115 void MixAndCompare(
116     const std::vector<AudioFrame>& frames,
117     const std::vector<AudioMixer::Source::AudioFrameInfo>& frame_info,
118     const std::vector<bool>& expected_status) {
119   const size_t num_audio_sources = frames.size();
120   RTC_DCHECK(frames.size() == frame_info.size());
121   RTC_DCHECK(frame_info.size() == expected_status.size());
122 
123   const auto mixer = AudioMixerImpl::Create();
124   std::vector<MockMixerAudioSource> participants(num_audio_sources);
125 
126   for (size_t i = 0; i < num_audio_sources; ++i) {
127     participants[i].fake_frame()->CopyFrom(frames[i]);
128     participants[i].set_fake_info(frame_info[i]);
129   }
130 
131   for (size_t i = 0; i < num_audio_sources; ++i) {
132     EXPECT_TRUE(mixer->AddSource(&participants[i]));
133     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
134         .Times(Exactly(1));
135   }
136 
137   mixer->Mix(1, &frame_for_mixing);
138 
139   for (size_t i = 0; i < num_audio_sources; ++i) {
140     EXPECT_EQ(expected_status[i],
141               mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
142         << "Mixed status of AudioSource #" << i << " wrong.";
143   }
144 }
145 
MixMonoAtGivenNativeRate(int native_sample_rate,AudioFrame * mix_frame,rtc::scoped_refptr<AudioMixer> mixer,MockMixerAudioSource * audio_source)146 void MixMonoAtGivenNativeRate(int native_sample_rate,
147                               AudioFrame* mix_frame,
148                               rtc::scoped_refptr<AudioMixer> mixer,
149                               MockMixerAudioSource* audio_source) {
150   ON_CALL(*audio_source, PreferredSampleRate())
151       .WillByDefault(Return(native_sample_rate));
152   audio_source->fake_frame()->sample_rate_hz_ = native_sample_rate;
153   audio_source->fake_frame()->samples_per_channel_ = native_sample_rate / 100;
154 
155   mixer->Mix(1, mix_frame);
156 }
157 
// Adds three sources more than the mixer mixes at once, with a sample value
// that grows with the source index, and marks the last (largest-energy) source
// as VAD-passive. Only the highest-indexed VAD-active sources are expected to
// be mixed.
TEST(AudioMixer, LargestEnergyVadActiveMixed) {
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 3;
  constexpr int kLastSource = kAudioSources - 1;

  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participants[kAudioSources];

  for (int i = 0; i < kAudioSources; ++i) {
    MockMixerAudioSource& participant = participants[i];
    ResetFrame(participant.fake_frame());

    // The value goes at sample index 80 because the first 80 samples may be
    // modified by a ramped-in window.
    participant.fake_frame()->mutable_data()[80] = i;

    EXPECT_TRUE(mixer->AddSource(&participant));
    EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(1));
  }

  // The last participant has the largest energy but reports passive VAD.
  participants[kLastSource].fake_frame()->vad_activity_ =
      AudioFrame::kVadPassive;

  AudioFrame audio_frame;
  mixer->Mix(1,  // number of channels
             &audio_frame);

  // Sources below this index are too quiet to make it into the mix.
  const auto first_mixed_index =
      kLastSource - AudioMixerImpl::kMaximumAmountOfMixedAudioSources;
  for (int i = 0; i < kAudioSources; ++i) {
    const bool is_mixed =
        mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]);
    const bool expect_not_mixed = i == kLastSource || i < first_mixed_index;
    if (expect_not_mixed) {
      EXPECT_FALSE(is_mixed) << "Mixing status of AudioSource #" << i
                             << " wrong.";
    } else {
      EXPECT_TRUE(is_mixed) << "Mixing status of AudioSource #" << i
                            << " wrong.";
    }
  }
}
200 
// With a single source, the mixed output of the second iteration (after the
// ramp-up step) must be identical to the frame the source delivered.
TEST(AudioMixer, FrameNotModifiedForSingleParticipant) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;

  ResetFrame(participant.fake_frame());
  const size_t n_samples = participant.fake_frame()->samples_per_channel_;

  // Modify the frame so that it's not zero.
  int16_t* fake_frame_data = participant.fake_frame()->mutable_data();
  for (size_t j = 0; j < n_samples; ++j) {
    fake_frame_data[j] = static_cast<int16_t>(j);
  }

  EXPECT_TRUE(mixer->AddSource(&participant));
  EXPECT_CALL(participant, GetAudioFrameWithInfo(_, _)).Times(Exactly(2));

  AudioFrame audio_frame;
  // Two mix iteration to compare after the ramp-up step.
  for (int i = 0; i < 2; ++i) {
    mixer->Mix(1,  // number of channels
               &audio_frame);
  }

  // memcmp() takes a byte count. The frame is mono (see ResetFrame()), so the
  // whole frame is n_samples 16-bit samples; comparing only n_samples bytes
  // would leave the second half of the frame unchecked.
  EXPECT_EQ(0, memcmp(participant.fake_frame()->data(), audio_frame.data(),
                      n_samples * sizeof(int16_t)));
}
229 
// For each of the rates 8, 16, 32 and 48 kHz, a source that prefers that rate
// must be asked for audio at exactly that rate.
TEST(AudioMixer, SourceAtNativeRateShouldNeverResample) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  // Fail right here if the source is rejected, rather than going on to mix
  // without it.
  EXPECT_TRUE(mixer->AddSource(&audio_source));

  for (auto frequency : {8000, 16000, 32000, 48000}) {
    EXPECT_CALL(audio_source, GetAudioFrameWithInfo(frequency, _))
        .Times(Exactly(1));

    MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
                             &audio_source);
  }
}
246 
// For each of the rates 8, 16, 32 and 48 kHz, mixing a single source that
// prefers that rate must yield an output frame at the same rate.
TEST(AudioMixer, MixerShouldMixAtNativeSourceRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource audio_source;
  ResetFrame(audio_source.fake_frame());

  // Fail right here if the source is rejected, rather than going on to mix
  // without it.
  EXPECT_TRUE(mixer->AddSource(&audio_source));

  for (auto frequency : {8000, 16000, 32000, 48000}) {
    MixMonoAtGivenNativeRate(frequency, &frame_for_mixing, mixer,
                             &audio_source);

    EXPECT_EQ(frequency, frame_for_mixing.sample_rate_hz_);
  }
}
262 
// A source preferring 44.1 kHz must be mixed at 48 kHz: the source is asked
// for audio at 48 kHz and the output frame reports 48 kHz.
TEST(AudioMixer, MixerShouldAlwaysMixAtNativeRate) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());
  // Fail right here if the source is rejected, rather than going on to mix
  // without it.
  EXPECT_TRUE(mixer->AddSource(&participant));

  const int needed_frequency = 44100;
  ON_CALL(participant, PreferredSampleRate())
      .WillByDefault(Return(needed_frequency));

  // We expect mixing frequency to be native and >= needed_frequency.
  const int expected_mix_frequency = 48000;
  EXPECT_CALL(participant, GetAudioFrameWithInfo(expected_mix_frequency, _))
      .Times(Exactly(1));
  participant.fake_frame()->sample_rate_hz_ = expected_mix_frequency;
  participant.fake_frame()->samples_per_channel_ = expected_mix_frequency / 100;

  mixer->Mix(1, &frame_for_mixing);

  // Compare against the named expectation so that the requested rate and the
  // verified output rate cannot drift apart.
  EXPECT_EQ(expected_mix_frequency, frame_for_mixing.sample_rate_hz_);
}
285 
286 // Check that the mixing rate is always >= participants preferred rate.
TEST(AudioMixer,ShouldNotCauseQualityLossForMultipleSources)287 TEST(AudioMixer, ShouldNotCauseQualityLossForMultipleSources) {
288   const auto mixer = AudioMixerImpl::Create();
289 
290   std::vector<MockMixerAudioSource> audio_sources(2);
291   const std::vector<int> source_sample_rates = {8000, 16000};
292   for (int i = 0; i < 2; ++i) {
293     auto& source = audio_sources[i];
294     ResetFrame(source.fake_frame());
295     mixer->AddSource(&source);
296     const auto sample_rate = source_sample_rates[i];
297     EXPECT_CALL(source, PreferredSampleRate()).WillOnce(Return(sample_rate));
298 
299     EXPECT_CALL(source, GetAudioFrameWithInfo(testing::Ge(sample_rate), _));
300   }
301   mixer->Mix(1, &frame_for_mixing);
302 }
303 
// Mixes one source first to one and then to two channels, and checks that the
// output frame reports the requested channel count each time.
TEST(AudioMixer, ParticipantNumberOfChannels) {
  const auto mixer = AudioMixerImpl::Create();

  MockMixerAudioSource participant;
  ResetFrame(participant.fake_frame());
  EXPECT_TRUE(mixer->AddSource(&participant));

  const size_t kChannelCounts[] = {1, 2};
  for (const size_t requested_channels : kChannelCounts) {
    // Every iteration polls the source exactly once, at the default rate.
    EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
        .Times(Exactly(1));

    mixer->Mix(requested_channels, &frame_for_mixing);

    EXPECT_EQ(requested_channels, frame_for_mixing.num_channels_);
  }
}
318 
319 // Maximal amount of participants are mixed one iteration, then
320 // another participant with higher energy is added.
TEST(AudioMixer,RampedOutSourcesShouldNotBeMarkedMixed)321 TEST(AudioMixer, RampedOutSourcesShouldNotBeMarkedMixed) {
322   constexpr int kAudioSources =
323       AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
324 
325   const auto mixer = AudioMixerImpl::Create();
326   MockMixerAudioSource participants[kAudioSources];
327 
328   for (int i = 0; i < kAudioSources; ++i) {
329     ResetFrame(participants[i].fake_frame());
330     // Set the participant audio energy to increase with the index
331     // |i|.
332     participants[i].fake_frame()->mutable_data()[0] = 100 * i;
333   }
334 
335   // Add all participants but the loudest for mixing.
336   for (int i = 0; i < kAudioSources - 1; ++i) {
337     EXPECT_TRUE(mixer->AddSource(&participants[i]));
338     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
339         .Times(Exactly(1));
340   }
341 
342   // First mixer iteration
343   mixer->Mix(1, &frame_for_mixing);
344 
345   // All participants but the loudest should have been mixed.
346   for (int i = 0; i < kAudioSources - 1; ++i) {
347     EXPECT_TRUE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
348         << "Mixed status of AudioSource #" << i << " wrong.";
349   }
350 
351   // Add new participant with higher energy.
352   EXPECT_TRUE(mixer->AddSource(&participants[kAudioSources - 1]));
353   for (int i = 0; i < kAudioSources; ++i) {
354     EXPECT_CALL(participants[i], GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
355         .Times(Exactly(1));
356   }
357 
358   mixer->Mix(1, &frame_for_mixing);
359 
360   // The most quiet participant should not have been mixed.
361   EXPECT_FALSE(mixer->GetAudioSourceMixabilityStatusForTest(&participants[0]))
362       << "Mixed status of AudioSource #0 wrong.";
363 
364   // The loudest participants should have been mixed.
365   for (int i = 1; i < kAudioSources; ++i) {
366     EXPECT_EQ(true,
367               mixer->GetAudioSourceMixabilityStatusForTest(&participants[i]))
368         << "Mixed status of AudioSource #" << i << " wrong.";
369   }
370 }
371 
372 // This test checks that the initialization and participant addition
373 // can be done on a different thread.
TEST(AudioMixer,ConstructFromOtherThread)374 TEST(AudioMixer, ConstructFromOtherThread) {
375   rtc::TaskQueue init_queue("init");
376   rtc::scoped_refptr<AudioMixer> mixer;
377   rtc::Event event(false, false);
378   init_queue.PostTask([&mixer, &event]() {
379     mixer = AudioMixerImpl::Create();
380     event.Set();
381   });
382   event.Wait(rtc::Event::kForever);
383 
384   MockMixerAudioSource participant;
385   EXPECT_CALL(participant, PreferredSampleRate())
386       .WillRepeatedly(Return(kDefaultSampleRateHz));
387 
388   ResetFrame(participant.fake_frame());
389 
390   rtc::TaskQueue participant_queue("participant");
391   participant_queue.PostTask([&mixer, &event, &participant]() {
392     mixer->AddSource(&participant);
393     event.Set();
394   });
395   event.Wait(rtc::Event::kForever);
396 
397   EXPECT_CALL(participant, GetAudioFrameWithInfo(kDefaultSampleRateHz, _))
398       .Times(Exactly(1));
399 
400   // Do one mixer iteration
401   mixer->Mix(1, &frame_for_mixing);
402 }
403 
// With one source more than can be mixed, and all frames otherwise identical,
// the source that reports kMuted is the one expected to be left out.
TEST(AudioMixer, MutedShouldMixAfterUnmuted) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  // Source 0 is muted; all the others deliver normal audio.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;

  // Everyone except source 0 is expected in the mix.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
421 
// With one source more than can be mixed, and all frames otherwise identical,
// the source whose frame is VAD-passive is the one expected to be left out.
TEST(AudioMixer, PassiveShouldMixAfterNormal) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }
  // Source 0 is the only one without voice activity.
  frames.front().vad_activity_ = AudioFrame::kVadPassive;

  const std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Everyone except source 0 is expected in the mix.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
439 
// A VAD-passive source is expected to be left out in favour of VAD-active
// ones even when its frame is filled with the maximum sample value.
TEST(AudioMixer, ActiveShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  // Number of samples in a 10 ms frame at the default rate.
  constexpr int kSamplesPerFrame = kDefaultSampleRateHz / 100;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  // Source 0: no voice activity, but every sample at full scale.
  AudioFrame& loud_frame = frames.front();
  loud_frame.vad_activity_ = AudioFrame::kVadPassive;
  int16_t* const loud_begin = loud_frame.mutable_data();
  std::fill(loud_begin, loud_begin + kSamplesPerFrame,
            std::numeric_limits<int16_t>::max());

  const std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);

  // Everyone except source 0 is expected in the mix.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
460 
// A source reporting kMuted is expected to be left out in favour of unmuted
// ones even when its frame is filled with the maximum sample value.
TEST(AudioMixer, UnmutedShouldMixBeforeLoud) {
  using FrameInfo = AudioMixer::Source::AudioFrameInfo;
  constexpr int kAudioSources =
      AudioMixerImpl::kMaximumAmountOfMixedAudioSources + 1;
  // Number of samples in a 10 ms frame at the default rate.
  constexpr int kSamplesPerFrame = kDefaultSampleRateHz / 100;

  std::vector<AudioFrame> frames(kAudioSources);
  for (size_t i = 0; i < frames.size(); ++i) {
    ResetFrame(&frames[i]);
  }

  // Source 0: reported as muted, but every sample at full scale.
  std::vector<FrameInfo> frame_info(kAudioSources, FrameInfo::kNormal);
  frame_info.front() = FrameInfo::kMuted;
  int16_t* const loud_begin = frames.front().mutable_data();
  std::fill(loud_begin, loud_begin + kSamplesPerFrame,
            std::numeric_limits<int16_t>::max());

  // Everyone except source 0 is expected in the mix.
  std::vector<bool> expected_status(kAudioSources, true);
  expected_status[0] = false;

  MixAndCompare(frames, frame_info, expected_status);
}
481 
// A mixer built with a custom OutputRateCalculator must ask its source for
// audio at the rate the calculator returns.
TEST(AudioMixer, MixingRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 22000;
  const auto mixer =
      AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
                                 new CustomRateCalculator(kOutputRate)),
                             true);
  MockMixerAudioSource audio_source;
  // Fail right here if the source is rejected, rather than going on to mix
  // without it.
  EXPECT_TRUE(mixer->AddSource(&audio_source));
  ResetFrame(audio_source.fake_frame());

  EXPECT_CALL(audio_source, GetAudioFrameWithInfo(kOutputRate, _))
      .Times(Exactly(1));

  mixer->Mix(1, &frame_for_mixing);
}
497 
// Even with no sources added, the output frame must carry the rate returned
// by the custom OutputRateCalculator.
TEST(AudioMixer, ZeroSourceRateShouldBeDecidedByRateCalculator) {
  constexpr int kOutputRate = 8000;

  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new CustomRateCalculator(kOutputRate));
  const auto mixer = AudioMixerImpl::Create(std::move(rate_calculator), true);

  mixer->Mix(1,  // number of channels
             &frame_for_mixing);

  EXPECT_EQ(kOutputRate, frame_for_mixing.sample_rate_hz_);
}
509 
// Creates a mixer with the second Create() argument set to false (no limiter,
// going by the test name) and runs one mixing iteration without any sources.
TEST(AudioMixer, NoLimiterBasicApiCalls) {
  std::unique_ptr<OutputRateCalculator> rate_calculator(
      new DefaultOutputRateCalculator());
  const auto mixer = AudioMixerImpl::Create(std::move(rate_calculator), false);

  mixer->Mix(1,  // number of channels
             &frame_for_mixing);
}
516 
// Sweeps output rates, channel counts and source counts on a mixer created
// without the limiter, and checks that the output frame always reports the
// requested rate and channel count.
TEST(AudioMixer, AnyRateIsPossibleWithNoLimiter) {
  // No APM limiter means no AudioProcessing::NativeRate restriction
  // on mixing rate. The rate has to be divisible by 100 since we use
  // 10 ms frames, though.
  for (const auto rate : {8000, 20000, 24000, 32000, 44100}) {
    for (const size_t number_of_channels : {1, 2}) {
      for (const auto number_of_sources : {0, 1, 2, 3, 4}) {
        // The trace must describe the configuration under test, so the second
        // argument is the channel count (not the source count again).
        SCOPED_TRACE(ProduceDebugText(
            rate, static_cast<int>(number_of_channels), number_of_sources));
        const auto mixer =
            AudioMixerImpl::Create(std::unique_ptr<OutputRateCalculator>(
                                       new CustomRateCalculator(rate)),
                                   false);

        std::vector<MockMixerAudioSource> sources(number_of_sources);
        for (auto& source : sources) {
          // Fail right here if a source is rejected, so that the iteration
          // really runs with |number_of_sources| sources.
          EXPECT_TRUE(mixer->AddSource(&source));
        }

        mixer->Mix(number_of_channels, &frame_for_mixing);
        EXPECT_EQ(rate, frame_for_mixing.sample_rate_hz_);
        EXPECT_EQ(number_of_channels, frame_for_mixing.num_channels_);
      }
    }
  }
}
543 }  // namespace webrtc
544