/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio/utility/channel_mixer.h"

#include <memory>

#include "api/audio/audio_frame.h"
#include "api/audio/channel_layout.h"
#include "audio/utility/channel_mixing_matrix.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"

namespace webrtc {

namespace {

constexpr uint32_t kTimestamp = 27;
constexpr int kSampleRateHz = 16000;
constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;

class ChannelMixerTest : public ::testing::Test {
 protected:
  ChannelMixerTest() {
    // Use 10ms audio frames by default. Don't set values yet.
    frame_.samples_per_channel_ = kSamplesPerChannel;
    frame_.sample_rate_hz_ = kSampleRateHz;
    EXPECT_TRUE(frame_.muted());
  }

  virtual ~ChannelMixerTest() {}

  AudioFrame frame_;
};

void SetFrameData(int16_t data, AudioFrame* frame) {
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
       i++) {
    frame_data[i] = data;
  }
}

void SetMonoData(int16_t center, AudioFrame* frame) {
  frame->num_channels_ = 1;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
    frame_data[i] = center;
  }
  EXPECT_FALSE(frame->muted());
}

void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
  ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 2;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
    frame_data[i] = left;
    frame_data[i + 1] = right;
  }
  EXPECT_FALSE(frame->muted());
}

void SetFiveOneData(int16_t front_left,
                    int16_t front_right,
                    int16_t center,
                    int16_t lfe,
                    int16_t side_left,
                    int16_t side_right,
                    AudioFrame* frame) {
  ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 6;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
    frame_data[i] = front_left;
    frame_data[i + 1] = front_right;
    frame_data[i + 2] = center;
    frame_data[i + 3] = lfe;
    frame_data[i + 4] = side_left;
    frame_data[i + 5] = side_right;
  }
  EXPECT_FALSE(frame->muted());
}

void SetSevenOneData(int16_t front_left,
                     int16_t front_right,
                     int16_t center,
                     int16_t lfe,
                     int16_t side_left,
                     int16_t side_right,
                     int16_t back_left,
                     int16_t back_right,
                     AudioFrame* frame) {
  ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 8;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
    frame_data[i] = front_left;
    frame_data[i + 1] = front_right;
    frame_data[i + 2] = center;
    frame_data[i + 3] = lfe;
    frame_data[i + 4] = side_left;
    frame_data[i + 5] = side_right;
    frame_data[i + 6] = back_left;
    frame_data[i + 7] = back_right;
  }
  EXPECT_FALSE(frame->muted());
}

bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
  const int16_t* frame_data = frame->data();
  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
       i++) {
    if (frame_data[i] != sample) {
      return false;
    }
  }
  return true;
}

void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
  EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
  EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
  const int16_t* frame1_data = frame1.data();
  const int16_t* frame2_data = frame2.data();
  for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
       i++) {
    EXPECT_EQ(frame1_data[i], frame2_data[i]);
  }
  EXPECT_EQ(frame1.muted(), frame2.muted());
}

}  // namespace

// Test that all possible layout conversions can be constructed and mixed.
// The actual content does not matter; simply run through all mixing
// combinations and ensure that nothing fails.
TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
       input_layout <= CHANNEL_LAYOUT_MAX;
       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
         output_layout <= CHANNEL_LAYOUT_MAX;
         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
      // DISCRETE, BITSTREAM can't be tested here based on the current approach.
      // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
      // Stereo down mix should never be the output layout.
      if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
          input_layout == CHANNEL_LAYOUT_DISCRETE ||
          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_BITSTREAM ||
          output_layout == CHANNEL_LAYOUT_DISCRETE ||
          output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
        continue;
      }

      rtc::StringBuilder ss;
      ss << "Input Layout: " << input_layout
         << ", Output Layout: " << output_layout;
      SCOPED_TRACE(ss.str());
      ChannelMixer mixer(input_layout, output_layout);

      frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
                         AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
                         ChannelLayoutToChannelCount(input_layout));
      EXPECT_TRUE(frame_.muted());
      mixer.Transform(&frame_);
    }
  }
}

// Ensure that the audio frame is untouched when input and output channel
// layouts are identical, i.e., the transformation should have no effect.
// Exclude invalid mixing combinations.
TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
       input_layout <= CHANNEL_LAYOUT_MAX;
       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
         output_layout <= CHANNEL_LAYOUT_MAX;
         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
      if (input_layout != output_layout ||
          input_layout == CHANNEL_LAYOUT_BITSTREAM ||
          input_layout == CHANNEL_LAYOUT_DISCRETE ||
          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
        continue;
      }
      ChannelMixer mixer(input_layout, output_layout);
      frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
      SetFrameData(99, &frame_);
      mixer.Transform(&frame_);
      EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
                static_cast<int>(frame_.num_channels()));
      EXPECT_TRUE(AllSamplesEquals(99, &frame_));
    }
  }
}

TEST_F(ChannelMixerTest, StereoToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: stereo
  //                      LEFT  RIGHT
  // Output: mono CENTER  0.5   0.5
  //
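  // Expected values below, given the 0.5/0.5 down mix above: an input of
  // (7, 3) should give 7 * 0.5 + 3 * 0.5 = 5, and an input of
  // (-32768, -32768) maps exactly to -32768, so no saturation should be
  // involved in either case.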
  SetStereoData(7, 3, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(5, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

  SetStereoData(-32768, -32768, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

TEST_F(ChannelMixerTest, StereoToMonoMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

TEST_F(ChannelMixerTest, FiveOneToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: mono CENTER  0.707  0.707  1       0.707  0.707      0.707
  //
  //
  // a = [10, 20, 15, 2, 5, 5]
  // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
  // a * b (dot product) = 44.69848480983499, which is truncated to 44 in the
  // 16-bit integer representation.
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(44, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

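  // When every input channel is at the int16 minimum, the weighted sum
  // (roughly -32768 * 4.54 with the coefficients above) falls far below the
  // int16 range, so the mixed output is expected to saturate at -32768
  // rather than wrap around.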
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  //
  //                        Input: 5.1
  //                        LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: 7.1 LEFT       1      0      0       0      0          0
  //             RIGHT      0      1      0       0      0          0
  //             CENTER     0      0      1       0      0          0
  //             LFE        0      0      0       1      0          0
  //             SIDE_LEFT  0      0      0       0      1          0
  //             SIDE_RIGHT 0      0      0       0      0          1
  //             BACK_LEFT  0      0      0       0      0          0
  //             BACK_RIGHT 0      0      0       0      0          0
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());

  AudioFrame seven_one_frame;
  seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);

  SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
                  &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);
}

TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    BACK_LEFT  BACK_RIGHT
  // Output: stereo LEFT  1      0      0.707   0.707  0.707      0
  //                RIGHT 0      1      0.707   0.707  0          0.707
  //
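  // With the matrix above, an input of (20, 30, 15, 2, 5, 5) is expected to
  // give LEFT = 20 + 0.707 * (15 + 2 + 5) = 35.55... and
  // RIGHT = 30 + 0.707 * (15 + 2 + 5) = 45.55..., which truncate to 35 and 45.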
  SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(35, 45, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);

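  // As in the 5.1-to-mono case, driving every input channel to -32768 pushes
  // the weighted sums below the int16 range, so both output channels are
  // expected to saturate at -32768.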
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
  SetStereoData(-32768, -32768, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

TEST_F(ChannelMixerTest, MonoToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
  //
  //                       Input: mono
  //                       CENTER
  // Output: stereo LEFT   1
  //                RIGHT  1
  //
  SetMonoData(44, &frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(44, 44, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

TEST_F(ChannelMixerTest, StereoToFiveOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
  //
  //                         Input: Stereo
  //                         LEFT   RIGHT
  // Output: 5.1 LEFT        1      0
  //             RIGHT       0      1
  //             CENTER      0      0
  //             LFE         0      0
  //             SIDE_LEFT   0      0
  //             SIDE_RIGHT  0      0
  //
  SetStereoData(50, 60, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());

  AudioFrame five_one_frame;
  five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
  VerifyFramesAreEqual(five_one_frame, frame_);
}

}  // namespace webrtc