1 /*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
#include "audio/utility/channel_mixer.h"

#include <algorithm>
#include <memory>

#include "api/audio/audio_frame.h"
#include "api/audio/channel_layout.h"
#include "audio/utility/channel_mixing_matrix.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"
21
22 namespace webrtc {
23
24 namespace {
25
26 constexpr uint32_t kTimestamp = 27;
27 constexpr int kSampleRateHz = 16000;
28 constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;
29
30 class ChannelMixerTest : public ::testing::Test {
31 protected:
ChannelMixerTest()32 ChannelMixerTest() {
33 // Use 10ms audio frames by default. Don't set values yet.
34 frame_.samples_per_channel_ = kSamplesPerChannel;
35 frame_.sample_rate_hz_ = kSampleRateHz;
36 EXPECT_TRUE(frame_.muted());
37 }
38
~ChannelMixerTest()39 virtual ~ChannelMixerTest() {}
40
41 AudioFrame frame_;
42 };
43
SetFrameData(int16_t data,AudioFrame * frame)44 void SetFrameData(int16_t data, AudioFrame* frame) {
45 int16_t* frame_data = frame->mutable_data();
46 for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
47 i++) {
48 frame_data[i] = data;
49 }
50 }
51
SetMonoData(int16_t center,AudioFrame * frame)52 void SetMonoData(int16_t center, AudioFrame* frame) {
53 frame->num_channels_ = 1;
54 int16_t* frame_data = frame->mutable_data();
55 for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
56 frame_data[i] = center;
57 }
58 EXPECT_FALSE(frame->muted());
59 }
60
SetStereoData(int16_t left,int16_t right,AudioFrame * frame)61 void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
62 ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
63 frame->num_channels_ = 2;
64 int16_t* frame_data = frame->mutable_data();
65 for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
66 frame_data[i] = left;
67 frame_data[i + 1] = right;
68 }
69 EXPECT_FALSE(frame->muted());
70 }
71
SetFiveOneData(int16_t front_left,int16_t front_right,int16_t center,int16_t lfe,int16_t side_left,int16_t side_right,AudioFrame * frame)72 void SetFiveOneData(int16_t front_left,
73 int16_t front_right,
74 int16_t center,
75 int16_t lfe,
76 int16_t side_left,
77 int16_t side_right,
78 AudioFrame* frame) {
79 ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
80 frame->num_channels_ = 6;
81 int16_t* frame_data = frame->mutable_data();
82 for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
83 frame_data[i] = front_left;
84 frame_data[i + 1] = front_right;
85 frame_data[i + 2] = center;
86 frame_data[i + 3] = lfe;
87 frame_data[i + 4] = side_left;
88 frame_data[i + 5] = side_right;
89 }
90 EXPECT_FALSE(frame->muted());
91 }
92
SetSevenOneData(int16_t front_left,int16_t front_right,int16_t center,int16_t lfe,int16_t side_left,int16_t side_right,int16_t back_left,int16_t back_right,AudioFrame * frame)93 void SetSevenOneData(int16_t front_left,
94 int16_t front_right,
95 int16_t center,
96 int16_t lfe,
97 int16_t side_left,
98 int16_t side_right,
99 int16_t back_left,
100 int16_t back_right,
101 AudioFrame* frame) {
102 ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
103 frame->num_channels_ = 8;
104 int16_t* frame_data = frame->mutable_data();
105 for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
106 frame_data[i] = front_left;
107 frame_data[i + 1] = front_right;
108 frame_data[i + 2] = center;
109 frame_data[i + 3] = lfe;
110 frame_data[i + 4] = side_left;
111 frame_data[i + 5] = side_right;
112 frame_data[i + 6] = back_left;
113 frame_data[i + 7] = back_right;
114 }
115 EXPECT_FALSE(frame->muted());
116 }
117
AllSamplesEquals(int16_t sample,const AudioFrame * frame)118 bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
119 const int16_t* frame_data = frame->data();
120 for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
121 i++) {
122 if (frame_data[i] != sample) {
123 return false;
124 }
125 }
126 return true;
127 }
128
VerifyFramesAreEqual(const AudioFrame & frame1,const AudioFrame & frame2)129 void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
130 EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
131 EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
132 const int16_t* frame1_data = frame1.data();
133 const int16_t* frame2_data = frame2.data();
134 for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
135 i++) {
136 EXPECT_EQ(frame1_data[i], frame2_data[i]);
137 }
138 EXPECT_EQ(frame1.muted(), frame2.muted());
139 }
140
141 } // namespace
142
143 // Test all possible layout conversions can be constructed and mixed. Don't
144 // care about the actual content, simply run through all mixing combinations
145 // and ensure that nothing fails.
TEST_F(ChannelMixerTest,ConstructAllPossibleLayouts)146 TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
147 for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
148 input_layout <= CHANNEL_LAYOUT_MAX;
149 input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
150 for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
151 output_layout <= CHANNEL_LAYOUT_MAX;
152 output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
153 // DISCRETE, BITSTREAM can't be tested here based on the current approach.
154 // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
155 // Stereo down mix should never be the output layout.
156 if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
157 input_layout == CHANNEL_LAYOUT_DISCRETE ||
158 input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
159 output_layout == CHANNEL_LAYOUT_BITSTREAM ||
160 output_layout == CHANNEL_LAYOUT_DISCRETE ||
161 output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
162 output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
163 continue;
164 }
165
166 rtc::StringBuilder ss;
167 ss << "Input Layout: " << input_layout
168 << ", Output Layout: " << output_layout;
169 SCOPED_TRACE(ss.str());
170 ChannelMixer mixer(input_layout, output_layout);
171
172 frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
173 AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
174 ChannelLayoutToChannelCount(input_layout));
175 EXPECT_TRUE(frame_.muted());
176 mixer.Transform(&frame_);
177 }
178 }
179 }
180
181 // Ensure that the audio frame is untouched when input and output channel
182 // layouts are identical, i.e., the transformation should have no effect.
183 // Exclude invalid mixing combinations.
TEST_F(ChannelMixerTest,NoMixingForIdenticalChannelLayouts)184 TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
185 for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
186 input_layout <= CHANNEL_LAYOUT_MAX;
187 input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
188 for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
189 output_layout <= CHANNEL_LAYOUT_MAX;
190 output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
191 if (input_layout != output_layout ||
192 input_layout == CHANNEL_LAYOUT_BITSTREAM ||
193 input_layout == CHANNEL_LAYOUT_DISCRETE ||
194 input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
195 output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
196 continue;
197 }
198 ChannelMixer mixer(input_layout, output_layout);
199 frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
200 SetFrameData(99, &frame_);
201 mixer.Transform(&frame_);
202 EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
203 static_cast<int>(frame_.num_channels()));
204 EXPECT_TRUE(AllSamplesEquals(99, &frame_));
205 }
206 }
207 }
208
TEST_F(ChannelMixerTest,StereoToMono)209 TEST_F(ChannelMixerTest, StereoToMono) {
210 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
211 //
212 // Input: stereo
213 // LEFT RIGHT
214 // Output: mono CENTER 0.5 0.5
215 //
216 SetStereoData(7, 3, &frame_);
217 EXPECT_EQ(2u, frame_.num_channels());
218 mixer.Transform(&frame_);
219 EXPECT_EQ(1u, frame_.num_channels());
220 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
221
222 AudioFrame mono_frame;
223 mono_frame.samples_per_channel_ = frame_.samples_per_channel();
224 SetMonoData(5, &mono_frame);
225 VerifyFramesAreEqual(mono_frame, frame_);
226
227 SetStereoData(-32768, -32768, &frame_);
228 EXPECT_EQ(2u, frame_.num_channels());
229 mixer.Transform(&frame_);
230 EXPECT_EQ(1u, frame_.num_channels());
231 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
232 SetMonoData(-32768, &mono_frame);
233 VerifyFramesAreEqual(mono_frame, frame_);
234 }
235
TEST_F(ChannelMixerTest,StereoToMonoMuted)236 TEST_F(ChannelMixerTest, StereoToMonoMuted) {
237 ASSERT_TRUE(frame_.muted());
238 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
239 mixer.Transform(&frame_);
240 EXPECT_EQ(1u, frame_.num_channels());
241 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
242 EXPECT_TRUE(frame_.muted());
243 }
244
TEST_F(ChannelMixerTest,FiveOneToSevenOneMuted)245 TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
246 ASSERT_TRUE(frame_.muted());
247 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
248 mixer.Transform(&frame_);
249 EXPECT_EQ(8u, frame_.num_channels());
250 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
251 EXPECT_TRUE(frame_.muted());
252 }
253
TEST_F(ChannelMixerTest,FiveOneToMono)254 TEST_F(ChannelMixerTest, FiveOneToMono) {
255 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
256 //
257 // Input: 5.1
258 // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
259 // Output: mono CENTER 0.707 0.707 1 0.707 0.707 0.707
260 //
261 // a = [10, 20, 15, 2, 5, 5]
262 // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
263 // a * b (dot product) = 44.69848480983499,
264 // which is truncated into 44 using 16 bit representation.
265 //
266 SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
267 EXPECT_EQ(6u, frame_.num_channels());
268 mixer.Transform(&frame_);
269 EXPECT_EQ(1u, frame_.num_channels());
270 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
271
272 AudioFrame mono_frame;
273 mono_frame.samples_per_channel_ = frame_.samples_per_channel();
274 SetMonoData(44, &mono_frame);
275 VerifyFramesAreEqual(mono_frame, frame_);
276
277 SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
278 EXPECT_EQ(6u, frame_.num_channels());
279 mixer.Transform(&frame_);
280 EXPECT_EQ(1u, frame_.num_channels());
281 EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
282 SetMonoData(-32768, &mono_frame);
283 VerifyFramesAreEqual(mono_frame, frame_);
284 }
285
TEST_F(ChannelMixerTest,FiveOneToSevenOne)286 TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
287 ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
288 //
289 // Input: 5.1
290 // LEFT RIGHT CENTER LFE SIDE_LEFT SIDE_RIGHT
291 // Output: 7.1 LEFT 1 0 0 0 0 0
292 // RIGHT 0 1 0 0 0 0
293 // CENTER 0 0 1 0 0 0
294 // LFE 0 0 0 1 0 0
295 // SIDE_LEFT 0 0 0 0 1 0
296 // SIDE_RIGHT 0 0 0 0 0 1
297 // BACK_LEFT 0 0 0 0 0 0
298 // BACK_RIGHT 0 0 0 0 0 0
299 //
300 SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
301 EXPECT_EQ(6u, frame_.num_channels());
302 mixer.Transform(&frame_);
303 EXPECT_EQ(8u, frame_.num_channels());
304 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
305
306 AudioFrame seven_one_frame;
307 seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
308 SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
309 VerifyFramesAreEqual(seven_one_frame, frame_);
310
311 SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
312 EXPECT_EQ(6u, frame_.num_channels());
313 mixer.Transform(&frame_);
314 EXPECT_EQ(8u, frame_.num_channels());
315 EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
316 SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
317 &seven_one_frame);
318 VerifyFramesAreEqual(seven_one_frame, frame_);
319 }
320
TEST_F(ChannelMixerTest,FiveOneBackToStereo)321 TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
322 ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
323 //
324 // Input: 5.1
325 // LEFT RIGHT CENTER LFE BACK_LEFT BACK_RIGHT
326 // Output: stereo LEFT 1 0 0.707 0.707 0.707 0
327 // RIGHT 0 1 0.707 0.707 0 0.707
328 //
329 SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
330 EXPECT_EQ(6u, frame_.num_channels());
331 mixer.Transform(&frame_);
332 EXPECT_EQ(2u, frame_.num_channels());
333 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
334
335 AudioFrame stereo_frame;
336 stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
337 SetStereoData(35, 45, &stereo_frame);
338 VerifyFramesAreEqual(stereo_frame, frame_);
339
340 SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
341 EXPECT_EQ(6u, frame_.num_channels());
342 mixer.Transform(&frame_);
343 EXPECT_EQ(2u, frame_.num_channels());
344 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
345 SetStereoData(-32768, -32768, &stereo_frame);
346 VerifyFramesAreEqual(stereo_frame, frame_);
347 }
348
TEST_F(ChannelMixerTest,MonoToStereo)349 TEST_F(ChannelMixerTest, MonoToStereo) {
350 ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
351 //
352 // Input: mono
353 // CENTER
354 // Output: stereo LEFT 1
355 // RIGHT 1
356 //
357 SetMonoData(44, &frame_);
358 EXPECT_EQ(1u, frame_.num_channels());
359 mixer.Transform(&frame_);
360 EXPECT_EQ(2u, frame_.num_channels());
361 EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
362
363 AudioFrame stereo_frame;
364 stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
365 SetStereoData(44, 44, &stereo_frame);
366 VerifyFramesAreEqual(stereo_frame, frame_);
367 }
368
TEST_F(ChannelMixerTest,StereoToFiveOne)369 TEST_F(ChannelMixerTest, StereoToFiveOne) {
370 ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
371 //
372 // Input: Stereo
373 // LEFT RIGHT
374 // Output: 5.1 LEFT 1 0
375 // RIGHT 0 1
376 // CENTER 0 0
377 // LFE 0 0
378 // SIDE_LEFT 0 0
379 // SIDE_RIGHT 0 0
380 //
381 SetStereoData(50, 60, &frame_);
382 EXPECT_EQ(2u, frame_.num_channels());
383 mixer.Transform(&frame_);
384 EXPECT_EQ(6u, frame_.num_channels());
385 EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());
386
387 AudioFrame five_one_frame;
388 five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
389 SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
390 VerifyFramesAreEqual(five_one_frame, frame_);
391 }
392
393 } // namespace webrtc
394