/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio/utility/channel_mixer.h"

#include <memory>

#include "api/audio/audio_frame.h"
#include "api/audio/channel_layout.h"
#include "audio/utility/channel_mixing_matrix.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/strings/string_builder.h"
#include "test/gtest.h"

namespace webrtc {

namespace {

constexpr uint32_t kTimestamp = 27;
constexpr int kSampleRateHz = 16000;
constexpr size_t kSamplesPerChannel = kSampleRateHz / 100;

class ChannelMixerTest : public ::testing::Test {
 protected:
  ChannelMixerTest() {
    // Use 10ms audio frames by default. Don't set values yet.
    frame_.samples_per_channel_ = kSamplesPerChannel;
    frame_.sample_rate_hz_ = kSampleRateHz;
    EXPECT_TRUE(frame_.muted());
  }

  virtual ~ChannelMixerTest() {}

  AudioFrame frame_;
};

void SetFrameData(int16_t data, AudioFrame* frame) {
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
       i++) {
    frame_data[i] = data;
  }
}

void SetMonoData(int16_t center, AudioFrame* frame) {
  frame->num_channels_ = 1;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel(); ++i) {
    frame_data[i] = center;
  }
  EXPECT_FALSE(frame->muted());
}

void SetStereoData(int16_t left, int16_t right, AudioFrame* frame) {
  ASSERT_LE(2 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 2;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 2; i += 2) {
    frame_data[i] = left;
    frame_data[i + 1] = right;
  }
  EXPECT_FALSE(frame->muted());
}

void SetFiveOneData(int16_t front_left,
                    int16_t front_right,
                    int16_t center,
                    int16_t lfe,
                    int16_t side_left,
                    int16_t side_right,
                    AudioFrame* frame) {
  ASSERT_LE(6 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 6;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 6; i += 6) {
    frame_data[i] = front_left;
    frame_data[i + 1] = front_right;
    frame_data[i + 2] = center;
    frame_data[i + 3] = lfe;
    frame_data[i + 4] = side_left;
    frame_data[i + 5] = side_right;
  }
  EXPECT_FALSE(frame->muted());
}

void SetSevenOneData(int16_t front_left,
                     int16_t front_right,
                     int16_t center,
                     int16_t lfe,
                     int16_t side_left,
                     int16_t side_right,
                     int16_t back_left,
                     int16_t back_right,
                     AudioFrame* frame) {
  ASSERT_LE(8 * frame->samples_per_channel(), frame->max_16bit_samples());
  frame->num_channels_ = 8;
  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel() * 8; i += 8) {
    frame_data[i] = front_left;
    frame_data[i + 1] = front_right;
    frame_data[i + 2] = center;
    frame_data[i + 3] = lfe;
    frame_data[i + 4] = side_left;
    frame_data[i + 5] = side_right;
    frame_data[i + 6] = back_left;
    frame_data[i + 7] = back_right;
  }
  EXPECT_FALSE(frame->muted());
}

bool AllSamplesEquals(int16_t sample, const AudioFrame* frame) {
  const int16_t* frame_data = frame->data();
  for (size_t i = 0; i < frame->samples_per_channel() * frame->num_channels();
       i++) {
    if (frame_data[i] != sample) {
      return false;
    }
  }
  return true;
}

void VerifyFramesAreEqual(const AudioFrame& frame1, const AudioFrame& frame2) {
  EXPECT_EQ(frame1.num_channels(), frame2.num_channels());
  EXPECT_EQ(frame1.samples_per_channel(), frame2.samples_per_channel());
  const int16_t* frame1_data = frame1.data();
  const int16_t* frame2_data = frame2.data();
  for (size_t i = 0; i < frame1.samples_per_channel() * frame1.num_channels();
       i++) {
    EXPECT_EQ(frame1_data[i], frame2_data[i]);
  }
  EXPECT_EQ(frame1.muted(), frame2.muted());
}

}  // namespace

// Test that all possible layout conversions can be constructed and mixed.
// The actual content does not matter; simply run through all mixing
// combinations and ensure that nothing fails.
TEST_F(ChannelMixerTest, ConstructAllPossibleLayouts) {
  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
       input_layout <= CHANNEL_LAYOUT_MAX;
       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
         output_layout <= CHANNEL_LAYOUT_MAX;
         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
      // DISCRETE, BITSTREAM can't be tested here based on the current approach.
      // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC is not mixable.
      // Stereo down mix should never be the output layout.
      if (input_layout == CHANNEL_LAYOUT_BITSTREAM ||
          input_layout == CHANNEL_LAYOUT_DISCRETE ||
          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_BITSTREAM ||
          output_layout == CHANNEL_LAYOUT_DISCRETE ||
          output_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
        continue;
      }

      rtc::StringBuilder ss;
      ss << "Input Layout: " << input_layout
         << ", Output Layout: " << output_layout;
      SCOPED_TRACE(ss.str());
      ChannelMixer mixer(input_layout, output_layout);

      frame_.UpdateFrame(kTimestamp, nullptr, kSamplesPerChannel, kSampleRateHz,
                         AudioFrame::kNormalSpeech, AudioFrame::kVadActive,
                         ChannelLayoutToChannelCount(input_layout));
      EXPECT_TRUE(frame_.muted());
      mixer.Transform(&frame_);
    }
  }
}

// Ensure that the audio frame is untouched when input and output channel
// layouts are identical, i.e., the transformation should have no effect.
// Exclude invalid mixing combinations.
TEST_F(ChannelMixerTest, NoMixingForIdenticalChannelLayouts) {
  for (ChannelLayout input_layout = CHANNEL_LAYOUT_MONO;
       input_layout <= CHANNEL_LAYOUT_MAX;
       input_layout = static_cast<ChannelLayout>(input_layout + 1)) {
    for (ChannelLayout output_layout = CHANNEL_LAYOUT_MONO;
         output_layout <= CHANNEL_LAYOUT_MAX;
         output_layout = static_cast<ChannelLayout>(output_layout + 1)) {
      if (input_layout != output_layout ||
          input_layout == CHANNEL_LAYOUT_BITSTREAM ||
          input_layout == CHANNEL_LAYOUT_DISCRETE ||
          input_layout == CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC ||
          output_layout == CHANNEL_LAYOUT_STEREO_DOWNMIX) {
        continue;
      }
      ChannelMixer mixer(input_layout, output_layout);
      frame_.num_channels_ = ChannelLayoutToChannelCount(input_layout);
      SetFrameData(99, &frame_);
      mixer.Transform(&frame_);
      EXPECT_EQ(ChannelLayoutToChannelCount(input_layout),
                static_cast<int>(frame_.num_channels()));
      EXPECT_TRUE(AllSamplesEquals(99, &frame_));
    }
  }
}

TEST_F(ChannelMixerTest, StereoToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: stereo
  //                      LEFT  RIGHT
  // Output: mono CENTER  0.5   0.5
  //
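  // Expected values below, given the 0.5/0.5 down mix above: an input of
  // (7, 3) should give 7 * 0.5 + 3 * 0.5 = 5, and an input of
  // (-32768, -32768) maps exactly to -32768, so no saturation should be
  // involved in either case.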
  SetStereoData(7, 3, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(5, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

  SetStereoData(-32768, -32768, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

TEST_F(ChannelMixerTest, StereoToMonoMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_MONO);
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

TEST_F(ChannelMixerTest, FiveOneToSevenOneMuted) {
  ASSERT_TRUE(frame_.muted());
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  EXPECT_TRUE(frame_.muted());
}

TEST_F(ChannelMixerTest, FiveOneToMono) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_MONO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: mono CENTER  0.707  0.707  1       0.707  0.707      0.707
  //
  //
  // a = [10, 20, 15, 2, 5, 5]
  // b = [1/sqrt(2), 1/sqrt(2), 1.0, 1/sqrt(2), 1/sqrt(2), 1/sqrt(2)] =>
  // a * b (dot product) = 44.69848480983499, which is truncated to 44 in the
  // 16-bit integer representation.
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());

  AudioFrame mono_frame;
  mono_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetMonoData(44, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);

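  // When every input channel is at the int16 minimum, the weighted sum
  // (roughly -32768 * 4.54 with the coefficients above) falls far below the
  // int16 range, so the mixed output is expected to saturate at -32768
  // rather than wrap around.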
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_MONO, frame_.channel_layout());
  SetMonoData(-32768, &mono_frame);
  VerifyFramesAreEqual(mono_frame, frame_);
}

TEST_F(ChannelMixerTest, FiveOneToSevenOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1, CHANNEL_LAYOUT_7_1);
  //
  //                        Input: 5.1
  //                        LEFT   RIGHT  CENTER  LFE    SIDE_LEFT  SIDE_RIGHT
  // Output: 7.1 LEFT       1      0      0       0      0          0
  //             RIGHT      0      1      0       0      0          0
  //             CENTER     0      0      1       0      0          0
  //             LFE        0      0      0       1      0          0
  //             SIDE_LEFT  0      0      0       0      1          0
  //             SIDE_RIGHT 0      0      0       0      0          1
  //             BACK_LEFT  0      0      0       0      0          0
  //             BACK_RIGHT 0      0      0       0      0          0
  //
  SetFiveOneData(10, 20, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());

  AudioFrame seven_one_frame;
  seven_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetSevenOneData(10, 20, 15, 2, 5, 5, 0, 0, &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);

  SetFiveOneData(-32768, 32767, -32768, 32767, -32768, 32767, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(8u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_7_1, frame_.channel_layout());
  SetSevenOneData(-32768, 32767, -32768, 32767, -32768, 32767, 0, 0,
                  &seven_one_frame);
  VerifyFramesAreEqual(seven_one_frame, frame_);
}

TEST_F(ChannelMixerTest, FiveOneBackToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_5_1_BACK, CHANNEL_LAYOUT_STEREO);
  //
  //                      Input: 5.1
  //                      LEFT   RIGHT  CENTER  LFE    BACK_LEFT  BACK_RIGHT
  // Output: stereo LEFT  1      0      0.707   0.707  0.707      0
  //                RIGHT 0      1      0.707   0.707  0          0.707
  //
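  // With the matrix above, an input of (20, 30, 15, 2, 5, 5) is expected to
  // give LEFT = 20 + 0.707 * (15 + 2 + 5) = 35.55... and
  // RIGHT = 30 + 0.707 * (15 + 2 + 5) = 45.55..., which truncate to 35 and 45.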
  SetFiveOneData(20, 30, 15, 2, 5, 5, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(35, 45, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);

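  // As in the 5.1-to-mono case, driving every input channel to -32768 pushes
  // the weighted sums below the int16 range, so both output channels are
  // expected to saturate at -32768.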
  SetFiveOneData(-32768, -32768, -32768, -32768, -32768, -32768, &frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());
  SetStereoData(-32768, -32768, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

TEST_F(ChannelMixerTest, MonoToStereo) {
  ChannelMixer mixer(CHANNEL_LAYOUT_MONO, CHANNEL_LAYOUT_STEREO);
  //
  //                       Input: mono
  //                       CENTER
  // Output: stereo LEFT   1
  //                RIGHT  1
  //
  SetMonoData(44, &frame_);
  EXPECT_EQ(1u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_STEREO, frame_.channel_layout());

  AudioFrame stereo_frame;
  stereo_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetStereoData(44, 44, &stereo_frame);
  VerifyFramesAreEqual(stereo_frame, frame_);
}

TEST_F(ChannelMixerTest, StereoToFiveOne) {
  ChannelMixer mixer(CHANNEL_LAYOUT_STEREO, CHANNEL_LAYOUT_5_1);
  //
  //                         Input: Stereo
  //                         LEFT   RIGHT
  // Output: 5.1 LEFT        1      0
  //             RIGHT       0      1
  //             CENTER      0      0
  //             LFE         0      0
  //             SIDE_LEFT   0      0
  //             SIDE_RIGHT  0      0
  //
  SetStereoData(50, 60, &frame_);
  EXPECT_EQ(2u, frame_.num_channels());
  mixer.Transform(&frame_);
  EXPECT_EQ(6u, frame_.num_channels());
  EXPECT_EQ(CHANNEL_LAYOUT_5_1, frame_.channel_layout());

  AudioFrame five_one_frame;
  five_one_frame.samples_per_channel_ = frame_.samples_per_channel();
  SetFiveOneData(50, 60, 0, 0, 0, 0, &five_one_frame);
  VerifyFramesAreEqual(five_one_frame, frame_);
}

}  // namespace webrtc