1 /*
2  *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 #include <vector>
11 
12 #include "api/array_view.h"
13 #include "modules/audio_processing/audio_buffer.h"
14 #include "modules/audio_processing/test/audio_buffer_tools.h"
15 #include "modules/audio_processing/test/bitexactness_tools.h"
16 #include "modules/audio_processing/voice_detection_impl.h"
17 #include "test/gtest.h"
18 
19 namespace webrtc {
20 namespace {
21 
// Number of frames fed through the voice detector by each bit-exactness run.
constexpr int kNumFramesToProcess = 1000;
23 
24 // Process one frame of data and produce the output.
ProcessOneFrame(int sample_rate_hz,AudioBuffer * audio_buffer,VoiceDetectionImpl * voice_detection)25 void ProcessOneFrame(int sample_rate_hz,
26                      AudioBuffer* audio_buffer,
27                      VoiceDetectionImpl* voice_detection) {
28   if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) {
29     audio_buffer->SplitIntoFrequencyBands();
30   }
31 
32   voice_detection->ProcessCaptureAudio(audio_buffer);
33 }
34 
35 // Processes a specified amount of frames, verifies the results and reports
36 // any errors.
RunBitexactnessTest(int sample_rate_hz,size_t num_channels,int frame_size_ms_reference,bool stream_has_voice_reference,VoiceDetection::Likelihood likelihood_reference)37 void RunBitexactnessTest(int sample_rate_hz,
38                          size_t num_channels,
39                          int frame_size_ms_reference,
40                          bool stream_has_voice_reference,
41                          VoiceDetection::Likelihood likelihood_reference) {
42   rtc::CriticalSection crit_capture;
43   VoiceDetectionImpl voice_detection(&crit_capture);
44   voice_detection.Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz);
45   voice_detection.Enable(true);
46 
47   int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100);
48   const StreamConfig capture_config(sample_rate_hz, num_channels, false);
49   AudioBuffer capture_buffer(
50       capture_config.num_frames(), capture_config.num_channels(),
51       capture_config.num_frames(), capture_config.num_channels(),
52       capture_config.num_frames());
53   test::InputAudioFile capture_file(
54       test::GetApmCaptureTestVectorFileName(sample_rate_hz));
55   std::vector<float> capture_input(samples_per_channel * num_channels);
56   for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) {
57     ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels,
58                                    &capture_file, capture_input);
59 
60     test::CopyVectorToAudioBuffer(capture_config, capture_input,
61                                   &capture_buffer);
62 
63     ProcessOneFrame(sample_rate_hz, &capture_buffer, &voice_detection);
64   }
65 
66   int frame_size_ms = voice_detection.frame_size_ms();
67   bool stream_has_voice = voice_detection.stream_has_voice();
68   VoiceDetection::Likelihood likelihood = voice_detection.likelihood();
69 
70   // Compare the outputs to the references.
71   EXPECT_EQ(frame_size_ms_reference, frame_size_ms);
72   EXPECT_EQ(stream_has_voice_reference, stream_has_voice);
73   EXPECT_EQ(likelihood_reference, likelihood);
74 }
75 
76 const int kFrameSizeMsReference = 10;
77 const bool kStreamHasVoiceReference = true;
78 const VoiceDetection::Likelihood kLikelihoodReference =
79     VoiceDetection::kLowLikelihood;
80 
81 }  // namespace
82 
// One channel at 8000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Mono8kHz) {
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 1;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
87 
// One channel at 16000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Mono16kHz) {
  const int kSampleRateHz = 16000;
  const size_t kNumChannels = 1;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
92 
// One channel at 32000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Mono32kHz) {
  const int kSampleRateHz = 32000;
  const size_t kNumChannels = 1;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
97 
// One channel at 48000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Mono48kHz) {
  const int kSampleRateHz = 48000;
  const size_t kNumChannels = 1;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
102 
// Two channels at 8000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Stereo8kHz) {
  const int kSampleRateHz = 8000;
  const size_t kNumChannels = 2;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
107 
// Two channels at 16000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Stereo16kHz) {
  const int kSampleRateHz = 16000;
  const size_t kNumChannels = 2;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
112 
// Two channels at 32000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Stereo32kHz) {
  const int kSampleRateHz = 32000;
  const size_t kNumChannels = 2;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
117 
// Two channels at 48000 Hz, checked against the shared reference values.
TEST(VoiceDetectionBitExactnessTest, Stereo48kHz) {
  const int kSampleRateHz = 48000;
  const size_t kNumChannels = 2;
  RunBitexactnessTest(kSampleRateHz, kNumChannels, kFrameSizeMsReference,
                      kStreamHasVoiceReference, kLikelihoodReference);
}
122 
123 }  // namespace webrtc
124