/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #include "modules/audio_processing/voice_detection.h"
12 
13 #include "common_audio/vad/include/webrtc_vad.h"
14 #include "modules/audio_processing/audio_buffer.h"
15 #include "rtc_base/checks.h"
16 
17 namespace webrtc {
18 class VoiceDetection::Vad {
19  public:
Vad()20   Vad() {
21     state_ = WebRtcVad_Create();
22     RTC_CHECK(state_);
23     int error = WebRtcVad_Init(state_);
24     RTC_DCHECK_EQ(0, error);
25   }
~Vad()26   ~Vad() { WebRtcVad_Free(state_); }
27 
28   Vad(Vad&) = delete;
29   Vad& operator=(Vad&) = delete;
30 
state()31   VadInst* state() { return state_; }
32 
33  private:
34   VadInst* state_ = nullptr;
35 };
36 
VoiceDetection(int sample_rate_hz,Likelihood likelihood)37 VoiceDetection::VoiceDetection(int sample_rate_hz, Likelihood likelihood)
38     : sample_rate_hz_(sample_rate_hz),
39       frame_size_samples_(static_cast<size_t>(sample_rate_hz_ / 100)),
40       likelihood_(likelihood),
41       vad_(new Vad()) {
42   int mode = 2;
43   switch (likelihood) {
44     case VoiceDetection::kVeryLowLikelihood:
45       mode = 3;
46       break;
47     case VoiceDetection::kLowLikelihood:
48       mode = 2;
49       break;
50     case VoiceDetection::kModerateLikelihood:
51       mode = 1;
52       break;
53     case VoiceDetection::kHighLikelihood:
54       mode = 0;
55       break;
56     default:
57       RTC_NOTREACHED();
58       break;
59   }
60   int error = WebRtcVad_set_mode(vad_->state(), mode);
61   RTC_DCHECK_EQ(0, error);
62 }
63 
~VoiceDetection()64 VoiceDetection::~VoiceDetection() {}
65 
ProcessCaptureAudio(AudioBuffer * audio)66 bool VoiceDetection::ProcessCaptureAudio(AudioBuffer* audio) {
67   RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
68                 audio->num_frames_per_band());
69   std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
70   rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
71                                                audio->num_frames_per_band());
72   if (audio->num_channels() == 1) {
73     FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz],
74                   audio->num_frames_per_band(), mixed_low_pass_data.data());
75   } else {
76     const int num_channels = static_cast<int>(audio->num_channels());
77     for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
78       int32_t value =
79           FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]);
80       for (int j = 1; j < num_channels; ++j) {
81         value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]);
82       }
83       mixed_low_pass_data[i] = value / num_channels;
84     }
85   }
86 
87   int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
88                                   mixed_low_pass.data(), frame_size_samples_);
89   RTC_DCHECK(vad_ret == 0 || vad_ret == 1);
90   return vad_ret == 0 ? false : true;
91 }
92 }  // namespace webrtc
93