1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/voice_detection_impl.h"
12 
13 #include "api/audio/audio_frame.h"
14 #include "common_audio/vad/include/webrtc_vad.h"
15 #include "modules/audio_processing/audio_buffer.h"
16 #include "rtc_base/checks.h"
17 #include "rtc_base/constructormagic.h"
18 
19 namespace webrtc {
20 class VoiceDetectionImpl::Vad {
21  public:
Vad()22   Vad() {
23     state_ = WebRtcVad_Create();
24     RTC_CHECK(state_);
25     int error = WebRtcVad_Init(state_);
26     RTC_DCHECK_EQ(0, error);
27   }
~Vad()28   ~Vad() { WebRtcVad_Free(state_); }
state()29   VadInst* state() { return state_; }
30 
31  private:
32   VadInst* state_ = nullptr;
33   RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
34 };
35 
VoiceDetectionImpl(rtc::CriticalSection * crit)36 VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
37     : crit_(crit) {
38   RTC_DCHECK(crit);
39 }
40 
~VoiceDetectionImpl()41 VoiceDetectionImpl::~VoiceDetectionImpl() {}
42 
Initialize(int sample_rate_hz)43 void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
44   rtc::CritScope cs(crit_);
45   sample_rate_hz_ = sample_rate_hz;
46   std::unique_ptr<Vad> new_vad;
47   if (enabled_) {
48     new_vad.reset(new Vad());
49   }
50   vad_.swap(new_vad);
51   using_external_vad_ = false;
52   frame_size_samples_ =
53       static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
54   set_likelihood(likelihood_);
55 }
56 
ProcessCaptureAudio(AudioBuffer * audio)57 void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
58   rtc::CritScope cs(crit_);
59   if (!enabled_) {
60     return;
61   }
62   if (using_external_vad_) {
63     using_external_vad_ = false;
64     return;
65   }
66 
67   RTC_DCHECK_GE(160, audio->num_frames_per_band());
68   // TODO(ajm): concatenate data in frame buffer here.
69   int vad_ret =
70       WebRtcVad_Process(vad_->state(), sample_rate_hz_,
71                         audio->mixed_low_pass_data(), frame_size_samples_);
72   if (vad_ret == 0) {
73     stream_has_voice_ = false;
74     audio->set_activity(AudioFrame::kVadPassive);
75   } else if (vad_ret == 1) {
76     stream_has_voice_ = true;
77     audio->set_activity(AudioFrame::kVadActive);
78   } else {
79     RTC_NOTREACHED();
80   }
81 }
82 
Enable(bool enable)83 int VoiceDetectionImpl::Enable(bool enable) {
84   rtc::CritScope cs(crit_);
85   if (enabled_ != enable) {
86     enabled_ = enable;
87     Initialize(sample_rate_hz_);
88   }
89   return AudioProcessing::kNoError;
90 }
91 
is_enabled() const92 bool VoiceDetectionImpl::is_enabled() const {
93   rtc::CritScope cs(crit_);
94   return enabled_;
95 }
96 
set_stream_has_voice(bool has_voice)97 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
98   rtc::CritScope cs(crit_);
99   using_external_vad_ = true;
100   stream_has_voice_ = has_voice;
101   return AudioProcessing::kNoError;
102 }
103 
stream_has_voice() const104 bool VoiceDetectionImpl::stream_has_voice() const {
105   rtc::CritScope cs(crit_);
106   // TODO(ajm): enable this assertion?
107   // RTC_DCHECK(using_external_vad_ || is_component_enabled());
108   return stream_has_voice_;
109 }
110 
set_likelihood(VoiceDetection::Likelihood likelihood)111 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
112   rtc::CritScope cs(crit_);
113   likelihood_ = likelihood;
114   if (enabled_) {
115     int mode = 2;
116     switch (likelihood) {
117       case VoiceDetection::kVeryLowLikelihood:
118         mode = 3;
119         break;
120       case VoiceDetection::kLowLikelihood:
121         mode = 2;
122         break;
123       case VoiceDetection::kModerateLikelihood:
124         mode = 1;
125         break;
126       case VoiceDetection::kHighLikelihood:
127         mode = 0;
128         break;
129       default:
130         RTC_NOTREACHED();
131         break;
132     }
133     int error = WebRtcVad_set_mode(vad_->state(), mode);
134     RTC_DCHECK_EQ(0, error);
135   }
136   return AudioProcessing::kNoError;
137 }
138 
likelihood() const139 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
140   rtc::CritScope cs(crit_);
141   return likelihood_;
142 }
143 
set_frame_size_ms(int size)144 int VoiceDetectionImpl::set_frame_size_ms(int size) {
145   rtc::CritScope cs(crit_);
146   RTC_DCHECK_EQ(10, size);  // TODO(ajm): remove when supported.
147   frame_size_ms_ = size;
148   Initialize(sample_rate_hz_);
149   return AudioProcessing::kNoError;
150 }
151 
frame_size_ms() const152 int VoiceDetectionImpl::frame_size_ms() const {
153   rtc::CritScope cs(crit_);
154   return frame_size_ms_;
155 }
156 }  // namespace webrtc
157