1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "voice_detection_impl.h"
12 
13 #include <cassert>
14 
15 #include "critical_section_wrapper.h"
16 #include "webrtc_vad.h"
17 
18 #include "audio_processing_impl.h"
19 #include "audio_buffer.h"
20 
21 namespace webrtc {
22 
23 typedef VadInst Handle;
24 
25 namespace {
MapSetting(VoiceDetection::Likelihood likelihood)26 int MapSetting(VoiceDetection::Likelihood likelihood) {
27   switch (likelihood) {
28     case VoiceDetection::kVeryLowLikelihood:
29       return 3;
30     case VoiceDetection::kLowLikelihood:
31       return 2;
32     case VoiceDetection::kModerateLikelihood:
33       return 1;
34     case VoiceDetection::kHighLikelihood:
35       return 0;
36   }
37   assert(false);
38   return -1;
39 }
40 }  // namespace
41 
VoiceDetectionImpl(const AudioProcessingImpl * apm)42 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
43   : ProcessingComponent(apm),
44     apm_(apm),
45     stream_has_voice_(false),
46     using_external_vad_(false),
47     likelihood_(kLowLikelihood),
48     frame_size_ms_(10),
49     frame_size_samples_(0) {}
50 
~VoiceDetectionImpl()51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
52 
ProcessCaptureAudio(AudioBuffer * audio)53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54   if (!is_component_enabled()) {
55     return apm_->kNoError;
56   }
57 
58   if (using_external_vad_) {
59     using_external_vad_ = false;
60     return apm_->kNoError;
61   }
62   assert(audio->samples_per_split_channel() <= 160);
63 
64   WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
65   if (audio->num_channels() > 1) {
66     audio->CopyAndMixLowPass(1);
67     mixed_data = audio->mixed_low_pass_data(0);
68   }
69 
70   // TODO(ajm): concatenate data in frame buffer here.
71 
72   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
73                                   apm_->split_sample_rate_hz(),
74                                   mixed_data,
75                                   frame_size_samples_);
76   if (vad_ret == 0) {
77     stream_has_voice_ = false;
78     audio->set_activity(AudioFrame::kVadPassive);
79   } else if (vad_ret == 1) {
80     stream_has_voice_ = true;
81     audio->set_activity(AudioFrame::kVadActive);
82   } else {
83     return apm_->kUnspecifiedError;
84   }
85 
86   return apm_->kNoError;
87 }
88 
Enable(bool enable)89 int VoiceDetectionImpl::Enable(bool enable) {
90   CriticalSectionScoped crit_scoped(apm_->crit());
91   return EnableComponent(enable);
92 }
93 
is_enabled() const94 bool VoiceDetectionImpl::is_enabled() const {
95   return is_component_enabled();
96 }
97 
set_stream_has_voice(bool has_voice)98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
99   using_external_vad_ = true;
100   stream_has_voice_ = has_voice;
101   return apm_->kNoError;
102 }
103 
stream_has_voice() const104 bool VoiceDetectionImpl::stream_has_voice() const {
105   // TODO(ajm): enable this assertion?
106   //assert(using_external_vad_ || is_component_enabled());
107   return stream_has_voice_;
108 }
109 
set_likelihood(VoiceDetection::Likelihood likelihood)110 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
111   CriticalSectionScoped crit_scoped(apm_->crit());
112   if (MapSetting(likelihood) == -1) {
113     return apm_->kBadParameterError;
114   }
115 
116   likelihood_ = likelihood;
117   return Configure();
118 }
119 
likelihood() const120 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
121   return likelihood_;
122 }
123 
set_frame_size_ms(int size)124 int VoiceDetectionImpl::set_frame_size_ms(int size) {
125   CriticalSectionScoped crit_scoped(apm_->crit());
126   assert(size == 10); // TODO(ajm): remove when supported.
127   if (size != 10 &&
128       size != 20 &&
129       size != 30) {
130     return apm_->kBadParameterError;
131   }
132 
133   frame_size_ms_ = size;
134 
135   return Initialize();
136 }
137 
frame_size_ms() const138 int VoiceDetectionImpl::frame_size_ms() const {
139   return frame_size_ms_;
140 }
141 
Initialize()142 int VoiceDetectionImpl::Initialize() {
143   int err = ProcessingComponent::Initialize();
144   if (err != apm_->kNoError || !is_component_enabled()) {
145     return err;
146   }
147 
148   using_external_vad_ = false;
149   frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
150   // TODO(ajm): intialize frame buffer here.
151 
152   return apm_->kNoError;
153 }
154 
CreateHandle() const155 void* VoiceDetectionImpl::CreateHandle() const {
156   Handle* handle = NULL;
157   if (WebRtcVad_Create(&handle) != apm_->kNoError) {
158     handle = NULL;
159   } else {
160     assert(handle != NULL);
161   }
162 
163   return handle;
164 }
165 
DestroyHandle(void * handle) const166 int VoiceDetectionImpl::DestroyHandle(void* handle) const {
167   return WebRtcVad_Free(static_cast<Handle*>(handle));
168 }
169 
InitializeHandle(void * handle) const170 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171   return WebRtcVad_Init(static_cast<Handle*>(handle));
172 }
173 
ConfigureHandle(void * handle) const174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
175   return WebRtcVad_set_mode(static_cast<Handle*>(handle),
176                             MapSetting(likelihood_));
177 }
178 
num_handles_required() const179 int VoiceDetectionImpl::num_handles_required() const {
180   return 1;
181 }
182 
GetHandleError(void * handle) const183 int VoiceDetectionImpl::GetHandleError(void* handle) const {
184   // The VAD has no get_error() function.
185   assert(handle != NULL);
186   return apm_->kUnspecifiedError;
187 }
188 }  // namespace webrtc
189