1 /*
2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "voice_detection_impl.h"
12
13 #include <cassert>
14
15 #include "critical_section_wrapper.h"
16 #include "webrtc_vad.h"
17
18 #include "audio_processing_impl.h"
19 #include "audio_buffer.h"
20
21 namespace webrtc {
22
23 typedef VadInst Handle;
24
25 namespace {
MapSetting(VoiceDetection::Likelihood likelihood)26 int MapSetting(VoiceDetection::Likelihood likelihood) {
27 switch (likelihood) {
28 case VoiceDetection::kVeryLowLikelihood:
29 return 3;
30 case VoiceDetection::kLowLikelihood:
31 return 2;
32 case VoiceDetection::kModerateLikelihood:
33 return 1;
34 case VoiceDetection::kHighLikelihood:
35 return 0;
36 }
37 assert(false);
38 return -1;
39 }
40 } // namespace
41
VoiceDetectionImpl(const AudioProcessingImpl * apm)42 VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
43 : ProcessingComponent(apm),
44 apm_(apm),
45 stream_has_voice_(false),
46 using_external_vad_(false),
47 likelihood_(kLowLikelihood),
48 frame_size_ms_(10),
49 frame_size_samples_(0) {}
50
~VoiceDetectionImpl()51 VoiceDetectionImpl::~VoiceDetectionImpl() {}
52
ProcessCaptureAudio(AudioBuffer * audio)53 int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
54 if (!is_component_enabled()) {
55 return apm_->kNoError;
56 }
57
58 if (using_external_vad_) {
59 using_external_vad_ = false;
60 return apm_->kNoError;
61 }
62 assert(audio->samples_per_split_channel() <= 160);
63
64 WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
65 if (audio->num_channels() > 1) {
66 audio->CopyAndMixLowPass(1);
67 mixed_data = audio->mixed_low_pass_data(0);
68 }
69
70 // TODO(ajm): concatenate data in frame buffer here.
71
72 int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
73 apm_->split_sample_rate_hz(),
74 mixed_data,
75 frame_size_samples_);
76 if (vad_ret == 0) {
77 stream_has_voice_ = false;
78 audio->set_activity(AudioFrame::kVadPassive);
79 } else if (vad_ret == 1) {
80 stream_has_voice_ = true;
81 audio->set_activity(AudioFrame::kVadActive);
82 } else {
83 return apm_->kUnspecifiedError;
84 }
85
86 return apm_->kNoError;
87 }
88
Enable(bool enable)89 int VoiceDetectionImpl::Enable(bool enable) {
90 CriticalSectionScoped crit_scoped(apm_->crit());
91 return EnableComponent(enable);
92 }
93
is_enabled() const94 bool VoiceDetectionImpl::is_enabled() const {
95 return is_component_enabled();
96 }
97
set_stream_has_voice(bool has_voice)98 int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
99 using_external_vad_ = true;
100 stream_has_voice_ = has_voice;
101 return apm_->kNoError;
102 }
103
stream_has_voice() const104 bool VoiceDetectionImpl::stream_has_voice() const {
105 // TODO(ajm): enable this assertion?
106 //assert(using_external_vad_ || is_component_enabled());
107 return stream_has_voice_;
108 }
109
set_likelihood(VoiceDetection::Likelihood likelihood)110 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
111 CriticalSectionScoped crit_scoped(apm_->crit());
112 if (MapSetting(likelihood) == -1) {
113 return apm_->kBadParameterError;
114 }
115
116 likelihood_ = likelihood;
117 return Configure();
118 }
119
likelihood() const120 VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
121 return likelihood_;
122 }
123
set_frame_size_ms(int size)124 int VoiceDetectionImpl::set_frame_size_ms(int size) {
125 CriticalSectionScoped crit_scoped(apm_->crit());
126 assert(size == 10); // TODO(ajm): remove when supported.
127 if (size != 10 &&
128 size != 20 &&
129 size != 30) {
130 return apm_->kBadParameterError;
131 }
132
133 frame_size_ms_ = size;
134
135 return Initialize();
136 }
137
frame_size_ms() const138 int VoiceDetectionImpl::frame_size_ms() const {
139 return frame_size_ms_;
140 }
141
Initialize()142 int VoiceDetectionImpl::Initialize() {
143 int err = ProcessingComponent::Initialize();
144 if (err != apm_->kNoError || !is_component_enabled()) {
145 return err;
146 }
147
148 using_external_vad_ = false;
149 frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
150 // TODO(ajm): intialize frame buffer here.
151
152 return apm_->kNoError;
153 }
154
CreateHandle() const155 void* VoiceDetectionImpl::CreateHandle() const {
156 Handle* handle = NULL;
157 if (WebRtcVad_Create(&handle) != apm_->kNoError) {
158 handle = NULL;
159 } else {
160 assert(handle != NULL);
161 }
162
163 return handle;
164 }
165
DestroyHandle(void * handle) const166 int VoiceDetectionImpl::DestroyHandle(void* handle) const {
167 return WebRtcVad_Free(static_cast<Handle*>(handle));
168 }
169
InitializeHandle(void * handle) const170 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
171 return WebRtcVad_Init(static_cast<Handle*>(handle));
172 }
173
ConfigureHandle(void * handle) const174 int VoiceDetectionImpl::ConfigureHandle(void* handle) const {
175 return WebRtcVad_set_mode(static_cast<Handle*>(handle),
176 MapSetting(likelihood_));
177 }
178
num_handles_required() const179 int VoiceDetectionImpl::num_handles_required() const {
180 return 1;
181 }
182
GetHandleError(void * handle) const183 int VoiceDetectionImpl::GetHandleError(void* handle) const {
184 // The VAD has no get_error() function.
185 assert(handle != NULL);
186 return apm_->kUnspecifiedError;
187 }
188 } // namespace webrtc
189