1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include "modules/audio_processing/voice_detection_impl.h" 12 13 #include "api/audio/audio_frame.h" 14 #include "common_audio/vad/include/webrtc_vad.h" 15 #include "modules/audio_processing/audio_buffer.h" 16 #include "rtc_base/checks.h" 17 #include "rtc_base/constructormagic.h" 18 19 namespace webrtc { 20 class VoiceDetectionImpl::Vad { 21 public: Vad()22 Vad() { 23 state_ = WebRtcVad_Create(); 24 RTC_CHECK(state_); 25 int error = WebRtcVad_Init(state_); 26 RTC_DCHECK_EQ(0, error); 27 } ~Vad()28 ~Vad() { WebRtcVad_Free(state_); } state()29 VadInst* state() { return state_; } 30 31 private: 32 VadInst* state_ = nullptr; 33 RTC_DISALLOW_COPY_AND_ASSIGN(Vad); 34 }; 35 VoiceDetectionImpl(rtc::CriticalSection * crit)36VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit) 37 : crit_(crit) { 38 RTC_DCHECK(crit); 39 } 40 ~VoiceDetectionImpl()41VoiceDetectionImpl::~VoiceDetectionImpl() {} 42 Initialize(int sample_rate_hz)43void VoiceDetectionImpl::Initialize(int sample_rate_hz) { 44 rtc::CritScope cs(crit_); 45 sample_rate_hz_ = sample_rate_hz; 46 std::unique_ptr<Vad> new_vad; 47 if (enabled_) { 48 new_vad.reset(new Vad()); 49 } 50 vad_.swap(new_vad); 51 using_external_vad_ = false; 52 frame_size_samples_ = 53 static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000; 54 set_likelihood(likelihood_); 55 } 56 ProcessCaptureAudio(AudioBuffer * audio)57void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) { 58 rtc::CritScope cs(crit_); 59 if (!enabled_) { 60 return; 61 } 62 if (using_external_vad_) { 63 using_external_vad_ = false; 64 return; 65 } 66 67 RTC_DCHECK_GE(160, audio->num_frames_per_band()); 68 // TODO(ajm): concatenate data in frame buffer here. 69 int vad_ret = 70 WebRtcVad_Process(vad_->state(), sample_rate_hz_, 71 audio->mixed_low_pass_data(), frame_size_samples_); 72 if (vad_ret == 0) { 73 stream_has_voice_ = false; 74 audio->set_activity(AudioFrame::kVadPassive); 75 } else if (vad_ret == 1) { 76 stream_has_voice_ = true; 77 audio->set_activity(AudioFrame::kVadActive); 78 } else { 79 RTC_NOTREACHED(); 80 } 81 } 82 Enable(bool enable)83int VoiceDetectionImpl::Enable(bool enable) { 84 rtc::CritScope cs(crit_); 85 if (enabled_ != enable) { 86 enabled_ = enable; 87 Initialize(sample_rate_hz_); 88 } 89 return AudioProcessing::kNoError; 90 } 91 is_enabled() const92bool VoiceDetectionImpl::is_enabled() const { 93 rtc::CritScope cs(crit_); 94 return enabled_; 95 } 96 set_stream_has_voice(bool has_voice)97int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) { 98 rtc::CritScope cs(crit_); 99 using_external_vad_ = true; 100 stream_has_voice_ = has_voice; 101 return AudioProcessing::kNoError; 102 } 103 stream_has_voice() const104bool VoiceDetectionImpl::stream_has_voice() const { 105 rtc::CritScope cs(crit_); 106 // TODO(ajm): enable this assertion? 107 // RTC_DCHECK(using_external_vad_ || is_component_enabled()); 108 return stream_has_voice_; 109 } 110 set_likelihood(VoiceDetection::Likelihood likelihood)111int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) { 112 rtc::CritScope cs(crit_); 113 likelihood_ = likelihood; 114 if (enabled_) { 115 int mode = 2; 116 switch (likelihood) { 117 case VoiceDetection::kVeryLowLikelihood: 118 mode = 3; 119 break; 120 case VoiceDetection::kLowLikelihood: 121 mode = 2; 122 break; 123 case VoiceDetection::kModerateLikelihood: 124 mode = 1; 125 break; 126 case VoiceDetection::kHighLikelihood: 127 mode = 0; 128 break; 129 default: 130 RTC_NOTREACHED(); 131 break; 132 } 133 int error = WebRtcVad_set_mode(vad_->state(), mode); 134 RTC_DCHECK_EQ(0, error); 135 } 136 return AudioProcessing::kNoError; 137 } 138 likelihood() const139VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const { 140 rtc::CritScope cs(crit_); 141 return likelihood_; 142 } 143 set_frame_size_ms(int size)144int VoiceDetectionImpl::set_frame_size_ms(int size) { 145 rtc::CritScope cs(crit_); 146 RTC_DCHECK_EQ(10, size); // TODO(ajm): remove when supported. 147 frame_size_ms_ = size; 148 Initialize(sample_rate_hz_); 149 return AudioProcessing::kNoError; 150 } 151 frame_size_ms() const152int VoiceDetectionImpl::frame_size_ms() const { 153 rtc::CritScope cs(crit_); 154 return frame_size_ms_; 155 } 156 } // namespace webrtc 157