/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/voice_detection_impl.h"

#include "api/audio/audio_frame.h"
#include "common_audio/vad/include/webrtc_vad.h"
#include "modules/audio_processing/audio_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/constructormagic.h"

namespace webrtc {

// Thin RAII wrapper around the C-style WebRTC VAD handle.
class VoiceDetectionImpl::Vad {
 public:
  Vad() {
    state_ = WebRtcVad_Create();
    RTC_CHECK(state_);
    int error = WebRtcVad_Init(state_);
    RTC_DCHECK_EQ(0, error);
  }
  ~Vad() { WebRtcVad_Free(state_); }

  VadInst* state() { return state_; }

 private:
  VadInst* state_ = nullptr;
  RTC_DISALLOW_COPY_AND_ASSIGN(Vad);
};

VoiceDetectionImpl::VoiceDetectionImpl(rtc::CriticalSection* crit)
    : crit_(crit) {
  RTC_DCHECK(crit);
}

VoiceDetectionImpl::~VoiceDetectionImpl() {}

void VoiceDetectionImpl::Initialize(int sample_rate_hz) {
  rtc::CritScope cs(crit_);
  sample_rate_hz_ = sample_rate_hz;
  std::unique_ptr<Vad> new_vad;
  if (enabled_) {
    new_vad.reset(new Vad());
  }
  vad_.swap(new_vad);
  using_external_vad_ = false;
  frame_size_samples_ =
      static_cast<size_t>(frame_size_ms_ * sample_rate_hz_) / 1000;
  set_likelihood(likelihood_);
}

void VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
  rtc::CritScope cs(crit_);
  if (!enabled_) {
    return;
  }
  if (using_external_vad_) {
    using_external_vad_ = false;
    return;
  }

  RTC_DCHECK_GE(160, audio->num_frames_per_band());
  // TODO(ajm): concatenate data in frame buffer here.
  // WebRtcVad_Process returns 1 for active voice, 0 for non-active voice.
  int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
                                  audio->mixed_low_pass_data(),
                                  frame_size_samples_);
  if (vad_ret == 0) {
    stream_has_voice_ = false;
    audio->set_activity(AudioFrame::kVadPassive);
  } else if (vad_ret == 1) {
    stream_has_voice_ = true;
    audio->set_activity(AudioFrame::kVadActive);
  } else {
    RTC_NOTREACHED();
  }
}

int VoiceDetectionImpl::Enable(bool enable) {
  rtc::CritScope cs(crit_);
  if (enabled_ != enable) {
    enabled_ = enable;
    Initialize(sample_rate_hz_);
  }
  return AudioProcessing::kNoError;
}

bool VoiceDetectionImpl::is_enabled() const {
  rtc::CritScope cs(crit_);
  return enabled_;
}

int VoiceDetectionImpl::set_stream_has_voice(bool has_voice) {
  rtc::CritScope cs(crit_);
  using_external_vad_ = true;
  stream_has_voice_ = has_voice;
  return AudioProcessing::kNoError;
}

bool VoiceDetectionImpl::stream_has_voice() const {
  rtc::CritScope cs(crit_);
  // TODO(ajm): enable this assertion?
  // RTC_DCHECK(using_external_vad_ || is_component_enabled());
  return stream_has_voice_;
}

int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
  rtc::CritScope cs(crit_);
  likelihood_ = likelihood;
  if (enabled_) {
    int mode = 2;
    switch (likelihood) {
      case VoiceDetection::kVeryLowLikelihood:
        mode = 3;
        break;
      case VoiceDetection::kLowLikelihood:
        mode = 2;
        break;
      case VoiceDetection::kModerateLikelihood:
        mode = 1;
        break;
      case VoiceDetection::kHighLikelihood:
        mode = 0;
        break;
      default:
        RTC_NOTREACHED();
        break;
    }
    int error = WebRtcVad_set_mode(vad_->state(), mode);
    RTC_DCHECK_EQ(0, error);
  }
  return AudioProcessing::kNoError;
}

VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
  rtc::CritScope cs(crit_);
  return likelihood_;
}

int VoiceDetectionImpl::set_frame_size_ms(int size) {
  rtc::CritScope cs(crit_);
  RTC_DCHECK_EQ(10, size);  // TODO(ajm): remove when supported.
  frame_size_ms_ = size;
  Initialize(sample_rate_hz_);
  return AudioProcessing::kNoError;
}

int VoiceDetectionImpl::frame_size_ms() const {
  rtc::CritScope cs(crit_);
  return frame_size_ms_;
}
}  // namespace webrtc