1 /*
2 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/vad/voice_activity_detector.h"
12
13 #include <algorithm>
14
15 #include "rtc_base/checks.h"
16
17 namespace webrtc {
namespace {

// Channel count handed to the resampler in ProcessChunk(); only mono audio is
// processed.
constexpr size_t kNumChannels = 1;

// Initial value of |last_voice_probability_|, used until a chunk yields frames.
constexpr double kDefaultVoiceValue = 1.0;
// Value every frame's probability is seeded with before the VADs are queried.
constexpr double kNeutralProbability = 0.5;
// Value assigned to every frame of a chunk whose features are flagged silent.
constexpr double kLowProbability = 0.01;

}  // namespace
27
VoiceActivityDetector()28 VoiceActivityDetector::VoiceActivityDetector()
29 : last_voice_probability_(kDefaultVoiceValue),
30 standalone_vad_(StandaloneVad::Create()) {
31 }
32
33 VoiceActivityDetector::~VoiceActivityDetector() = default;
34
35 // Because ISAC has a different chunk length, it updates
36 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
37 // Otherwise it clears them.
ProcessChunk(const int16_t * audio,size_t length,int sample_rate_hz)38 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
39 size_t length,
40 int sample_rate_hz) {
41 RTC_DCHECK_EQ(length, sample_rate_hz / 100);
42 // Resample to the required rate.
43 const int16_t* resampled_ptr = audio;
44 if (sample_rate_hz != kSampleRateHz) {
45 RTC_CHECK_EQ(
46 resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
47 0);
48 resampler_.Push(audio, length, resampled_, kLength10Ms, length);
49 resampled_ptr = resampled_;
50 }
51 RTC_DCHECK_EQ(length, kLength10Ms);
52
53 // Each chunk needs to be passed into |standalone_vad_|, because internally it
54 // buffers the audio and processes it all at once when GetActivity() is
55 // called.
56 RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
57
58 audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
59
60 chunkwise_voice_probabilities_.resize(features_.num_frames);
61 chunkwise_rms_.resize(features_.num_frames);
62 std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
63 chunkwise_rms_.begin());
64 if (features_.num_frames > 0) {
65 if (features_.silence) {
66 // The other features are invalid, so set the voice probabilities to an
67 // arbitrary low value.
68 std::fill(chunkwise_voice_probabilities_.begin(),
69 chunkwise_voice_probabilities_.end(), kLowProbability);
70 } else {
71 std::fill(chunkwise_voice_probabilities_.begin(),
72 chunkwise_voice_probabilities_.end(), kNeutralProbability);
73 RTC_CHECK_GE(
74 standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
75 chunkwise_voice_probabilities_.size()),
76 0);
77 RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
78 features_, &chunkwise_voice_probabilities_[0]),
79 0);
80 }
81 last_voice_probability_ = chunkwise_voice_probabilities_.back();
82 }
83 }
84
85 } // namespace webrtc
86