1 /*
2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/vad/voice_activity_detector.h"
12 
13 #include <algorithm>
14 
15 #include "rtc_base/checks.h"
16 
17 namespace webrtc {
namespace {

// Channel count handed to the resampler when it is (re)configured.
constexpr size_t kNumChannels = 1;

// Value |last_voice_probability_| starts with at construction.
constexpr double kDefaultVoiceValue = 1.0;
// Value every frame probability is seeded with before the VADs are run on a
// non-silent chunk.
constexpr double kNeutralProbability = 0.5;
// Value assigned to every frame probability when the extracted features are
// flagged as silence.
constexpr double kLowProbability = 0.01;

}  // namespace
27 
VoiceActivityDetector()28 VoiceActivityDetector::VoiceActivityDetector()
29     : last_voice_probability_(kDefaultVoiceValue),
30       standalone_vad_(StandaloneVad::Create()) {
31 }
32 
// Compiler-generated destructor, defined here in the implementation file
// rather than inline in the header.
VoiceActivityDetector::~VoiceActivityDetector() = default;
34 
35 // Because ISAC has a different chunk length, it updates
36 // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
37 // Otherwise it clears them.
ProcessChunk(const int16_t * audio,size_t length,int sample_rate_hz)38 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
39                                          size_t length,
40                                          int sample_rate_hz) {
41   RTC_DCHECK_EQ(length, sample_rate_hz / 100);
42   // Resample to the required rate.
43   const int16_t* resampled_ptr = audio;
44   if (sample_rate_hz != kSampleRateHz) {
45     RTC_CHECK_EQ(
46         resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
47         0);
48     resampler_.Push(audio, length, resampled_, kLength10Ms, length);
49     resampled_ptr = resampled_;
50   }
51   RTC_DCHECK_EQ(length, kLength10Ms);
52 
53   // Each chunk needs to be passed into |standalone_vad_|, because internally it
54   // buffers the audio and processes it all at once when GetActivity() is
55   // called.
56   RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
57 
58   audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
59 
60   chunkwise_voice_probabilities_.resize(features_.num_frames);
61   chunkwise_rms_.resize(features_.num_frames);
62   std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
63             chunkwise_rms_.begin());
64   if (features_.num_frames > 0) {
65     if (features_.silence) {
66       // The other features are invalid, so set the voice probabilities to an
67       // arbitrary low value.
68       std::fill(chunkwise_voice_probabilities_.begin(),
69                 chunkwise_voice_probabilities_.end(), kLowProbability);
70     } else {
71       std::fill(chunkwise_voice_probabilities_.begin(),
72                 chunkwise_voice_probabilities_.end(), kNeutralProbability);
73       RTC_CHECK_GE(
74           standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
75                                        chunkwise_voice_probabilities_.size()),
76           0);
77       RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
78                        features_, &chunkwise_voice_probabilities_[0]),
79                    0);
80     }
81     last_voice_probability_ = chunkwise_voice_probabilities_.back();
82   }
83 }
84 
85 }  // namespace webrtc
86