1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/vad/pitch_based_vad.h"
12 
13 #include <math.h>
14 #include <string.h>
15 
16 #include "modules/audio_processing/vad/vad_circular_buffer.h"
17 #include "modules/audio_processing/vad/common.h"
18 #include "modules/audio_processing/vad/noise_gmm_tables.h"
19 #include "modules/audio_processing/vad/voice_gmm_tables.h"
20 #include "modules/include/module_common_types.h"
21 
22 namespace webrtc {
23 
// VoicingProbability() hands one and the same feature vector to both the
// voice and the noise GMM, so the two models must have equal dimension.
static_assert(kNoiseGmmDim == kVoiceGmmDim,
              "noise and voice gmm dimension not equal");
26 
// Tuning constants. Keep them identical to their MATLAB counterparts,
// otherwise the unit-tests will not pass.

// Prior handling.
static constexpr double kInitialPriorProbability = 0.3;
static constexpr int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.

// Transient removal applied to the posterior history.
static constexpr int kTransientWidthThreshold = 7;
static constexpr double kLowProbabilityThreshold = 0.2;
32 
// Clamps |p| to the closed interval [0.01, 0.99] and returns the result.
//
// A NaN input is mapped to the lower limit rather than passed through. NaN
// compares false against every bound, so a plain two-sided clamp would hand
// it back untouched; a single 0/0 posterior (both likelihoods zero) would
// then leak into the prior and every probability derived from it.
static double LimitProbability(double p) {
  const double kLimHigh = 0.99;
  const double kLimLow = 0.01;

  // Deliberately written as a negated ">=" so that NaN takes this branch too.
  if (!(p >= kLimLow))
    return kLimLow;
  if (p > kLimHigh)
    return kLimHigh;
  return p;
}
43 
PitchBasedVad()44 PitchBasedVad::PitchBasedVad()
45     : p_prior_(kInitialPriorProbability),
46       circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
47   // Setup noise GMM.
48   noise_gmm_.dimension = kNoiseGmmDim;
49   noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
50   noise_gmm_.weight = kNoiseGmmWeights;
51   noise_gmm_.mean = &kNoiseGmmMean[0][0];
52   noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
53 
54   // Setup voice GMM.
55   voice_gmm_.dimension = kVoiceGmmDim;
56   voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
57   voice_gmm_.weight = kVoiceGmmWeights;
58   voice_gmm_.mean = &kVoiceGmmMean[0][0];
59   voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
60 }
61 
~PitchBasedVad()62 PitchBasedVad::~PitchBasedVad() {
63 }
64 
VoicingProbability(const AudioFeatures & features,double * p_combined)65 int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
66                                       double* p_combined) {
67   double p;
68   double gmm_features[3];
69   double pdf_features_given_voice;
70   double pdf_features_given_noise;
71   // These limits are the same in matlab implementation 'VoicingProbGMM().'
72   const double kLimLowLogPitchGain = -2.0;
73   const double kLimHighLogPitchGain = -0.9;
74   const double kLimLowSpectralPeak = 200;
75   const double kLimHighSpectralPeak = 2000;
76   const double kEps = 1e-12;
77   for (size_t n = 0; n < features.num_frames; n++) {
78     gmm_features[0] = features.log_pitch_gain[n];
79     gmm_features[1] = features.spectral_peak[n];
80     gmm_features[2] = features.pitch_lag_hz[n];
81 
82     pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
83     pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
84 
85     if (features.spectral_peak[n] < kLimLowSpectralPeak ||
86         features.spectral_peak[n] > kLimHighSpectralPeak ||
87         features.log_pitch_gain[n] < kLimLowLogPitchGain) {
88       pdf_features_given_voice = kEps * pdf_features_given_noise;
89     } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
90       pdf_features_given_noise = kEps * pdf_features_given_voice;
91     }
92 
93     p = p_prior_ * pdf_features_given_voice /
94         (pdf_features_given_voice * p_prior_ +
95          pdf_features_given_noise * (1 - p_prior_));
96 
97     p = LimitProbability(p);
98 
99     // Combine pitch-based probability with standalone probability, before
100     // updating prior probabilities.
101     double prod_active = p * p_combined[n];
102     double prod_inactive = (1 - p) * (1 - p_combined[n]);
103     p_combined[n] = prod_active / (prod_active + prod_inactive);
104 
105     if (UpdatePrior(p_combined[n]) < 0)
106       return -1;
107     // Limit prior probability. With a zero prior probability the posterior
108     // probability is always zero.
109     p_prior_ = LimitProbability(p_prior_);
110   }
111   return 0;
112 }
113 
UpdatePrior(double p)114 int PitchBasedVad::UpdatePrior(double p) {
115   circular_buffer_->Insert(p);
116   if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
117                                         kLowProbabilityThreshold) < 0)
118     return -1;
119   p_prior_ = circular_buffer_->Mean();
120   return 0;
121 }
122 
123 }  // namespace webrtc
124