1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_processing/agc/pitch_based_vad.h"
12 
13 #include <assert.h>
14 #include <math.h>
15 #include <string.h>
16 
17 #include "webrtc/modules/audio_processing/agc/circular_buffer.h"
18 #include "webrtc/modules/audio_processing/agc/common.h"
19 #include "webrtc/modules/audio_processing/agc/noise_gmm_tables.h"
20 #include "webrtc/modules/audio_processing/agc/voice_gmm_tables.h"
21 #include "webrtc/modules/interface/module_common_types.h"
22 
23 namespace webrtc {
24 
// VoicingProbability() evaluates the voice and the noise GMM on the same
// feature vector, so the two models must have the same dimension.
static_assert(kNoiseGmmDim == kVoiceGmmDim,
              "noise and voice gmm dimension not equal");

// These values should match MATLAB counterparts for unit-tests to pass.
// Size passed to AgcCircularBuffer::Create() for the posterior history.
static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
// Value |p_prior_| is initialized with in the constructor.
static const double kInitialPriorProbability = 0.3;
// The two arguments handed to the circular buffer's RemoveTransient() each
// time UpdatePrior() runs.
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
33 
// Clamps |p| to the closed interval [0.01, 0.99]. A value that is already
// inside the interval is returned untouched.
static double LimitProbability(double p) {
  const double kUpperBound = 0.99;
  const double kLowerBound = 0.01;

  if (p > kUpperBound)
    return kUpperBound;
  return (p < kLowerBound) ? kLowerBound : p;
}
44 
PitchBasedVad()45 PitchBasedVad::PitchBasedVad()
46     : p_prior_(kInitialPriorProbability),
47       circular_buffer_(AgcCircularBuffer::Create(kPosteriorHistorySize)) {
48   // Setup noise GMM.
49   noise_gmm_.dimension = kNoiseGmmDim;
50   noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
51   noise_gmm_.weight = kNoiseGmmWeights;
52   noise_gmm_.mean = &kNoiseGmmMean[0][0];
53   noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
54 
55   // Setup voice GMM.
56   voice_gmm_.dimension = kVoiceGmmDim;
57   voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
58   voice_gmm_.weight = kVoiceGmmWeights;
59   voice_gmm_.mean = &kVoiceGmmMean[0][0];
60   voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
61 }
62 
~PitchBasedVad()63 PitchBasedVad::~PitchBasedVad() {}
64 
VoicingProbability(const AudioFeatures & features,double * p_combined)65 int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
66                                       double* p_combined) {
67   double p;
68   double gmm_features[3];
69   double pdf_features_given_voice;
70   double pdf_features_given_noise;
71   // These limits are the same in matlab implementation 'VoicingProbGMM().'
72   const double kLimLowLogPitchGain = -2.0;
73   const double kLimHighLogPitchGain = -0.9;
74   const double kLimLowSpectralPeak = 200;
75   const double kLimHighSpectralPeak = 2000;
76   const double kEps = 1e-12;
77   for (int n = 0; n < features.num_frames; n++) {
78     gmm_features[0] = features.log_pitch_gain[n];
79     gmm_features[1] = features.spectral_peak[n];
80     gmm_features[2] = features.pitch_lag_hz[n];
81 
82     pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
83     pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
84 
85     if (features.spectral_peak[n] < kLimLowSpectralPeak ||
86         features.spectral_peak[n] > kLimHighSpectralPeak ||
87         features.log_pitch_gain[n] < kLimLowLogPitchGain) {
88       pdf_features_given_voice = kEps * pdf_features_given_noise;
89     } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
90       pdf_features_given_noise = kEps * pdf_features_given_voice;
91     }
92 
93     p = p_prior_ * pdf_features_given_voice / (pdf_features_given_voice *
94         p_prior_ + pdf_features_given_noise * (1 - p_prior_));
95 
96     p = LimitProbability(p);
97 
98     // Combine pitch-based probability with standalone probability, before
99     // updating prior probabilities.
100     double prod_active = p * p_combined[n];
101     double prod_inactive = (1 - p) * (1 - p_combined[n]);
102     p_combined[n] = prod_active / (prod_active + prod_inactive);
103 
104     if (UpdatePrior(p_combined[n]) < 0)
105       return -1;
106     // Limit prior probability. With a zero prior probability the posterior
107     // probability is always zero.
108     p_prior_ = LimitProbability(p_prior_);
109   }
110   return 0;
111 }
112 
UpdatePrior(double p)113 int PitchBasedVad::UpdatePrior(double p) {
114   circular_buffer_->Insert(p);
115   if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
116                                         kLowProbabilityThreshold) < 0)
117     return -1;
118   p_prior_ = circular_buffer_->Mean();
119   return 0;
120 }
121 
122 }  // namespace webrtc
123