1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/vad/pitch_based_vad.h"
12 
13 #include <string.h>
14 
15 #include "modules/audio_processing/vad/common.h"
16 #include "modules/audio_processing/vad/noise_gmm_tables.h"
17 #include "modules/audio_processing/vad/vad_circular_buffer.h"
18 #include "modules/audio_processing/vad/voice_gmm_tables.h"
19 
20 namespace webrtc {
21 
22 static_assert(kNoiseGmmDim == kVoiceGmmDim,
23               "noise and voice gmm dimension not equal");
24 
// These values should match MATLAB counterparts for unit-tests to pass.
// Number of posterior values kept in the history buffer.
static constexpr int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
// Prior voicing probability used before any frame has been processed.
static constexpr double kInitialPriorProbability = 0.3;
// Arguments forwarded to VadCircularBuffer::RemoveTransient() in
// UpdatePrior().
static constexpr int kTransientWidthThreshold = 7;
static constexpr double kLowProbabilityThreshold = 0.2;
30 
// Clamps `p` to the closed interval [0.01, 0.99] and returns the result.
// Values already inside the interval are returned unchanged.
static double LimitProbability(double p) {
  const double kUpperBound = 0.99;
  const double kLowerBound = 0.01;

  if (p > kUpperBound) {
    return kUpperBound;
  }
  if (p < kLowerBound) {
    return kLowerBound;
  }
  return p;
}
41 
PitchBasedVad()42 PitchBasedVad::PitchBasedVad()
43     : p_prior_(kInitialPriorProbability),
44       circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
45   // Setup noise GMM.
46   noise_gmm_.dimension = kNoiseGmmDim;
47   noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
48   noise_gmm_.weight = kNoiseGmmWeights;
49   noise_gmm_.mean = &kNoiseGmmMean[0][0];
50   noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
51 
52   // Setup voice GMM.
53   voice_gmm_.dimension = kVoiceGmmDim;
54   voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
55   voice_gmm_.weight = kVoiceGmmWeights;
56   voice_gmm_.mean = &kVoiceGmmMean[0][0];
57   voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
58 }
59 
~PitchBasedVad()60 PitchBasedVad::~PitchBasedVad() {}
61 
VoicingProbability(const AudioFeatures & features,double * p_combined)62 int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
63                                       double* p_combined) {
64   double p;
65   double gmm_features[3];
66   double pdf_features_given_voice;
67   double pdf_features_given_noise;
68   // These limits are the same in matlab implementation 'VoicingProbGMM().'
69   const double kLimLowLogPitchGain = -2.0;
70   const double kLimHighLogPitchGain = -0.9;
71   const double kLimLowSpectralPeak = 200;
72   const double kLimHighSpectralPeak = 2000;
73   const double kEps = 1e-12;
74   for (size_t n = 0; n < features.num_frames; n++) {
75     gmm_features[0] = features.log_pitch_gain[n];
76     gmm_features[1] = features.spectral_peak[n];
77     gmm_features[2] = features.pitch_lag_hz[n];
78 
79     pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
80     pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
81 
82     if (features.spectral_peak[n] < kLimLowSpectralPeak ||
83         features.spectral_peak[n] > kLimHighSpectralPeak ||
84         features.log_pitch_gain[n] < kLimLowLogPitchGain) {
85       pdf_features_given_voice = kEps * pdf_features_given_noise;
86     } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
87       pdf_features_given_noise = kEps * pdf_features_given_voice;
88     }
89 
90     p = p_prior_ * pdf_features_given_voice /
91         (pdf_features_given_voice * p_prior_ +
92          pdf_features_given_noise * (1 - p_prior_));
93 
94     p = LimitProbability(p);
95 
96     // Combine pitch-based probability with standalone probability, before
97     // updating prior probabilities.
98     double prod_active = p * p_combined[n];
99     double prod_inactive = (1 - p) * (1 - p_combined[n]);
100     p_combined[n] = prod_active / (prod_active + prod_inactive);
101 
102     if (UpdatePrior(p_combined[n]) < 0)
103       return -1;
104     // Limit prior probability. With a zero prior probability the posterior
105     // probability is always zero.
106     p_prior_ = LimitProbability(p_prior_);
107   }
108   return 0;
109 }
110 
UpdatePrior(double p)111 int PitchBasedVad::UpdatePrior(double p) {
112   circular_buffer_->Insert(p);
113   if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
114                                         kLowProbabilityThreshold) < 0)
115     return -1;
116   p_prior_ = circular_buffer_->Mean();
117   return 0;
118 }
119 
120 }  // namespace webrtc
121