1 /*
2  *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/ns/speech_probability_estimator.h"
12 
13 #include <math.h>
14 #include <algorithm>
15 
16 #include "modules/audio_processing/ns/fast_math.h"
17 #include "rtc_base/checks.h"
18 
19 namespace webrtc {
20 
SpeechProbabilityEstimator()21 SpeechProbabilityEstimator::SpeechProbabilityEstimator() {
22   speech_probability_.fill(0.f);
23 }
24 
Update(int32_t num_analyzed_frames,rtc::ArrayView<const float,kFftSizeBy2Plus1> prior_snr,rtc::ArrayView<const float,kFftSizeBy2Plus1> post_snr,rtc::ArrayView<const float,kFftSizeBy2Plus1> conservative_noise_spectrum,rtc::ArrayView<const float,kFftSizeBy2Plus1> signal_spectrum,float signal_spectral_sum,float signal_energy)25 void SpeechProbabilityEstimator::Update(
26     int32_t num_analyzed_frames,
27     rtc::ArrayView<const float, kFftSizeBy2Plus1> prior_snr,
28     rtc::ArrayView<const float, kFftSizeBy2Plus1> post_snr,
29     rtc::ArrayView<const float, kFftSizeBy2Plus1> conservative_noise_spectrum,
30     rtc::ArrayView<const float, kFftSizeBy2Plus1> signal_spectrum,
31     float signal_spectral_sum,
32     float signal_energy) {
33   // Update models.
34   if (num_analyzed_frames < kLongStartupPhaseBlocks) {
35     signal_model_estimator_.AdjustNormalization(num_analyzed_frames,
36                                                 signal_energy);
37   }
38   signal_model_estimator_.Update(prior_snr, post_snr,
39                                  conservative_noise_spectrum, signal_spectrum,
40                                  signal_spectral_sum, signal_energy);
41 
42   const SignalModel& model = signal_model_estimator_.get_model();
43   const PriorSignalModel& prior_model =
44       signal_model_estimator_.get_prior_model();
45 
46   // Width parameter in sigmoid map for prior model.
47   constexpr float kWidthPrior0 = 4.f;
48   // Width for pause region: lower range, so increase width in tanh map.
49   constexpr float kWidthPrior1 = 2.f * kWidthPrior0;
50 
51   // Average LRT feature: use larger width in tanh map for pause regions.
52   float width_prior = model.lrt < prior_model.lrt ? kWidthPrior1 : kWidthPrior0;
53 
54   // Compute indicator function: sigmoid map.
55   float indicator0 =
56       0.5f * (tanh(width_prior * (model.lrt - prior_model.lrt)) + 1.f);
57 
58   // Spectral flatness feature: use larger width in tanh map for pause regions.
59   width_prior = model.spectral_flatness > prior_model.flatness_threshold
60                     ? kWidthPrior1
61                     : kWidthPrior0;
62 
63   // Compute indicator function: sigmoid map.
64   float indicator1 =
65       0.5f * (tanh(1.f * width_prior *
66                    (prior_model.flatness_threshold - model.spectral_flatness)) +
67               1.f);
68 
69   // For template spectrum-difference : use larger width in tanh map for pause
70   // regions.
71   width_prior = model.spectral_diff < prior_model.template_diff_threshold
72                     ? kWidthPrior1
73                     : kWidthPrior0;
74 
75   // Compute indicator function: sigmoid map.
76   float indicator2 =
77       0.5f * (tanh(width_prior * (model.spectral_diff -
78                                   prior_model.template_diff_threshold)) +
79               1.f);
80 
81   // Combine the indicator function with the feature weights.
82   float ind_prior = prior_model.lrt_weighting * indicator0 +
83                     prior_model.flatness_weighting * indicator1 +
84                     prior_model.difference_weighting * indicator2;
85 
86   // Compute the prior probability.
87   prior_speech_prob_ += 0.1f * (ind_prior - prior_speech_prob_);
88 
89   // Make sure probabilities are within range: keep floor to 0.01.
90   prior_speech_prob_ = std::max(std::min(prior_speech_prob_, 1.f), 0.01f);
91 
92   // Final speech probability: combine prior model with LR factor:.
93   float gain_prior =
94       (1.f - prior_speech_prob_) / (prior_speech_prob_ + 0.0001f);
95 
96   std::array<float, kFftSizeBy2Plus1> inv_lrt;
97   ExpApproximationSignFlip(model.avg_log_lrt, inv_lrt);
98   for (size_t i = 0; i < kFftSizeBy2Plus1; ++i) {
99     speech_probability_[i] = 1.f / (1.f + gain_prior * inv_lrt[i]);
100   }
101 }
102 
103 }  // namespace webrtc
104