1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/suppression_gain.h"
12 
13 #include "typedefs.h"  // NOLINT(build/include)
14 #if defined(WEBRTC_ARCH_X86_FAMILY)
15 #include <emmintrin.h>
16 #endif
17 #include <math.h>
18 #include <algorithm>
19 #include <functional>
20 #include <numeric>
21 
22 #include "modules/audio_processing/aec3/vector_math.h"
23 #include "rtc_base/checks.h"
24 
25 namespace webrtc {
26 namespace {
27 
28 // Reduce gain to avoid narrow band echo leakage.
NarrowBandAttenuation(int narrow_bin,std::array<float,kFftLengthBy2Plus1> * gain)29 void NarrowBandAttenuation(int narrow_bin,
30                            std::array<float, kFftLengthBy2Plus1>* gain) {
31   const int upper_bin =
32       std::min(narrow_bin + 6, static_cast<int>(kFftLengthBy2Plus1 - 1));
33   for (int k = std::max(0, narrow_bin - 6); k <= upper_bin; ++k) {
34     (*gain)[k] = std::min((*gain)[k], 0.001f);
35   }
36 }
37 
38 // Adjust the gains according to the presence of known external filters.
AdjustForExternalFilters(std::array<float,kFftLengthBy2Plus1> * gain)39 void AdjustForExternalFilters(std::array<float, kFftLengthBy2Plus1>* gain) {
40   // Limit the low frequency gains to avoid the impact of the high-pass filter
41   // on the lower-frequency gain influencing the overall achieved gain.
42   (*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]);
43 
44   // Limit the high frequency gains to avoid the impact of the anti-aliasing
45   // filter on the upper-frequency gains influencing the overall achieved
46   // gain. TODO(peah): Update this when new anti-aliasing filters are
47   // implemented.
48   constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000;
49   const float min_upper_gain = (*gain)[kAntiAliasingImpactLimit];
50   std::for_each(
51       gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1,
52       [min_upper_gain](float& a) { a = std::min(a, min_upper_gain); });
53   (*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1];
54 }
55 
56 // Computes the gain to apply for the bands beyond the first band.
UpperBandsGain(const rtc::Optional<int> & narrow_peak_band,bool saturated_echo,const std::vector<std::vector<float>> & render,const std::array<float,kFftLengthBy2Plus1> & low_band_gain)57 float UpperBandsGain(
58     const rtc::Optional<int>& narrow_peak_band,
59     bool saturated_echo,
60     const std::vector<std::vector<float>>& render,
61     const std::array<float, kFftLengthBy2Plus1>& low_band_gain) {
62   RTC_DCHECK_LT(0, render.size());
63   if (render.size() == 1) {
64     return 1.f;
65   }
66 
67   if (narrow_peak_band &&
68       (*narrow_peak_band > static_cast<int>(kFftLengthBy2Plus1 - 10))) {
69     return 0.001f;
70   }
71 
72   constexpr size_t kLowBandGainLimit = kFftLengthBy2 / 2;
73   const float gain_below_8_khz = *std::min_element(
74       low_band_gain.begin() + kLowBandGainLimit, low_band_gain.end());
75 
76   // Always attenuate the upper bands when there is saturated echo.
77   if (saturated_echo) {
78     return std::min(0.001f, gain_below_8_khz);
79   }
80 
81   // Compute the upper and lower band energies.
82   const auto sum_of_squares = [](float a, float b) { return a + b * b; };
83   const float low_band_energy =
84       std::accumulate(render[0].begin(), render[0].end(), 0.f, sum_of_squares);
85   float high_band_energy = 0.f;
86   for (size_t k = 1; k < render.size(); ++k) {
87     const float energy = std::accumulate(render[k].begin(), render[k].end(),
88                                          0.f, sum_of_squares);
89     high_band_energy = std::max(high_band_energy, energy);
90   }
91 
92   // If there is more power in the lower frequencies than the upper frequencies,
93   // or if the power in upper frequencies is low, do not bound the gain in the
94   // upper bands.
95   float anti_howling_gain;
96   constexpr float kThreshold = kBlockSize * 10.f * 10.f / 4.f;
97   if (high_band_energy < std::max(low_band_energy, kThreshold)) {
98     anti_howling_gain = 1.f;
99   } else {
100     // In all other cases, bound the gain for upper frequencies.
101     RTC_DCHECK_LE(low_band_energy, high_band_energy);
102     RTC_DCHECK_NE(0.f, high_band_energy);
103     anti_howling_gain = 0.01f * sqrtf(low_band_energy / high_band_energy);
104   }
105 
106   // Choose the gain as the minimum of the lower and upper gains.
107   return std::min(gain_below_8_khz, anti_howling_gain);
108 }
109 
110 // Limits the gain increase.
UpdateMaxGainIncrease(const EchoCanceller3Config & config,size_t no_saturation_counter,bool low_noise_render,bool linear_echo_estimate,const std::array<float,kFftLengthBy2Plus1> & last_echo,const std::array<float,kFftLengthBy2Plus1> & echo,const std::array<float,kFftLengthBy2Plus1> & last_gain,const std::array<float,kFftLengthBy2Plus1> & new_gain,std::array<float,kFftLengthBy2Plus1> * gain_increase)111 void UpdateMaxGainIncrease(
112     const EchoCanceller3Config& config,
113     size_t no_saturation_counter,
114     bool low_noise_render,
115     bool linear_echo_estimate,
116     const std::array<float, kFftLengthBy2Plus1>& last_echo,
117     const std::array<float, kFftLengthBy2Plus1>& echo,
118     const std::array<float, kFftLengthBy2Plus1>& last_gain,
119     const std::array<float, kFftLengthBy2Plus1>& new_gain,
120     std::array<float, kFftLengthBy2Plus1>* gain_increase) {
121   float max_increasing;
122   float max_decreasing;
123   float rate_increasing;
124   float rate_decreasing;
125   float min_increasing;
126   float min_decreasing;
127 
128   auto& param = config.gain_updates;
129   if (linear_echo_estimate) {
130     max_increasing = param.nonlinear.max_inc;
131     max_decreasing = param.nonlinear.max_dec;
132     rate_increasing = param.nonlinear.rate_inc;
133     rate_decreasing = param.nonlinear.rate_dec;
134     min_increasing = param.nonlinear.min_inc;
135     min_decreasing = param.nonlinear.min_dec;
136   } else if (low_noise_render) {
137     max_increasing = param.low_noise.max_inc;
138     max_decreasing = param.low_noise.max_dec;
139     rate_increasing = param.low_noise.rate_inc;
140     rate_decreasing = param.low_noise.rate_dec;
141     min_increasing = param.low_noise.min_inc;
142     min_decreasing = param.low_noise.min_dec;
143   } else if (no_saturation_counter > 10) {
144     max_increasing = param.normal.max_inc;
145     max_decreasing = param.normal.max_dec;
146     rate_increasing = param.normal.rate_inc;
147     rate_decreasing = param.normal.rate_dec;
148     min_increasing = param.normal.min_inc;
149     min_decreasing = param.normal.min_dec;
150   } else {
151     max_increasing = param.saturation.max_inc;
152     max_decreasing = param.saturation.max_dec;
153     rate_increasing = param.saturation.rate_inc;
154     rate_decreasing = param.saturation.rate_dec;
155     min_increasing = param.saturation.min_inc;
156     min_decreasing = param.saturation.min_dec;
157   }
158 
159   for (size_t k = 0; k < new_gain.size(); ++k) {
160     if (echo[k] > last_echo[k]) {
161       (*gain_increase)[k] =
162           new_gain[k] > last_gain[k]
163               ? std::min(max_increasing, (*gain_increase)[k] * rate_increasing)
164               : min_increasing;
165     } else {
166       (*gain_increase)[k] =
167           new_gain[k] > last_gain[k]
168               ? std::min(max_decreasing, (*gain_increase)[k] * rate_decreasing)
169               : min_decreasing;
170     }
171   }
172 }
173 
174 // Computes the gain to reduce the echo to a non audible level.
GainToNoAudibleEcho(const EchoCanceller3Config & config,bool low_noise_render,bool saturated_echo,bool saturating_echo_path,bool linear_echo_estimate,const std::array<float,kFftLengthBy2Plus1> & nearend,const std::array<float,kFftLengthBy2Plus1> & echo,const std::array<float,kFftLengthBy2Plus1> & masker,const std::array<float,kFftLengthBy2Plus1> & min_gain,const std::array<float,kFftLengthBy2Plus1> & max_gain,const std::array<float,kFftLengthBy2Plus1> & one_by_echo,std::array<float,kFftLengthBy2Plus1> * gain)175 void GainToNoAudibleEcho(
176     const EchoCanceller3Config& config,
177     bool low_noise_render,
178     bool saturated_echo,
179     bool saturating_echo_path,
180     bool linear_echo_estimate,
181     const std::array<float, kFftLengthBy2Plus1>& nearend,
182     const std::array<float, kFftLengthBy2Plus1>& echo,
183     const std::array<float, kFftLengthBy2Plus1>& masker,
184     const std::array<float, kFftLengthBy2Plus1>& min_gain,
185     const std::array<float, kFftLengthBy2Plus1>& max_gain,
186     const std::array<float, kFftLengthBy2Plus1>& one_by_echo,
187     std::array<float, kFftLengthBy2Plus1>* gain) {
188   float nearend_masking_margin = 0.f;
189   if (linear_echo_estimate) {
190     nearend_masking_margin =
191         low_noise_render
192             ? config.gain_mask.m9
193             : (saturated_echo ? config.gain_mask.m2 : config.gain_mask.m3);
194   } else {
195     nearend_masking_margin = config.gain_mask.m7;
196   }
197 
198   RTC_DCHECK_LE(0.f, nearend_masking_margin);
199   RTC_DCHECK_GT(1.f, nearend_masking_margin);
200   const float one_by_one_minus_nearend_masking_margin =
201       1.f / (1.0f - nearend_masking_margin);
202 
203   const float masker_margin =
204       linear_echo_estimate ? config.gain_mask.m1 : config.gain_mask.m8;
205 
206   for (size_t k = 0; k < gain->size(); ++k) {
207     const float unity_gain_masker = std::max(nearend[k], masker[k]);
208     RTC_DCHECK_LE(0.f, nearend_masking_margin * unity_gain_masker);
209     if (echo[k] <= nearend_masking_margin * unity_gain_masker ||
210         unity_gain_masker <= 0.f) {
211       (*gain)[k] = 1.f;
212     } else {
213       RTC_DCHECK_LT(0.f, unity_gain_masker);
214       (*gain)[k] = std::max(0.f, (1.f - 5.f * echo[k] / unity_gain_masker) *
215                                      one_by_one_minus_nearend_masking_margin);
216       (*gain)[k] =
217           std::max(masker_margin * masker[k] * one_by_echo[k], (*gain)[k]);
218     }
219 
220     (*gain)[k] = std::min(std::max((*gain)[k], min_gain[k]), max_gain[k]);
221   }
222 }
223 
224 // TODO(peah): Make adaptive to take the actual filter error into account.
225 constexpr size_t kUpperAccurateBandPlus1 = 29;
226 
227 // Computes the signal output power that masks the echo signal.
MaskingPower(const EchoCanceller3Config & config,const std::array<float,kFftLengthBy2Plus1> & nearend,const std::array<float,kFftLengthBy2Plus1> & comfort_noise,const std::array<float,kFftLengthBy2Plus1> & last_masker,const std::array<float,kFftLengthBy2Plus1> & gain,std::array<float,kFftLengthBy2Plus1> * masker)228 void MaskingPower(const EchoCanceller3Config& config,
229                   const std::array<float, kFftLengthBy2Plus1>& nearend,
230                   const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
231                   const std::array<float, kFftLengthBy2Plus1>& last_masker,
232                   const std::array<float, kFftLengthBy2Plus1>& gain,
233                   std::array<float, kFftLengthBy2Plus1>* masker) {
234   std::array<float, kFftLengthBy2Plus1> side_band_masker;
235   float max_nearend_after_gain = 0.f;
236   for (size_t k = 0; k < gain.size(); ++k) {
237     const float nearend_after_gain = nearend[k] * gain[k];
238     max_nearend_after_gain =
239         std::max(max_nearend_after_gain, nearend_after_gain);
240     side_band_masker[k] = nearend_after_gain + comfort_noise[k];
241     (*masker)[k] = comfort_noise[k] + config.gain_mask.m4 * last_masker[k];
242   }
243 
244   // Apply masking only between lower frequency bands.
245   RTC_DCHECK_LT(kUpperAccurateBandPlus1, gain.size());
246   for (size_t k = 1; k < kUpperAccurateBandPlus1; ++k) {
247     (*masker)[k] += config.gain_mask.m5 *
248                     (side_band_masker[k - 1] + side_band_masker[k + 1]);
249   }
250 
251   // Add full-band masking as a minimum value for the masker.
252   const float min_masker = max_nearend_after_gain * config.gain_mask.m6;
253   std::for_each(masker->begin(), masker->end(),
254                 [min_masker](float& a) { a = std::max(a, min_masker); });
255 }
256 
257 // Limits the gain in the frequencies for which the adaptive filter has not
258 // converged. Currently, these frequencies are not hardcoded to the frequencies
259 // which are typically not excited by speech.
260 // TODO(peah): Make adaptive to take the actual filter error into account.
AdjustNonConvergedFrequencies(std::array<float,kFftLengthBy2Plus1> * gain)261 void AdjustNonConvergedFrequencies(
262     std::array<float, kFftLengthBy2Plus1>* gain) {
263   constexpr float oneByBandsInSum =
264       1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
265   const float hf_gain_bound =
266       std::accumulate(gain->begin() + 20,
267                       gain->begin() + kUpperAccurateBandPlus1, 0.f) *
268       oneByBandsInSum;
269 
270   std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
271                 [hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
272 }
273 
274 }  // namespace
275 
276 // TODO(peah): Add further optimizations, in particular for the divisions.
LowerBandGain(bool low_noise_render,const rtc::Optional<int> & narrow_peak_band,bool saturated_echo,bool saturating_echo_path,bool linear_echo_estimate,const std::array<float,kFftLengthBy2Plus1> & nearend,const std::array<float,kFftLengthBy2Plus1> & echo,const std::array<float,kFftLengthBy2Plus1> & comfort_noise,std::array<float,kFftLengthBy2Plus1> * gain)277 void SuppressionGain::LowerBandGain(
278     bool low_noise_render,
279     const rtc::Optional<int>& narrow_peak_band,
280     bool saturated_echo,
281     bool saturating_echo_path,
282     bool linear_echo_estimate,
283     const std::array<float, kFftLengthBy2Plus1>& nearend,
284     const std::array<float, kFftLengthBy2Plus1>& echo,
285     const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
286     std::array<float, kFftLengthBy2Plus1>* gain) {
287   // Count the number of blocks since saturation.
288   no_saturation_counter_ = saturated_echo ? 0 : no_saturation_counter_ + 1;
289 
290   // Precompute 1/echo (note that when the echo is zero, the precomputed value
291   // is never used).
292   std::array<float, kFftLengthBy2Plus1> one_by_echo;
293   std::transform(echo.begin(), echo.end(), one_by_echo.begin(),
294                  [](float a) { return a > 0.f ? 1.f / a : 1.f; });
295 
296   // Compute the minimum gain as the attenuating gain to put the signal just
297   // above the zero sample values.
298   std::array<float, kFftLengthBy2Plus1> min_gain;
299   const float min_echo_power =
300       low_noise_render ? config_.echo_audibility.low_render_limit
301                        : config_.echo_audibility.normal_render_limit;
302   if (no_saturation_counter_ > 10) {
303     for (size_t k = 0; k < nearend.size(); ++k) {
304       const float denom = std::min(nearend[k], echo[k]);
305       min_gain[k] = denom > 0.f ? min_echo_power / denom : 1.f;
306       min_gain[k] = std::min(min_gain[k], 1.f);
307     }
308   } else {
309     min_gain.fill(0.f);
310   }
311 
312   // Compute the maximum gain by limiting the gain increase from the previous
313   // gain.
314   std::array<float, kFftLengthBy2Plus1> max_gain;
315   for (size_t k = 0; k < gain->size(); ++k) {
316     max_gain[k] = std::min(std::max(last_gain_[k] * gain_increase_[k],
317                                     config_.gain_updates.floor_first_increase),
318                            1.f);
319   }
320 
321   // Iteratively compute the gain required to attenuate the echo to a non
322   // noticeable level.
323   gain->fill(0.f);
324   for (int k = 0; k < 2; ++k) {
325     std::array<float, kFftLengthBy2Plus1> masker;
326     MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain, &masker);
327     GainToNoAudibleEcho(config_, low_noise_render, saturated_echo,
328                         saturating_echo_path, linear_echo_estimate, nearend,
329                         echo, masker, min_gain, max_gain, one_by_echo, gain);
330     AdjustForExternalFilters(gain);
331     if (narrow_peak_band) {
332       NarrowBandAttenuation(*narrow_peak_band, gain);
333     }
334   }
335 
336   // Adjust the gain for frequencies which have not yet converged.
337   AdjustNonConvergedFrequencies(gain);
338 
339   // Update the allowed maximum gain increase.
340   UpdateMaxGainIncrease(config_, no_saturation_counter_, low_noise_render,
341                         linear_echo_estimate, last_echo_, echo, last_gain_,
342                         *gain, &gain_increase_);
343 
344   // Adjust gain dynamics.
345   const float gain_bound =
346       std::max(0.001f, *std::min_element(gain->begin(), gain->end()) * 10000.f);
347   std::for_each(gain->begin(), gain->end(),
348                 [gain_bound](float& a) { a = std::min(a, gain_bound); });
349 
350   // Store data required for the gain computation of the next block.
351   std::copy(echo.begin(), echo.end(), last_echo_.begin());
352   std::copy(gain->begin(), gain->end(), last_gain_.begin());
353   MaskingPower(config_, nearend, comfort_noise, last_masker_, *gain,
354                &last_masker_);
355   aec3::VectorMath(optimization_).Sqrt(*gain);
356 }
357 
SuppressionGain(const EchoCanceller3Config & config,Aec3Optimization optimization)358 SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
359                                  Aec3Optimization optimization)
360     : optimization_(optimization), config_(config) {
361   last_gain_.fill(1.f);
362   last_masker_.fill(0.f);
363   gain_increase_.fill(1.f);
364   last_echo_.fill(0.f);
365 }
366 
GetGain(const std::array<float,kFftLengthBy2Plus1> & nearend,const std::array<float,kFftLengthBy2Plus1> & echo,const std::array<float,kFftLengthBy2Plus1> & comfort_noise,const RenderSignalAnalyzer & render_signal_analyzer,const AecState & aec_state,const std::vector<std::vector<float>> & render,float * high_bands_gain,std::array<float,kFftLengthBy2Plus1> * low_band_gain)367 void SuppressionGain::GetGain(
368     const std::array<float, kFftLengthBy2Plus1>& nearend,
369     const std::array<float, kFftLengthBy2Plus1>& echo,
370     const std::array<float, kFftLengthBy2Plus1>& comfort_noise,
371     const RenderSignalAnalyzer& render_signal_analyzer,
372     const AecState& aec_state,
373     const std::vector<std::vector<float>>& render,
374     float* high_bands_gain,
375     std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
376   RTC_DCHECK(high_bands_gain);
377   RTC_DCHECK(low_band_gain);
378 
379   const bool saturated_echo = aec_state.SaturatedEcho();
380   const bool saturating_echo_path = aec_state.SaturatingEchoPath();
381   const bool force_zero_gain = aec_state.ForcedZeroGain();
382   const bool linear_echo_estimate = aec_state.LinearEchoEstimate();
383 
384   if (force_zero_gain) {
385     last_gain_.fill(0.f);
386     std::copy(comfort_noise.begin(), comfort_noise.end(), last_masker_.begin());
387     low_band_gain->fill(0.f);
388     gain_increase_.fill(1.f);
389     *high_bands_gain = 0.f;
390     return;
391   }
392 
393   bool low_noise_render = low_render_detector_.Detect(render);
394 
395   // Compute gain for the lower band.
396   const rtc::Optional<int> narrow_peak_band =
397       render_signal_analyzer.NarrowPeakBand();
398   LowerBandGain(low_noise_render, narrow_peak_band, saturated_echo,
399                 saturating_echo_path, linear_echo_estimate, nearend, echo,
400                 comfort_noise, low_band_gain);
401 
402   // Compute the gain for the upper bands.
403   *high_bands_gain =
404       UpperBandsGain(narrow_peak_band, saturated_echo, render, *low_band_gain);
405 }
406 
407 // Detects when the render signal can be considered to have low power and
408 // consist of stationary noise.
Detect(const std::vector<std::vector<float>> & render)409 bool SuppressionGain::LowNoiseRenderDetector::Detect(
410     const std::vector<std::vector<float>>& render) {
411   float x2_sum = 0.f;
412   float x2_max = 0.f;
413   for (auto x_k : render[0]) {
414     const float x2 = x_k * x_k;
415     x2_sum += x2;
416     x2_max = std::max(x2_max, x2);
417   }
418 
419   constexpr float kThreshold = 50.f * 50.f * 64.f;
420   const bool low_noise_render =
421       average_power_ < kThreshold && x2_max < 3 * average_power_;
422   average_power_ = average_power_ * 0.9f + x2_sum * 0.1f;
423   return low_noise_render;
424 }
425 
426 }  // namespace webrtc
427