1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/residual_echo_estimator.h"
12 
13 #include <numeric>
14 #include <vector>
15 
16 #include "rtc_base/checks.h"
17 
18 namespace webrtc {
19 namespace {
20 
21 // Estimates the echo generating signal power as gated maximal power over a time
22 // window.
EchoGeneratingPower(const RenderBuffer & render_buffer,size_t min_delay,size_t max_delay,std::array<float,kFftLengthBy2Plus1> * X2)23 void EchoGeneratingPower(const RenderBuffer& render_buffer,
24                          size_t min_delay,
25                          size_t max_delay,
26                          std::array<float, kFftLengthBy2Plus1>* X2) {
27   X2->fill(0.f);
28   for (size_t k = min_delay; k <= max_delay; ++k) {
29     std::transform(X2->begin(), X2->end(), render_buffer.Spectrum(k).begin(),
30                    X2->begin(),
31                    [](float a, float b) { return std::max(a, b); });
32   }
33 
34   // Apply soft noise gate of -78 dBFS.
35   static constexpr float kNoiseGatePower = 27509.42f;
36   std::for_each(X2->begin(), X2->end(), [](float& a) {
37     if (kNoiseGatePower > a) {
38       a = std::max(0.f, a - 0.3f * (kNoiseGatePower - a));
39     }
40   });
41 }
42 
43 constexpr int kNoiseFloorCounterMax = 50;
44 constexpr float kNoiseFloorMin = 10.f * 10.f * 128.f * 128.f;
45 
46 // Updates estimate for the power of the stationary noise component in the
47 // render signal.
RenderNoisePower(const RenderBuffer & render_buffer,std::array<float,kFftLengthBy2Plus1> * X2_noise_floor,std::array<int,kFftLengthBy2Plus1> * X2_noise_floor_counter)48 void RenderNoisePower(
49     const RenderBuffer& render_buffer,
50     std::array<float, kFftLengthBy2Plus1>* X2_noise_floor,
51     std::array<int, kFftLengthBy2Plus1>* X2_noise_floor_counter) {
52   RTC_DCHECK(X2_noise_floor);
53   RTC_DCHECK(X2_noise_floor_counter);
54 
55   const auto render_power = render_buffer.Spectrum(0);
56   RTC_DCHECK_EQ(X2_noise_floor->size(), render_power.size());
57   RTC_DCHECK_EQ(X2_noise_floor_counter->size(), render_power.size());
58 
59   // Estimate the stationary noise power in a minimum statistics manner.
60   for (size_t k = 0; k < render_power.size(); ++k) {
61     // Decrease rapidly.
62     if (render_power[k] < (*X2_noise_floor)[k]) {
63       (*X2_noise_floor)[k] = render_power[k];
64       (*X2_noise_floor_counter)[k] = 0;
65     } else {
66       // Increase in a delayed, leaky manner.
67       if ((*X2_noise_floor_counter)[k] >= kNoiseFloorCounterMax) {
68         (*X2_noise_floor)[k] =
69             std::max((*X2_noise_floor)[k] * 1.1f, kNoiseFloorMin);
70       } else {
71         ++(*X2_noise_floor_counter)[k];
72       }
73     }
74   }
75 }
76 
77 }  // namespace
78 
ResidualEchoEstimator(const EchoCanceller3Config & config)79 ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config)
80     : config_(config) {
81   Reset();
82 }
83 
84 ResidualEchoEstimator::~ResidualEchoEstimator() = default;
85 
Estimate(const AecState & aec_state,const RenderBuffer & render_buffer,const std::array<float,kFftLengthBy2Plus1> & S2_linear,const std::array<float,kFftLengthBy2Plus1> & Y2,std::array<float,kFftLengthBy2Plus1> * R2)86 void ResidualEchoEstimator::Estimate(
87     const AecState& aec_state,
88     const RenderBuffer& render_buffer,
89     const std::array<float, kFftLengthBy2Plus1>& S2_linear,
90     const std::array<float, kFftLengthBy2Plus1>& Y2,
91     std::array<float, kFftLengthBy2Plus1>* R2) {
92   RTC_DCHECK(R2);
93 
94   // Estimate the power of the stationary noise in the render signal.
95   RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
96 
97   // Estimate the residual echo power.
98   if (aec_state.LinearEchoEstimate()) {
99     RTC_DCHECK(aec_state.FilterDelay());
100     const int filter_delay = *aec_state.FilterDelay();
101     LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
102     AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
103                   aec_state.ReverbDecay(), R2);
104 
105     // If the echo is saturated, estimate the echo power as the maximum echo
106     // power with a leakage factor.
107     if (aec_state.SaturatedEcho()) {
108       R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
109     }
110   } else {
111     const rtc::Optional<size_t> delay =
112         aec_state.ExternalDelay()
113             ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
114                                        : aec_state.ExternalDelay())
115             : rtc::Optional<size_t>();
116 
117     // Estimate the echo generating signal power.
118     std::array<float, kFftLengthBy2Plus1> X2;
119     if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
120       RTC_DCHECK(delay);
121       const int delay_use = static_cast<int>(*delay);
122 
123       // Computes the spectral power over the blocks surrounding the delay.
124       constexpr int kKnownDelayRenderWindowSize = 5;
125       // TODO(peah): Add lookahead since that was what was there initially.
126       static_assert(
127           kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
128           "Requirement to ensure that the render buffer is overrun");
129       EchoGeneratingPower(
130           render_buffer, std::max(0, delay_use - 1),
131           std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
132     } else {
133       // Computes the spectral power over the latest blocks.
134       // TODO(peah): Add lookahead since that was what was there initially.
135       EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
136                           &X2);
137     }
138 
139     // Subtract the stationary noise power to avoid stationary noise causing
140     // excessive echo suppression.
141     std::transform(
142         X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
143         [](float a, float b) { return std::max(0.f, a - 10.f * b); });
144 
145     NonLinearEstimate(
146         aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
147         config_.ep_strength.bounded_erl, aec_state.TransparentMode(),
148         aec_state.InitialState(), X2, Y2, R2);
149 
150     if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
151         aec_state.SaturatedEcho()) {
152       AddEchoReverb(*R2, aec_state.SaturatedEcho(),
153                     std::min(static_cast<size_t>(kAdaptiveFilterLength),
154                              delay.value_or(kAdaptiveFilterLength)),
155                     aec_state.ReverbDecay(), R2);
156     }
157   }
158 
159   // If the echo is deemed inaudible, set the residual echo to zero.
160   if (aec_state.InaudibleEcho()) {
161     R2->fill(0.f);
162     R2_old_.fill(0.f);
163     R2_hold_counter_.fill(0.f);
164   }
165 
166   std::copy(R2->begin(), R2->end(), R2_old_.begin());
167 }
168 
Reset()169 void ResidualEchoEstimator::Reset() {
170   X2_noise_floor_counter_.fill(kNoiseFloorCounterMax);
171   X2_noise_floor_.fill(kNoiseFloorMin);
172   R2_reverb_.fill(0.f);
173   R2_old_.fill(0.f);
174   R2_hold_counter_.fill(0.f);
175   for (auto& S2_k : S2_old_) {
176     S2_k.fill(0.f);
177   }
178 }
179 
LinearEstimate(const std::array<float,kFftLengthBy2Plus1> & S2_linear,const std::array<float,kFftLengthBy2Plus1> & erle,size_t delay,std::array<float,kFftLengthBy2Plus1> * R2)180 void ResidualEchoEstimator::LinearEstimate(
181     const std::array<float, kFftLengthBy2Plus1>& S2_linear,
182     const std::array<float, kFftLengthBy2Plus1>& erle,
183     size_t delay,
184     std::array<float, kFftLengthBy2Plus1>* R2) {
185   std::fill(R2_hold_counter_.begin(), R2_hold_counter_.end(), 10.f);
186   std::transform(erle.begin(), erle.end(), S2_linear.begin(), R2->begin(),
187                  [](float a, float b) {
188                    RTC_DCHECK_LT(0.f, a);
189                    return b / a;
190                  });
191 }
192 
NonLinearEstimate(bool sufficient_filter_updates,bool saturated_echo,bool bounded_erl,bool transparent_mode,bool initial_state,const std::array<float,kFftLengthBy2Plus1> & X2,const std::array<float,kFftLengthBy2Plus1> & Y2,std::array<float,kFftLengthBy2Plus1> * R2)193 void ResidualEchoEstimator::NonLinearEstimate(
194     bool sufficient_filter_updates,
195     bool saturated_echo,
196     bool bounded_erl,
197     bool transparent_mode,
198     bool initial_state,
199     const std::array<float, kFftLengthBy2Plus1>& X2,
200     const std::array<float, kFftLengthBy2Plus1>& Y2,
201     std::array<float, kFftLengthBy2Plus1>* R2) {
202   float echo_path_gain_lf;
203   float echo_path_gain_mf;
204   float echo_path_gain_hf;
205 
206   // Set echo path gains.
207   if (saturated_echo) {
208     // If the echo could be saturated, use a very conservative gain.
209     echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
210   } else if (sufficient_filter_updates && !bounded_erl) {
211     // If the filter should have been able to converge, and no assumption is
212     // possible on the ERL, use a low gain.
213     echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
214   } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) {
215     // If the filter should have been able to converge, and and it is known that
216     // the ERL is bounded, use a very low gain.
217     echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f;
218   } else if (!initial_state) {
219     // If the AEC is no longer in an initial state, assume a weak echo path.
220     echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
221   } else {
222     // In the initial state, use conservative gains.
223     echo_path_gain_lf = config_.ep_strength.lf;
224     echo_path_gain_mf = config_.ep_strength.mf;
225     echo_path_gain_hf = config_.ep_strength.hf;
226   }
227 
228   // Compute preliminary residual echo.
229   std::transform(
230       X2.begin(), X2.begin() + 12, R2->begin(),
231       [echo_path_gain_lf](float a) { return a * echo_path_gain_lf; });
232   std::transform(
233       X2.begin() + 12, X2.begin() + 25, R2->begin() + 12,
234       [echo_path_gain_mf](float a) { return a * echo_path_gain_mf; });
235   std::transform(
236       X2.begin() + 25, X2.end(), R2->begin() + 25,
237       [echo_path_gain_hf](float a) { return a * echo_path_gain_hf; });
238 
239   for (size_t k = 0; k < R2->size(); ++k) {
240     // Update hold counter.
241     R2_hold_counter_[k] = R2_old_[k] < (*R2)[k] ? 0 : R2_hold_counter_[k] + 1;
242 
243     // Compute the residual echo by holding a maximum echo powers and an echo
244     // fading corresponding to a room with an RT60 value of about 50 ms.
245     (*R2)[k] = R2_hold_counter_[k] < 2
246                    ? std::max((*R2)[k], R2_old_[k])
247                    : std::min((*R2)[k] + R2_old_[k] * 0.1f, Y2[k]);
248   }
249 }
250 
AddEchoReverb(const std::array<float,kFftLengthBy2Plus1> & S2,bool saturated_echo,size_t delay,float reverb_decay_factor,std::array<float,kFftLengthBy2Plus1> * R2)251 void ResidualEchoEstimator::AddEchoReverb(
252     const std::array<float, kFftLengthBy2Plus1>& S2,
253     bool saturated_echo,
254     size_t delay,
255     float reverb_decay_factor,
256     std::array<float, kFftLengthBy2Plus1>* R2) {
257   // Compute the decay factor for how much the echo has decayed before leaving
258   // the region covered by the linear model.
259   auto integer_power = [](float base, int exp) {
260     float result = 1.f;
261     for (int k = 0; k < exp; ++k) {
262       result *= base;
263     }
264     return result;
265   };
266   RTC_DCHECK_LE(delay, S2_old_.size());
267   const float reverb_decay_for_delay =
268       integer_power(reverb_decay_factor, S2_old_.size() - delay);
269 
270   // Update the estimate of the reverberant residual echo power.
271   S2_old_index_ = S2_old_index_ > 0 ? S2_old_index_ - 1 : S2_old_.size() - 1;
272   const auto& S2_end = S2_old_[S2_old_index_];
273   std::transform(
274       S2_end.begin(), S2_end.end(), R2_reverb_.begin(), R2_reverb_.begin(),
275       [reverb_decay_for_delay, reverb_decay_factor](float a, float b) {
276         return (b + a * reverb_decay_for_delay) * reverb_decay_factor;
277       });
278 
279   // Update the buffer of old echo powers.
280   if (saturated_echo) {
281     S2_old_[S2_old_index_].fill((*std::max_element(S2.begin(), S2.end())) *
282                                 100.f);
283   } else {
284     std::copy(S2.begin(), S2.end(), S2_old_[S2_old_index_].begin());
285   }
286 
287   // Add the power of the echo reverb to the residual echo power.
288   std::transform(R2->begin(), R2->end(), R2_reverb_.begin(), R2->begin(),
289                  std::plus<float>());
290 }
291 
292 }  // namespace webrtc
293