1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/aec_state.h"
12 
13 #include <math.h>
14 
15 #include <algorithm>
16 #include <numeric>
17 #include <vector>
18 
19 #include "absl/types/optional.h"
20 #include "api/array_view.h"
21 #include "modules/audio_processing/aec3/aec3_common.h"
22 #include "modules/audio_processing/logging/apm_data_dumper.h"
23 #include "rtc_base/atomic_ops.h"
24 #include "rtc_base/checks.h"
25 #include "system_wrappers/include/field_trial.h"
26 
27 namespace webrtc {
28 namespace {
29 
DeactivateInitialStateResetAtEchoPathChange()30 bool DeactivateInitialStateResetAtEchoPathChange() {
31   return field_trial::IsEnabled(
32       "WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
33 }
34 
FullResetAtEchoPathChange()35 bool FullResetAtEchoPathChange() {
36   return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
37 }
38 
SubtractorAnalyzerResetAtEchoPathChange()39 bool SubtractorAnalyzerResetAtEchoPathChange() {
40   return !field_trial::IsEnabled(
41       "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
42 }
43 
ComputeAvgRenderReverb(const SpectrumBuffer & spectrum_buffer,int delay_blocks,float reverb_decay,ReverbModel * reverb_model,rtc::ArrayView<float,kFftLengthBy2Plus1> reverb_power_spectrum)44 void ComputeAvgRenderReverb(
45     const SpectrumBuffer& spectrum_buffer,
46     int delay_blocks,
47     float reverb_decay,
48     ReverbModel* reverb_model,
49     rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
50   RTC_DCHECK(reverb_model);
51   const size_t num_render_channels = spectrum_buffer.buffer[0].size();
52   int idx_at_delay =
53       spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
54   int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
55 
56   std::array<float, kFftLengthBy2Plus1> X2_data;
57   rtc::ArrayView<const float> X2;
58   if (num_render_channels > 1) {
59     auto average_channels =
60         [](size_t num_render_channels,
61            rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
62                spectrum_band_0,
63            rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
64           std::fill(render_power.begin(), render_power.end(), 0.f);
65           for (size_t ch = 0; ch < num_render_channels; ++ch) {
66             for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
67               render_power[k] += spectrum_band_0[ch][k];
68             }
69           }
70           const float normalizer = 1.f / num_render_channels;
71           for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
72             render_power[k] *= normalizer;
73           }
74         };
75     average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
76                      X2_data);
77     reverb_model->UpdateReverbNoFreqShaping(
78         X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
79 
80     average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
81                      X2_data);
82     X2 = X2_data;
83   } else {
84     reverb_model->UpdateReverbNoFreqShaping(
85         spectrum_buffer.buffer[idx_past][/*channel=*/0],
86         /*power_spectrum_scaling=*/1.0f, reverb_decay);
87 
88     X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
89   }
90 
91   rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
92       reverb_model->reverb();
93   for (size_t k = 0; k < X2.size(); ++k) {
94     reverb_power_spectrum[k] = X2[k] + reverb_power[k];
95   }
96 }
97 
98 }  // namespace
99 
100 int AecState::instance_count_ = 0;
101 
GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const102 void AecState::GetResidualEchoScaling(
103     rtc::ArrayView<float> residual_scaling) const {
104   bool filter_has_had_time_to_converge;
105   if (config_.filter.conservative_initial_phase) {
106     filter_has_had_time_to_converge =
107         strong_not_saturated_render_blocks_ >= 1.5f * kNumBlocksPerSecond;
108   } else {
109     filter_has_had_time_to_converge =
110         strong_not_saturated_render_blocks_ >= 0.8f * kNumBlocksPerSecond;
111   }
112   echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
113                                           residual_scaling);
114 }
115 
ErleUncertainty() const116 absl::optional<float> AecState::ErleUncertainty() const {
117   if (SaturatedEcho()) {
118     return 1.f;
119   }
120 
121   return absl::nullopt;
122 }
123 
AecState(const EchoCanceller3Config & config,size_t num_capture_channels)124 AecState::AecState(const EchoCanceller3Config& config,
125                    size_t num_capture_channels)
126     : data_dumper_(
127           new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
128       config_(config),
129       num_capture_channels_(num_capture_channels),
130       deactivate_initial_state_reset_at_echo_path_change_(
131           DeactivateInitialStateResetAtEchoPathChange()),
132       full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
133       subtractor_analyzer_reset_at_echo_path_change_(
134           SubtractorAnalyzerResetAtEchoPathChange()),
135       initial_state_(config_),
136       delay_state_(config_, num_capture_channels_),
137       transparent_state_(TransparentMode::Create(config_)),
138       filter_quality_state_(config_, num_capture_channels_),
139       erl_estimator_(2 * kNumBlocksPerSecond),
140       erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
141       filter_analyzer_(config_, num_capture_channels_),
142       echo_audibility_(
143           config_.echo_audibility.use_stationarity_properties_at_init),
144       reverb_model_estimator_(config_, num_capture_channels_),
145       subtractor_output_analyzer_(num_capture_channels_) {}
146 
147 AecState::~AecState() = default;
148 
HandleEchoPathChange(const EchoPathVariability & echo_path_variability)149 void AecState::HandleEchoPathChange(
150     const EchoPathVariability& echo_path_variability) {
151   const auto full_reset = [&]() {
152     filter_analyzer_.Reset();
153     capture_signal_saturation_ = false;
154     strong_not_saturated_render_blocks_ = 0;
155     blocks_with_active_render_ = 0;
156     if (!deactivate_initial_state_reset_at_echo_path_change_) {
157       initial_state_.Reset();
158     }
159     if (transparent_state_) {
160       transparent_state_->Reset();
161     }
162     erle_estimator_.Reset(true);
163     erl_estimator_.Reset();
164     filter_quality_state_.Reset();
165   };
166 
167   // TODO(peah): Refine the reset scheme according to the type of gain and
168   // delay adjustment.
169 
170   if (full_reset_at_echo_path_change_ &&
171       echo_path_variability.delay_change !=
172           EchoPathVariability::DelayAdjustment::kNone) {
173     full_reset();
174   } else if (echo_path_variability.gain_change) {
175     erle_estimator_.Reset(false);
176   }
177   if (subtractor_analyzer_reset_at_echo_path_change_) {
178     subtractor_output_analyzer_.HandleEchoPathChange();
179   }
180 }
181 
Update(const absl::optional<DelayEstimate> & external_delay,rtc::ArrayView<const std::vector<std::array<float,kFftLengthBy2Plus1>>> adaptive_filter_frequency_responses,rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,const RenderBuffer & render_buffer,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> E2_refined,rtc::ArrayView<const std::array<float,kFftLengthBy2Plus1>> Y2,rtc::ArrayView<const SubtractorOutput> subtractor_output)182 void AecState::Update(
183     const absl::optional<DelayEstimate>& external_delay,
184     rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
185         adaptive_filter_frequency_responses,
186     rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
187     const RenderBuffer& render_buffer,
188     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
189     rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
190     rtc::ArrayView<const SubtractorOutput> subtractor_output) {
191   RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
192   RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
193   RTC_DCHECK_EQ(num_capture_channels_,
194                 adaptive_filter_frequency_responses.size());
195   RTC_DCHECK_EQ(num_capture_channels_,
196                 adaptive_filter_impulse_responses.size());
197 
198   // Analyze the filter outputs and filters.
199   bool any_filter_converged;
200   bool all_filters_diverged;
201   subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
202                                      &all_filters_diverged);
203 
204   bool any_filter_consistent;
205   float max_echo_path_gain;
206   filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
207                           &any_filter_consistent, &max_echo_path_gain);
208 
209   // Estimate the direct path delay of the filter.
210   if (config_.filter.use_linear_filter) {
211     delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
212                         strong_not_saturated_render_blocks_);
213   }
214 
215   const std::vector<std::vector<float>>& aligned_render_block =
216       render_buffer.Block(-delay_state_.MinDirectPathFilterDelay())[0];
217 
218   // Update render counters.
219   bool active_render = false;
220   for (size_t ch = 0; ch < aligned_render_block.size(); ++ch) {
221     const float render_energy = std::inner_product(
222         aligned_render_block[ch].begin(), aligned_render_block[ch].end(),
223         aligned_render_block[ch].begin(), 0.f);
224     if (render_energy > (config_.render_levels.active_render_limit *
225                          config_.render_levels.active_render_limit) *
226                             kFftLengthBy2) {
227       active_render = true;
228       break;
229     }
230   }
231   blocks_with_active_render_ += active_render ? 1 : 0;
232   strong_not_saturated_render_blocks_ +=
233       active_render && !SaturatedCapture() ? 1 : 0;
234 
235   std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
236 
237   ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
238                          delay_state_.MinDirectPathFilterDelay(), ReverbDecay(),
239                          &avg_render_reverb_, avg_render_spectrum_with_reverb);
240 
241   if (config_.echo_audibility.use_stationarity_properties) {
242     // Update the echo audibility evaluator.
243     echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
244                             delay_state_.MinDirectPathFilterDelay(),
245                             delay_state_.ExternalDelayReported());
246   }
247 
248   // Update the ERL and ERLE measures.
249   if (initial_state_.TransitionTriggered()) {
250     erle_estimator_.Reset(false);
251   }
252 
253   erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
254                          avg_render_spectrum_with_reverb, Y2, E2_refined,
255                          subtractor_output_analyzer_.ConvergedFilters());
256 
257   erl_estimator_.Update(
258       subtractor_output_analyzer_.ConvergedFilters(),
259       render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
260 
261   // Detect and flag echo saturation.
262   if (config_.ep_strength.echo_can_saturate) {
263     saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
264                                 UsableLinearEstimate(), subtractor_output,
265                                 max_echo_path_gain);
266   } else {
267     RTC_DCHECK(!saturation_detector_.SaturatedEcho());
268   }
269 
270   // Update the decision on whether to use the initial state parameter set.
271   initial_state_.Update(active_render, SaturatedCapture());
272 
273   // Detect whether the transparent mode should be activated.
274   if (transparent_state_) {
275     transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
276                                any_filter_consistent, any_filter_converged,
277                                all_filters_diverged, active_render,
278                                SaturatedCapture());
279   }
280 
281   // Analyze the quality of the filter.
282   filter_quality_state_.Update(active_render, TransparentModeActive(),
283                                SaturatedCapture(), external_delay,
284                                any_filter_converged);
285 
286   // Update the reverb estimate.
287   const bool stationary_block =
288       config_.echo_audibility.use_stationarity_properties &&
289       echo_audibility_.IsBlockStationary();
290 
291   reverb_model_estimator_.Update(
292       filter_analyzer_.GetAdjustedFilters(),
293       adaptive_filter_frequency_responses,
294       erle_estimator_.GetInstLinearQualityEstimates(),
295       delay_state_.DirectPathFilterDelays(),
296       filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
297 
298   erle_estimator_.Dump(data_dumper_);
299   reverb_model_estimator_.Dump(data_dumper_.get());
300   data_dumper_->DumpRaw("aec3_active_render", active_render);
301   data_dumper_->DumpRaw("aec3_erl", Erl());
302   data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
303   data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
304   data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
305   data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
306   data_dumper_->DumpRaw("aec3_filter_delay",
307                         filter_analyzer_.MinFilterDelayBlocks());
308 
309   data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
310   data_dumper_->DumpRaw("aec3_initial_state",
311                         initial_state_.InitialStateActive());
312   data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
313   data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
314   data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
315   data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
316 
317   data_dumper_->DumpRaw("aec3_external_delay_avaliable",
318                         external_delay ? 1 : 0);
319   data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
320                         GetReverbFrequencyResponse());
321 }
322 
InitialState(const EchoCanceller3Config & config)323 AecState::InitialState::InitialState(const EchoCanceller3Config& config)
324     : conservative_initial_phase_(config.filter.conservative_initial_phase),
325       initial_state_seconds_(config.filter.initial_state_seconds) {
326   Reset();
327 }
Reset()328 void AecState::InitialState::InitialState::Reset() {
329   initial_state_ = true;
330   strong_not_saturated_render_blocks_ = 0;
331 }
Update(bool active_render,bool saturated_capture)332 void AecState::InitialState::InitialState::Update(bool active_render,
333                                                   bool saturated_capture) {
334   strong_not_saturated_render_blocks_ +=
335       active_render && !saturated_capture ? 1 : 0;
336 
337   // Flag whether the initial state is still active.
338   bool prev_initial_state = initial_state_;
339   if (conservative_initial_phase_) {
340     initial_state_ =
341         strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
342   } else {
343     initial_state_ = strong_not_saturated_render_blocks_ <
344                      initial_state_seconds_ * kNumBlocksPerSecond;
345   }
346 
347   // Flag whether the transition from the initial state has started.
348   transition_triggered_ = !initial_state_ && prev_initial_state;
349 }
350 
FilterDelay(const EchoCanceller3Config & config,size_t num_capture_channels)351 AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
352                                    size_t num_capture_channels)
353     : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
354       filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
355       min_filter_delay_(delay_headroom_blocks_) {}
356 
Update(rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,const absl::optional<DelayEstimate> & external_delay,size_t blocks_with_proper_filter_adaptation)357 void AecState::FilterDelay::Update(
358     rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
359     const absl::optional<DelayEstimate>& external_delay,
360     size_t blocks_with_proper_filter_adaptation) {
361   // Update the delay based on the external delay.
362   if (external_delay &&
363       (!external_delay_ || external_delay_->delay != external_delay->delay)) {
364     external_delay_ = external_delay;
365     external_delay_reported_ = true;
366   }
367 
368   // Override the estimated delay if it is not certain that the filter has had
369   // time to converge.
370   const bool delay_estimator_may_not_have_converged =
371       blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
372   if (delay_estimator_may_not_have_converged && external_delay_) {
373     const int delay_guess = delay_headroom_blocks_;
374     std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
375               delay_guess);
376   } else {
377     RTC_DCHECK_EQ(filter_delays_blocks_.size(),
378                   analyzer_filter_delay_estimates_blocks.size());
379     std::copy(analyzer_filter_delay_estimates_blocks.begin(),
380               analyzer_filter_delay_estimates_blocks.end(),
381               filter_delays_blocks_.begin());
382   }
383 
384   min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
385                                         filter_delays_blocks_.end());
386 }
387 
FilteringQualityAnalyzer(const EchoCanceller3Config & config,size_t num_capture_channels)388 AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
389     const EchoCanceller3Config& config,
390     size_t num_capture_channels)
391     : use_linear_filter_(config.filter.use_linear_filter),
392       usable_linear_filter_estimates_(num_capture_channels, false) {}
393 
Reset()394 void AecState::FilteringQualityAnalyzer::Reset() {
395   std::fill(usable_linear_filter_estimates_.begin(),
396             usable_linear_filter_estimates_.end(), false);
397   overall_usable_linear_estimates_ = false;
398   filter_update_blocks_since_reset_ = 0;
399 }
400 
Update(bool active_render,bool transparent_mode,bool saturated_capture,const absl::optional<DelayEstimate> & external_delay,bool any_filter_converged)401 void AecState::FilteringQualityAnalyzer::Update(
402     bool active_render,
403     bool transparent_mode,
404     bool saturated_capture,
405     const absl::optional<DelayEstimate>& external_delay,
406     bool any_filter_converged) {
407   // Update blocks counter.
408   const bool filter_update = active_render && !saturated_capture;
409   filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
410   filter_update_blocks_since_start_ += filter_update ? 1 : 0;
411 
412   // Store convergence flag when observed.
413   convergence_seen_ = convergence_seen_ || any_filter_converged;
414 
415   // Verify requirements for achieving a decent filter. The requirements for
416   // filter adaptation at call startup are more restrictive than after an
417   // in-call reset.
418   const bool sufficient_data_to_converge_at_startup =
419       filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
420   const bool sufficient_data_to_converge_at_reset =
421       sufficient_data_to_converge_at_startup &&
422       filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
423 
424   // The linear filter can only be used if it has had time to converge.
425   overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
426                                      sufficient_data_to_converge_at_reset;
427 
428   // The linear filter can only be used if an external delay or convergence have
429   // been identified
430   overall_usable_linear_estimates_ =
431       overall_usable_linear_estimates_ && (external_delay || convergence_seen_);
432 
433   // If transparent mode is on, deactivate usign the linear filter.
434   overall_usable_linear_estimates_ =
435       overall_usable_linear_estimates_ && !transparent_mode;
436 
437   if (use_linear_filter_) {
438     std::fill(usable_linear_filter_estimates_.begin(),
439               usable_linear_filter_estimates_.end(),
440               overall_usable_linear_estimates_);
441   }
442 }
443 
Update(rtc::ArrayView<const std::vector<float>> x,bool saturated_capture,bool usable_linear_estimate,rtc::ArrayView<const SubtractorOutput> subtractor_output,float echo_path_gain)444 void AecState::SaturationDetector::Update(
445     rtc::ArrayView<const std::vector<float>> x,
446     bool saturated_capture,
447     bool usable_linear_estimate,
448     rtc::ArrayView<const SubtractorOutput> subtractor_output,
449     float echo_path_gain) {
450   saturated_echo_ = false;
451   if (!saturated_capture) {
452     return;
453   }
454 
455   if (usable_linear_estimate) {
456     constexpr float kSaturationThreshold = 20000.f;
457     for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
458       saturated_echo_ =
459           saturated_echo_ ||
460           (subtractor_output[ch].s_refined_max_abs > kSaturationThreshold ||
461            subtractor_output[ch].s_coarse_max_abs > kSaturationThreshold);
462     }
463   } else {
464     float max_sample = 0.f;
465     for (auto& channel : x) {
466       for (float sample : channel) {
467         max_sample = std::max(max_sample, fabsf(sample));
468       }
469     }
470 
471     const float kMargin = 10.f;
472     float peak_echo_amplitude = max_sample * echo_path_gain * kMargin;
473     saturated_echo_ = saturated_echo_ || peak_echo_amplitude > 32000;
474   }
475 }
476 
477 }  // namespace webrtc
478