1 /*
2  *  Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/adaptive_fir_filter.h"
12 
13 #include <math.h>
14 #include <algorithm>
15 #include <numeric>
16 #include <string>
17 #include "typedefs.h"  // NOLINT(build/include)
18 #if defined(WEBRTC_ARCH_X86_FAMILY)
19 #include <emmintrin.h>
20 #endif
21 #include "modules/audio_processing/aec3/aec3_fft.h"
22 #include "modules/audio_processing/aec3/aec_state.h"
23 #include "modules/audio_processing/aec3/cascaded_biquad_filter.h"
24 #include "modules/audio_processing/aec3/render_signal_analyzer.h"
25 #include "modules/audio_processing/aec3/shadow_filter_update_gain.h"
26 #include "modules/audio_processing/logging/apm_data_dumper.h"
27 #include "modules/audio_processing/test/echo_canceller_test_tools.h"
28 #include "rtc_base/arraysize.h"
29 #include "rtc_base/numerics/safe_minmax.h"
30 #include "rtc_base/random.h"
31 #include "system_wrappers/include/cpu_features_wrapper.h"
32 #include "test/gtest.h"
33 
34 namespace webrtc {
35 namespace aec3 {
36 namespace {
37 
ProduceDebugText(size_t delay)38 std::string ProduceDebugText(size_t delay) {
39   std::ostringstream ss;
40   ss << ", Delay: " << delay;
41   return ss.str();
42 }
43 
44 }  // namespace
45 
46 #if defined(WEBRTC_HAS_NEON)
47 // Verifies that the optimized methods for filter adaptation are similar to
48 // their reference counterparts.
TEST(AdaptiveFirFilter,FilterAdaptationNeonOptimizations)49 TEST(AdaptiveFirFilter, FilterAdaptationNeonOptimizations) {
50   RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12,
51                              std::vector<size_t>(1, 12));
52   Random random_generator(42U);
53   std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
54   FftData S_C;
55   FftData S_NEON;
56   FftData G;
57   Aec3Fft fft;
58   std::vector<FftData> H_C(10);
59   std::vector<FftData> H_NEON(10);
60   for (auto& H_j : H_C) {
61     H_j.Clear();
62   }
63   for (auto& H_j : H_NEON) {
64     H_j.Clear();
65   }
66 
67   for (size_t k = 0; k < 30; ++k) {
68     RandomizeSampleVector(&random_generator, x[0]);
69     render_buffer.Insert(x);
70   }
71 
72   for (size_t j = 0; j < G.re.size(); ++j) {
73     G.re[j] = j / 10001.f;
74   }
75   for (size_t j = 1; j < G.im.size() - 1; ++j) {
76     G.im[j] = j / 20001.f;
77   }
78   G.im[0] = 0.f;
79   G.im[G.im.size() - 1] = 0.f;
80 
81   AdaptPartitions_NEON(render_buffer, G, H_NEON);
82   AdaptPartitions(render_buffer, G, H_C);
83   AdaptPartitions_NEON(render_buffer, G, H_NEON);
84   AdaptPartitions(render_buffer, G, H_C);
85 
86   for (size_t l = 0; l < H_C.size(); ++l) {
87     for (size_t j = 0; j < H_C[l].im.size(); ++j) {
88       EXPECT_NEAR(H_C[l].re[j], H_NEON[l].re[j], fabs(H_C[l].re[j] * 0.00001f));
89       EXPECT_NEAR(H_C[l].im[j], H_NEON[l].im[j], fabs(H_C[l].im[j] * 0.00001f));
90     }
91   }
92 
93   ApplyFilter_NEON(render_buffer, H_NEON, &S_NEON);
94   ApplyFilter(render_buffer, H_C, &S_C);
95   for (size_t j = 0; j < S_C.re.size(); ++j) {
96     EXPECT_NEAR(S_C.re[j], S_NEON.re[j], fabs(S_C.re[j] * 0.00001f));
97     EXPECT_NEAR(S_C.im[j], S_NEON.im[j], fabs(S_C.re[j] * 0.00001f));
98   }
99 }
100 
101 // Verifies that the optimized method for frequency response computation is
102 // bitexact to the reference counterpart.
TEST(AdaptiveFirFilter,UpdateFrequencyResponseNeonOptimization)103 TEST(AdaptiveFirFilter, UpdateFrequencyResponseNeonOptimization) {
104   const size_t kNumPartitions = 12;
105   std::vector<FftData> H(kNumPartitions);
106   std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
107   std::vector<std::array<float, kFftLengthBy2Plus1>> H2_NEON(kNumPartitions);
108 
109   for (size_t j = 0; j < H.size(); ++j) {
110     for (size_t k = 0; k < H[j].re.size(); ++k) {
111       H[j].re[k] = k + j / 3.f;
112       H[j].im[k] = j + k / 7.f;
113     }
114   }
115 
116   UpdateFrequencyResponse(H, &H2);
117   UpdateFrequencyResponse_NEON(H, &H2_NEON);
118 
119   for (size_t j = 0; j < H2.size(); ++j) {
120     for (size_t k = 0; k < H[j].re.size(); ++k) {
121       EXPECT_FLOAT_EQ(H2[j][k], H2_NEON[j][k]);
122     }
123   }
124 }
125 
126 // Verifies that the optimized method for echo return loss computation is
127 // bitexact to the reference counterpart.
TEST(AdaptiveFirFilter,UpdateErlNeonOptimization)128 TEST(AdaptiveFirFilter, UpdateErlNeonOptimization) {
129   const size_t kNumPartitions = 12;
130   std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
131   std::array<float, kFftLengthBy2Plus1> erl;
132   std::array<float, kFftLengthBy2Plus1> erl_NEON;
133 
134   for (size_t j = 0; j < H2.size(); ++j) {
135     for (size_t k = 0; k < H2[j].size(); ++k) {
136       H2[j][k] = k + j / 3.f;
137     }
138   }
139 
140   UpdateErlEstimator(H2, &erl);
141   UpdateErlEstimator_NEON(H2, &erl_NEON);
142 
143   for (size_t j = 0; j < erl.size(); ++j) {
144     EXPECT_FLOAT_EQ(erl[j], erl_NEON[j]);
145   }
146 }
147 
148 #endif
149 
150 #if defined(WEBRTC_ARCH_X86_FAMILY)
151 // Verifies that the optimized methods for filter adaptation are bitexact to
152 // their reference counterparts.
TEST(AdaptiveFirFilter,FilterAdaptationSse2Optimizations)153 TEST(AdaptiveFirFilter, FilterAdaptationSse2Optimizations) {
154   bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0);
155   if (use_sse2) {
156     RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 12,
157                                std::vector<size_t>(1, 12));
158     Random random_generator(42U);
159     std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
160     FftData S_C;
161     FftData S_SSE2;
162     FftData G;
163     Aec3Fft fft;
164     std::vector<FftData> H_C(10);
165     std::vector<FftData> H_SSE2(10);
166     for (auto& H_j : H_C) {
167       H_j.Clear();
168     }
169     for (auto& H_j : H_SSE2) {
170       H_j.Clear();
171     }
172 
173     for (size_t k = 0; k < 500; ++k) {
174       RandomizeSampleVector(&random_generator, x[0]);
175       render_buffer.Insert(x);
176 
177       ApplyFilter_SSE2(render_buffer, H_SSE2, &S_SSE2);
178       ApplyFilter(render_buffer, H_C, &S_C);
179       for (size_t j = 0; j < S_C.re.size(); ++j) {
180         EXPECT_FLOAT_EQ(S_C.re[j], S_SSE2.re[j]);
181         EXPECT_FLOAT_EQ(S_C.im[j], S_SSE2.im[j]);
182       }
183 
184       std::for_each(G.re.begin(), G.re.end(),
185                     [&](float& a) { a = random_generator.Rand<float>(); });
186       std::for_each(G.im.begin(), G.im.end(),
187                     [&](float& a) { a = random_generator.Rand<float>(); });
188 
189       AdaptPartitions_SSE2(render_buffer, G, H_SSE2);
190       AdaptPartitions(render_buffer, G, H_C);
191 
192       for (size_t k = 0; k < H_C.size(); ++k) {
193         for (size_t j = 0; j < H_C[k].re.size(); ++j) {
194           EXPECT_FLOAT_EQ(H_C[k].re[j], H_SSE2[k].re[j]);
195           EXPECT_FLOAT_EQ(H_C[k].im[j], H_SSE2[k].im[j]);
196         }
197       }
198     }
199   }
200 }
201 
202 // Verifies that the optimized method for frequency response computation is
203 // bitexact to the reference counterpart.
TEST(AdaptiveFirFilter,UpdateFrequencyResponseSse2Optimization)204 TEST(AdaptiveFirFilter, UpdateFrequencyResponseSse2Optimization) {
205   bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0);
206   if (use_sse2) {
207     const size_t kNumPartitions = 12;
208     std::vector<FftData> H(kNumPartitions);
209     std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
210     std::vector<std::array<float, kFftLengthBy2Plus1>> H2_SSE2(kNumPartitions);
211 
212     for (size_t j = 0; j < H.size(); ++j) {
213       for (size_t k = 0; k < H[j].re.size(); ++k) {
214         H[j].re[k] = k + j / 3.f;
215         H[j].im[k] = j + k / 7.f;
216       }
217     }
218 
219     UpdateFrequencyResponse(H, &H2);
220     UpdateFrequencyResponse_SSE2(H, &H2_SSE2);
221 
222     for (size_t j = 0; j < H2.size(); ++j) {
223       for (size_t k = 0; k < H[j].re.size(); ++k) {
224         EXPECT_FLOAT_EQ(H2[j][k], H2_SSE2[j][k]);
225       }
226     }
227   }
228 }
229 
230 // Verifies that the optimized method for echo return loss computation is
231 // bitexact to the reference counterpart.
TEST(AdaptiveFirFilter,UpdateErlSse2Optimization)232 TEST(AdaptiveFirFilter, UpdateErlSse2Optimization) {
233   bool use_sse2 = (WebRtc_GetCPUInfo(kSSE2) != 0);
234   if (use_sse2) {
235     const size_t kNumPartitions = 12;
236     std::vector<std::array<float, kFftLengthBy2Plus1>> H2(kNumPartitions);
237     std::array<float, kFftLengthBy2Plus1> erl;
238     std::array<float, kFftLengthBy2Plus1> erl_SSE2;
239 
240     for (size_t j = 0; j < H2.size(); ++j) {
241       for (size_t k = 0; k < H2[j].size(); ++k) {
242         H2[j][k] = k + j / 3.f;
243       }
244     }
245 
246     UpdateErlEstimator(H2, &erl);
247     UpdateErlEstimator_SSE2(H2, &erl_SSE2);
248 
249     for (size_t j = 0; j < erl.size(); ++j) {
250       EXPECT_FLOAT_EQ(erl[j], erl_SSE2[j]);
251     }
252   }
253 }
254 
255 #endif
256 
257 #if RTC_DCHECK_IS_ON && GTEST_HAS_DEATH_TEST && !defined(WEBRTC_ANDROID)
258 // Verifies that the check for non-null data dumper works.
TEST(AdaptiveFirFilter,NullDataDumper)259 TEST(AdaptiveFirFilter, NullDataDumper) {
260   EXPECT_DEATH(AdaptiveFirFilter(9, DetectOptimization(), nullptr), "");
261 }
262 
263 // Verifies that the check for non-null filter output works.
TEST(AdaptiveFirFilter,NullFilterOutput)264 TEST(AdaptiveFirFilter, NullFilterOutput) {
265   ApmDataDumper data_dumper(42);
266   AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
267   RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
268                              filter.SizePartitions(),
269                              std::vector<size_t>(1, filter.SizePartitions()));
270   EXPECT_DEATH(filter.Filter(render_buffer, nullptr), "");
271 }
272 
273 #endif
274 
275 // Verifies that the filter statistics can be accessed when filter statistics
276 // are turned on.
TEST(AdaptiveFirFilter,FilterStatisticsAccess)277 TEST(AdaptiveFirFilter, FilterStatisticsAccess) {
278   ApmDataDumper data_dumper(42);
279   AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
280   filter.Erl();
281   filter.FilterFrequencyResponse();
282 }
283 
284 // Verifies that the filter size if correctly repported.
TEST(AdaptiveFirFilter,FilterSize)285 TEST(AdaptiveFirFilter, FilterSize) {
286   ApmDataDumper data_dumper(42);
287   for (size_t filter_size = 1; filter_size < 5; ++filter_size) {
288     AdaptiveFirFilter filter(filter_size, DetectOptimization(), &data_dumper);
289     EXPECT_EQ(filter_size, filter.SizePartitions());
290   }
291 }
292 
293 // Verifies that the filter is being able to properly filter a signal and to
294 // adapt its coefficients.
TEST(AdaptiveFirFilter,FilterAndAdapt)295 TEST(AdaptiveFirFilter, FilterAndAdapt) {
296   constexpr size_t kNumBlocksToProcess = 500;
297   ApmDataDumper data_dumper(42);
298   AdaptiveFirFilter filter(9, DetectOptimization(), &data_dumper);
299   Aec3Fft fft;
300   RenderBuffer render_buffer(Aec3Optimization::kNone, 3,
301                              filter.SizePartitions(),
302                              std::vector<size_t>(1, filter.SizePartitions()));
303   ShadowFilterUpdateGain gain;
304   Random random_generator(42U);
305   std::vector<std::vector<float>> x(3, std::vector<float>(kBlockSize, 0.f));
306   std::vector<float> n(kBlockSize, 0.f);
307   std::vector<float> y(kBlockSize, 0.f);
308   AecState aec_state(EchoCanceller3Config{});
309   RenderSignalAnalyzer render_signal_analyzer;
310   std::vector<float> e(kBlockSize, 0.f);
311   std::array<float, kFftLength> s_scratch;
312   std::array<float, kBlockSize> s;
313   FftData S;
314   FftData G;
315   FftData E;
316   std::array<float, kFftLengthBy2Plus1> Y2;
317   std::array<float, kFftLengthBy2Plus1> E2_main;
318   std::array<float, kFftLengthBy2Plus1> E2_shadow;
319   // [B,A] = butter(2,100/8000,'high')
320   constexpr CascadedBiQuadFilter::BiQuadCoefficients
321       kHighPassFilterCoefficients = {{0.97261f, -1.94523f, 0.97261f},
322                                      {-1.94448f, 0.94598f}};
323   Y2.fill(0.f);
324   E2_main.fill(0.f);
325   E2_shadow.fill(0.f);
326 
327   constexpr float kScale = 1.0f / kFftLengthBy2;
328 
329   for (size_t delay_samples : {0, 64, 150, 200, 301}) {
330     DelayBuffer<float> delay_buffer(delay_samples);
331     CascadedBiQuadFilter x_hp_filter(kHighPassFilterCoefficients, 1);
332     CascadedBiQuadFilter y_hp_filter(kHighPassFilterCoefficients, 1);
333 
334     SCOPED_TRACE(ProduceDebugText(delay_samples));
335     for (size_t k = 0; k < kNumBlocksToProcess; ++k) {
336       RandomizeSampleVector(&random_generator, x[0]);
337       delay_buffer.Delay(x[0], y);
338 
339       RandomizeSampleVector(&random_generator, n);
340       static constexpr float kNoiseScaling = 1.f / 100.f;
341       std::transform(
342           y.begin(), y.end(), n.begin(), y.begin(),
343           [](float a, float b) { return a + b * kNoiseScaling; });
344 
345       x_hp_filter.Process(x[0]);
346       y_hp_filter.Process(y);
347 
348       render_buffer.Insert(x);
349       render_signal_analyzer.Update(render_buffer, aec_state.FilterDelay());
350 
351       filter.Filter(render_buffer, &S);
352       fft.Ifft(S, &s_scratch);
353       std::transform(y.begin(), y.end(), s_scratch.begin() + kFftLengthBy2,
354                      e.begin(),
355                      [&](float a, float b) { return a - b * kScale; });
356       std::for_each(e.begin(), e.end(),
357                     [](float& a) { a = rtc::SafeClamp(a, -32768.f, 32767.f); });
358       fft.ZeroPaddedFft(e, &E);
359       for (size_t k = 0; k < kBlockSize; ++k) {
360         s[k] = kScale * s_scratch[k + kFftLengthBy2];
361       }
362 
363       gain.Compute(render_buffer, render_signal_analyzer, E,
364                    filter.SizePartitions(), false, &G);
365       filter.Adapt(render_buffer, G);
366       aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
367       aec_state.Update(filter.FilterFrequencyResponse(),
368                        filter.FilterImpulseResponse(), true, rtc::nullopt,
369                        render_buffer, E2_main, Y2, x[0], s, false);
370     }
371     // Verify that the filter is able to perform well.
372     EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
373               std::inner_product(y.begin(), y.end(), y.begin(), 0.f));
374     ASSERT_TRUE(aec_state.FilterDelay());
375     EXPECT_EQ(delay_samples / kBlockSize, *aec_state.FilterDelay());
376   }
377 }
378 }  // namespace aec3
379 }  // namespace webrtc
380