1 /*
2  *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
12 
13 #include <algorithm>
14 #include <functional>
15 
16 #if defined(WEBRTC_HAS_NEON)
17 #include <arm_neon.h>
18 #endif
19 #if defined(WEBRTC_ARCH_X86_FAMILY)
20 #include <emmintrin.h>
21 #endif
22 
23 namespace webrtc {
24 
25 namespace aec3 {
26 
27 // Computes and stores the echo return loss estimate of the filter, which is the
28 // sum of the partition frequency responses.
ErlComputer(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)29 void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
30                  rtc::ArrayView<float> erl) {
31   std::fill(erl.begin(), erl.end(), 0.f);
32   for (auto& H2_j : H2) {
33     std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
34                    std::plus<float>());
35   }
36 }
37 
38 #if defined(WEBRTC_HAS_NEON)
39 // Computes and stores the echo return loss estimate of the filter, which is the
40 // sum of the partition frequency responses.
ErlComputer_NEON(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)41 void ErlComputer_NEON(
42     const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
43     rtc::ArrayView<float> erl) {
44   std::fill(erl.begin(), erl.end(), 0.f);
45   for (auto& H2_j : H2) {
46     for (size_t k = 0; k < kFftLengthBy2; k += 4) {
47       const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
48       float32x4_t erl_k = vld1q_f32(&erl[k]);
49       erl_k = vaddq_f32(erl_k, H2_j_k);
50       vst1q_f32(&erl[k], erl_k);
51     }
52     erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
53   }
54 }
55 #endif
56 
57 #if defined(WEBRTC_ARCH_X86_FAMILY)
58 // Computes and stores the echo return loss estimate of the filter, which is the
59 // sum of the partition frequency responses.
ErlComputer_SSE2(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)60 void ErlComputer_SSE2(
61     const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
62     rtc::ArrayView<float> erl) {
63   std::fill(erl.begin(), erl.end(), 0.f);
64   for (auto& H2_j : H2) {
65     for (size_t k = 0; k < kFftLengthBy2; k += 4) {
66       const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
67       __m128 erl_k = _mm_loadu_ps(&erl[k]);
68       erl_k = _mm_add_ps(erl_k, H2_j_k);
69       _mm_storeu_ps(&erl[k], erl_k);
70     }
71     erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
72   }
73 }
74 #endif
75 
76 }  // namespace aec3
77 
ComputeErl(const Aec3Optimization & optimization,const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)78 void ComputeErl(const Aec3Optimization& optimization,
79                 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
80                 rtc::ArrayView<float> erl) {
81   RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
82   // Update the frequency response and echo return loss for the filter.
83   switch (optimization) {
84 #if defined(WEBRTC_ARCH_X86_FAMILY)
85     case Aec3Optimization::kSse2:
86       aec3::ErlComputer_SSE2(H2, erl);
87       break;
88     case Aec3Optimization::kAvx2:
89       aec3::ErlComputer_AVX2(H2, erl);
90       break;
91 #endif
92 #if defined(WEBRTC_HAS_NEON)
93     case Aec3Optimization::kNeon:
94       aec3::ErlComputer_NEON(H2, erl);
95       break;
96 #endif
97     default:
98       aec3::ErlComputer(H2, erl);
99   }
100 }
101 
102 }  // namespace webrtc
103