1 /*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
12
13 #include <algorithm>
14 #include <functional>
15
16 #if defined(WEBRTC_HAS_NEON)
17 #include <arm_neon.h>
18 #endif
19 #if defined(WEBRTC_ARCH_X86_FAMILY)
20 #include <emmintrin.h>
21 #endif
22
23 namespace webrtc {
24
25 namespace aec3 {
26
27 // Computes and stores the echo return loss estimate of the filter, which is the
28 // sum of the partition frequency responses.
ErlComputer(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)29 void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
30 rtc::ArrayView<float> erl) {
31 std::fill(erl.begin(), erl.end(), 0.f);
32 for (auto& H2_j : H2) {
33 std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
34 std::plus<float>());
35 }
36 }
37
38 #if defined(WEBRTC_HAS_NEON)
39 // Computes and stores the echo return loss estimate of the filter, which is the
40 // sum of the partition frequency responses.
ErlComputer_NEON(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)41 void ErlComputer_NEON(
42 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
43 rtc::ArrayView<float> erl) {
44 std::fill(erl.begin(), erl.end(), 0.f);
45 for (auto& H2_j : H2) {
46 for (size_t k = 0; k < kFftLengthBy2; k += 4) {
47 const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
48 float32x4_t erl_k = vld1q_f32(&erl[k]);
49 erl_k = vaddq_f32(erl_k, H2_j_k);
50 vst1q_f32(&erl[k], erl_k);
51 }
52 erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
53 }
54 }
55 #endif
56
57 #if defined(WEBRTC_ARCH_X86_FAMILY)
58 // Computes and stores the echo return loss estimate of the filter, which is the
59 // sum of the partition frequency responses.
ErlComputer_SSE2(const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)60 void ErlComputer_SSE2(
61 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
62 rtc::ArrayView<float> erl) {
63 std::fill(erl.begin(), erl.end(), 0.f);
64 for (auto& H2_j : H2) {
65 for (size_t k = 0; k < kFftLengthBy2; k += 4) {
66 const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
67 __m128 erl_k = _mm_loadu_ps(&erl[k]);
68 erl_k = _mm_add_ps(erl_k, H2_j_k);
69 _mm_storeu_ps(&erl[k], erl_k);
70 }
71 erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
72 }
73 }
74 #endif
75
76 } // namespace aec3
77
ComputeErl(const Aec3Optimization & optimization,const std::vector<std::array<float,kFftLengthBy2Plus1>> & H2,rtc::ArrayView<float> erl)78 void ComputeErl(const Aec3Optimization& optimization,
79 const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
80 rtc::ArrayView<float> erl) {
81 RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
82 // Update the frequency response and echo return loss for the filter.
83 switch (optimization) {
84 #if defined(WEBRTC_ARCH_X86_FAMILY)
85 case Aec3Optimization::kSse2:
86 aec3::ErlComputer_SSE2(H2, erl);
87 break;
88 case Aec3Optimization::kAvx2:
89 aec3::ErlComputer_AVX2(H2, erl);
90 break;
91 #endif
92 #if defined(WEBRTC_HAS_NEON)
93 case Aec3Optimization::kNeon:
94 aec3::ErlComputer_NEON(H2, erl);
95 break;
96 #endif
97 default:
98 aec3::ErlComputer(H2, erl);
99 }
100 }
101
102 } // namespace webrtc
103