/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * The core AEC algorithm, which is presented with time-aligned signals.
 */

#include "modules/audio_processing/aec/aec_core.h"

#include <math.h>
#include <string.h>

extern "C" {
#include "common_audio/signal_processing/include/signal_processing_library.h"
}
#include "modules/audio_processing/aec/aec_core_optimized_methods.h"
#include "modules/audio_processing/utility/ooura_fft.h"

namespace webrtc {

// Per-bin lookup tables that are defined outside this file. Both are indexed
// by frequency bin in WebRtcAec_Overdrive_mips below.
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];

WebRtcAec_FilterFar_mips(int num_partitions,int x_fft_buf_block_pos,float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],float y_fft[2][PART_LEN1])30 void WebRtcAec_FilterFar_mips(
31     int num_partitions,
32     int x_fft_buf_block_pos,
33     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
34     float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
35     float y_fft[2][PART_LEN1]) {
36   int i;
37   for (i = 0; i < num_partitions; i++) {
38     int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
39     int pos = i * PART_LEN1;
40     // Check for wrap
41     if (i + x_fft_buf_block_pos >= num_partitions) {
42       xPos -= num_partitions * (PART_LEN1);
43     }
44     float* yf0 = y_fft[0];
45     float* yf1 = y_fft[1];
46     float* aRe = x_fft_buf[0] + xPos;
47     float* aIm = x_fft_buf[1] + xPos;
48     float* bRe = h_fft_buf[0] + pos;
49     float* bIm = h_fft_buf[1] + pos;
50     float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
51     int len = PART_LEN1 >> 1;
52 
53     __asm __volatile(
54       ".set       push                                                \n\t"
55       ".set       noreorder                                           \n\t"
56       "1:                                                             \n\t"
57       "lwc1       %[f0],      0(%[aRe])                               \n\t"
58       "lwc1       %[f1],      0(%[bRe])                               \n\t"
59       "lwc1       %[f2],      0(%[bIm])                               \n\t"
60       "lwc1       %[f3],      0(%[aIm])                               \n\t"
61       "lwc1       %[f4],      4(%[aRe])                               \n\t"
62       "lwc1       %[f5],      4(%[bRe])                               \n\t"
63       "lwc1       %[f6],      4(%[bIm])                               \n\t"
64       "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
65       "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
66       "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
67       "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
68       "lwc1       %[f7],      4(%[aIm])                               \n\t"
69 #if !defined(MIPS32_R2_LE)
70       "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
71       "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
72       "mul.s      %[f11],     %[f6],          %[f7]                   \n\t"
73       "addiu      %[aRe],     %[aRe],         8                       \n\t"
74       "addiu      %[aIm],     %[aIm],         8                       \n\t"
75       "addiu      %[len],     %[len],         -1                      \n\t"
76       "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
77       "mul.s      %[f12],     %[f7],          %[f5]                   \n\t"
78       "lwc1       %[f2],      0(%[yf0])                               \n\t"
79       "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
80       "lwc1       %[f3],      0(%[yf1])                               \n\t"
81       "sub.s      %[f9],      %[f9],          %[f11]                  \n\t"
82       "lwc1       %[f6],      4(%[yf0])                               \n\t"
83       "add.s      %[f4],      %[f4],          %[f12]                  \n\t"
84 #else  // #if !defined(MIPS32_R2_LE)
85       "addiu      %[aRe],     %[aRe],         8                       \n\t"
86       "addiu      %[aIm],     %[aIm],         8                       \n\t"
87       "addiu      %[len],     %[len],         -1                      \n\t"
88       "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
89       "lwc1       %[f2],      0(%[yf0])                               \n\t"
90       "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
91       "lwc1       %[f3],      0(%[yf1])                               \n\t"
92       "nmsub.s    %[f9],      %[f9],          %[f6],      %[f7]       \n\t"
93       "lwc1       %[f6],      4(%[yf0])                               \n\t"
94       "madd.s     %[f4],      %[f4],          %[f7],      %[f5]       \n\t"
95 #endif  // #if !defined(MIPS32_R2_LE)
96       "lwc1       %[f5],      4(%[yf1])                               \n\t"
97       "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
98       "addiu      %[bRe],     %[bRe],         8                       \n\t"
99       "addiu      %[bIm],     %[bIm],         8                       \n\t"
100       "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
101       "add.s      %[f6],      %[f6],          %[f9]                   \n\t"
102       "add.s      %[f5],      %[f5],          %[f4]                   \n\t"
103       "swc1       %[f2],      0(%[yf0])                               \n\t"
104       "swc1       %[f3],      0(%[yf1])                               \n\t"
105       "swc1       %[f6],      4(%[yf0])                               \n\t"
106       "swc1       %[f5],      4(%[yf1])                               \n\t"
107       "addiu      %[yf0],     %[yf0],         8                       \n\t"
108       "bgtz       %[len],     1b                                      \n\t"
109       " addiu     %[yf1],     %[yf1],         8                       \n\t"
110       "lwc1       %[f0],      0(%[aRe])                               \n\t"
111       "lwc1       %[f1],      0(%[bRe])                               \n\t"
112       "lwc1       %[f2],      0(%[bIm])                               \n\t"
113       "lwc1       %[f3],      0(%[aIm])                               \n\t"
114       "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
115       "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
116 #if !defined(MIPS32_R2_LE)
117       "mul.s      %[f12],     %[f2],          %[f3]                   \n\t"
118       "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
119       "sub.s      %[f8],      %[f8],          %[f12]                  \n\t"
120       "lwc1       %[f2],      0(%[yf0])                               \n\t"
121       "add.s      %[f1],      %[f0],          %[f1]                   \n\t"
122       "lwc1       %[f3],      0(%[yf1])                               \n\t"
123 #else  // #if !defined(MIPS32_R2_LE)
124       "nmsub.s    %[f8],      %[f8],          %[f2],      %[f3]       \n\t"
125       "lwc1       %[f2],      0(%[yf0])                               \n\t"
126       "madd.s     %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
127       "lwc1       %[f3],      0(%[yf1])                               \n\t"
128 #endif  // #if !defined(MIPS32_R2_LE)
129       "add.s      %[f2],      %[f2],          %[f8]                   \n\t"
130       "add.s      %[f3],      %[f3],          %[f1]                   \n\t"
131       "swc1       %[f2],      0(%[yf0])                               \n\t"
132       "swc1       %[f3],      0(%[yf1])                               \n\t"
133       ".set       pop                                                 \n\t"
134       : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
135         [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
136         [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
137         [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
138         [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
139         [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
140         [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
141       :
142       : "memory");
143   }
144 }
145 
WebRtcAec_FilterAdaptation_mips(const OouraFft & ooura_fft,int num_partitions,int x_fft_buf_block_pos,float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],float e_fft[2][PART_LEN1],float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1])146 void WebRtcAec_FilterAdaptation_mips(
147     const OouraFft& ooura_fft,
148     int num_partitions,
149     int x_fft_buf_block_pos,
150     float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
151     float e_fft[2][PART_LEN1],
152     float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
153   float fft[PART_LEN2];
154   int i;
155   for (i = 0; i < num_partitions; i++) {
156     int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
157     int pos;
158     // Check for wrap
159     if (i + x_fft_buf_block_pos >= num_partitions) {
160       xPos -= num_partitions * PART_LEN1;
161     }
162 
163     pos = i * PART_LEN1;
164     float* aRe = x_fft_buf[0] + xPos;
165     float* aIm = x_fft_buf[1] + xPos;
166     float* bRe = e_fft[0];
167     float* bIm = e_fft[1];
168     float* fft_tmp;
169 
170     float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12;
171     int len = PART_LEN >> 1;
172 
173     __asm __volatile(
174       ".set       push                                                \n\t"
175       ".set       noreorder                                           \n\t"
176       "addiu      %[fft_tmp], %[fft],         0                       \n\t"
177       "1:                                                             \n\t"
178       "lwc1       %[f0],      0(%[aRe])                               \n\t"
179       "lwc1       %[f1],      0(%[bRe])                               \n\t"
180       "lwc1       %[f2],      0(%[bIm])                               \n\t"
181       "lwc1       %[f4],      4(%[aRe])                               \n\t"
182       "lwc1       %[f5],      4(%[bRe])                               \n\t"
183       "lwc1       %[f6],      4(%[bIm])                               \n\t"
184       "addiu      %[aRe],     %[aRe],         8                       \n\t"
185       "addiu      %[bRe],     %[bRe],         8                       \n\t"
186       "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
187       "mul.s      %[f0],      %[f0],          %[f2]                   \n\t"
188       "lwc1       %[f3],      0(%[aIm])                               \n\t"
189       "mul.s      %[f9],      %[f4],          %[f5]                   \n\t"
190       "lwc1       %[f7],      4(%[aIm])                               \n\t"
191       "mul.s      %[f4],      %[f4],          %[f6]                   \n\t"
192 #if !defined(MIPS32_R2_LE)
193       "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
194       "mul.s      %[f1],      %[f3],          %[f1]                   \n\t"
195       "mul.s      %[f11],     %[f7],          %[f6]                   \n\t"
196       "mul.s      %[f5],      %[f7],          %[f5]                   \n\t"
197       "addiu      %[aIm],     %[aIm],         8                       \n\t"
198       "addiu      %[bIm],     %[bIm],         8                       \n\t"
199       "addiu      %[len],     %[len],         -1                      \n\t"
200       "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
201       "sub.s      %[f1],      %[f0],          %[f1]                   \n\t"
202       "add.s      %[f9],      %[f9],          %[f11]                  \n\t"
203       "sub.s      %[f5],      %[f4],          %[f5]                   \n\t"
204 #else  // #if !defined(MIPS32_R2_LE)
205       "addiu      %[aIm],     %[aIm],         8                       \n\t"
206       "addiu      %[bIm],     %[bIm],         8                       \n\t"
207       "addiu      %[len],     %[len],         -1                      \n\t"
208       "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
209       "nmsub.s    %[f1],      %[f0],          %[f3],      %[f1]       \n\t"
210       "madd.s     %[f9],      %[f9],          %[f7],      %[f6]       \n\t"
211       "nmsub.s    %[f5],      %[f4],          %[f7],      %[f5]       \n\t"
212 #endif  // #if !defined(MIPS32_R2_LE)
213       "swc1       %[f8],      0(%[fft_tmp])                           \n\t"
214       "swc1       %[f1],      4(%[fft_tmp])                           \n\t"
215       "swc1       %[f9],      8(%[fft_tmp])                           \n\t"
216       "swc1       %[f5],      12(%[fft_tmp])                          \n\t"
217       "bgtz       %[len],     1b                                      \n\t"
218       " addiu     %[fft_tmp], %[fft_tmp],     16                      \n\t"
219       "lwc1       %[f0],      0(%[aRe])                               \n\t"
220       "lwc1       %[f1],      0(%[bRe])                               \n\t"
221       "lwc1       %[f2],      0(%[bIm])                               \n\t"
222       "lwc1       %[f3],      0(%[aIm])                               \n\t"
223       "mul.s      %[f8],      %[f0],          %[f1]                   \n\t"
224 #if !defined(MIPS32_R2_LE)
225       "mul.s      %[f10],     %[f3],          %[f2]                   \n\t"
226       "add.s      %[f8],      %[f8],          %[f10]                  \n\t"
227 #else  // #if !defined(MIPS32_R2_LE)
228       "madd.s     %[f8],      %[f8],          %[f3],      %[f2]       \n\t"
229 #endif  // #if !defined(MIPS32_R2_LE)
230       "swc1       %[f8],      4(%[fft])                               \n\t"
231       ".set       pop                                                 \n\t"
232       : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
233         [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
234         [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
235         [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
236         [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
237         [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
238         [len] "+r" (len)
239       : [fft] "r" (fft)
240       : "memory");
241 
242     ooura_fft.InverseFft(fft);
243     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
244 
245     // fft scaling
246     {
247       float scale = 2.0f / PART_LEN2;
248       __asm __volatile(
249         ".set     push                                    \n\t"
250         ".set     noreorder                               \n\t"
251         "addiu    %[fft_tmp], %[fft],        0            \n\t"
252         "addiu    %[len],     $zero,         8            \n\t"
253         "1:                                               \n\t"
254         "addiu    %[len],     %[len],        -1           \n\t"
255         "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
256         "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
257         "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
258         "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
259         "mul.s    %[f0],      %[f0],         %[scale]     \n\t"
260         "mul.s    %[f1],      %[f1],         %[scale]     \n\t"
261         "mul.s    %[f2],      %[f2],         %[scale]     \n\t"
262         "mul.s    %[f3],      %[f3],         %[scale]     \n\t"
263         "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
264         "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
265         "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
266         "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
267         "mul.s    %[f4],      %[f4],         %[scale]     \n\t"
268         "mul.s    %[f5],      %[f5],         %[scale]     \n\t"
269         "mul.s    %[f6],      %[f6],         %[scale]     \n\t"
270         "mul.s    %[f7],      %[f7],         %[scale]     \n\t"
271         "swc1     %[f0],      0(%[fft_tmp])               \n\t"
272         "swc1     %[f1],      4(%[fft_tmp])               \n\t"
273         "swc1     %[f2],      8(%[fft_tmp])               \n\t"
274         "swc1     %[f3],      12(%[fft_tmp])              \n\t"
275         "swc1     %[f4],      16(%[fft_tmp])              \n\t"
276         "swc1     %[f5],      20(%[fft_tmp])              \n\t"
277         "swc1     %[f6],      24(%[fft_tmp])              \n\t"
278         "swc1     %[f7],      28(%[fft_tmp])              \n\t"
279         "bgtz     %[len],     1b                          \n\t"
280         " addiu   %[fft_tmp], %[fft_tmp],    32           \n\t"
281         ".set     pop                                     \n\t"
282         : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
283           [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
284           [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
285           [fft_tmp] "=&r" (fft_tmp)
286         : [scale] "f" (scale), [fft] "r" (fft)
287         : "memory");
288     }
289     ooura_fft.Fft(fft);
290     aRe = h_fft_buf[0] + pos;
291     aIm = h_fft_buf[1] + pos;
292     __asm __volatile(
293       ".set     push                                    \n\t"
294       ".set     noreorder                               \n\t"
295       "addiu    %[fft_tmp], %[fft],        0            \n\t"
296       "addiu    %[len],     $zero,         31           \n\t"
297       "lwc1     %[f0],      0(%[aRe])                   \n\t"
298       "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
299       "lwc1     %[f2],      256(%[aRe])                 \n\t"
300       "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
301       "lwc1     %[f4],      4(%[aRe])                   \n\t"
302       "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
303       "lwc1     %[f6],      4(%[aIm])                   \n\t"
304       "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
305       "add.s    %[f0],      %[f0],         %[f1]        \n\t"
306       "add.s    %[f2],      %[f2],         %[f3]        \n\t"
307       "add.s    %[f4],      %[f4],         %[f5]        \n\t"
308       "add.s    %[f6],      %[f6],         %[f7]        \n\t"
309       "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
310       "swc1     %[f0],      0(%[aRe])                   \n\t"
311       "swc1     %[f2],      256(%[aRe])                 \n\t"
312       "swc1     %[f4],      4(%[aRe])                   \n\t"
313       "addiu    %[aRe],     %[aRe],        8            \n\t"
314       "swc1     %[f6],      4(%[aIm])                   \n\t"
315       "addiu    %[aIm],     %[aIm],        8            \n\t"
316       "1:                                               \n\t"
317       "lwc1     %[f0],      0(%[aRe])                   \n\t"
318       "lwc1     %[f1],      0(%[fft_tmp])               \n\t"
319       "lwc1     %[f2],      0(%[aIm])                   \n\t"
320       "lwc1     %[f3],      4(%[fft_tmp])               \n\t"
321       "lwc1     %[f4],      4(%[aRe])                   \n\t"
322       "lwc1     %[f5],      8(%[fft_tmp])               \n\t"
323       "lwc1     %[f6],      4(%[aIm])                   \n\t"
324       "lwc1     %[f7],      12(%[fft_tmp])              \n\t"
325       "add.s    %[f0],      %[f0],         %[f1]        \n\t"
326       "add.s    %[f2],      %[f2],         %[f3]        \n\t"
327       "add.s    %[f4],      %[f4],         %[f5]        \n\t"
328       "add.s    %[f6],      %[f6],         %[f7]        \n\t"
329       "addiu    %[len],     %[len],        -1           \n\t"
330       "addiu    %[fft_tmp], %[fft_tmp],    16           \n\t"
331       "swc1     %[f0],      0(%[aRe])                   \n\t"
332       "swc1     %[f2],      0(%[aIm])                   \n\t"
333       "swc1     %[f4],      4(%[aRe])                   \n\t"
334       "addiu    %[aRe],     %[aRe],        8            \n\t"
335       "swc1     %[f6],      4(%[aIm])                   \n\t"
336       "bgtz     %[len],     1b                          \n\t"
337       " addiu   %[aIm],     %[aIm],        8            \n\t"
338       ".set     pop                                     \n\t"
339       : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
340         [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
341         [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
342         [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
343       : [fft] "r" (fft)
344       : "memory");
345   }
346 }
347 
WebRtcAec_Overdrive_mips(float overdrive_scaling,float hNlFb,float hNl[PART_LEN1])348 void WebRtcAec_Overdrive_mips(float overdrive_scaling,
349                               float hNlFb,
350                               float hNl[PART_LEN1]) {
351   const float one = 1.0;
352   float* p_hNl;
353   const float* p_WebRtcAec_wC;
354   float temp1, temp2, temp3, temp4;
355 
356   p_hNl = &hNl[0];
357   p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];
358 
359   for (int i = 0; i < PART_LEN1; ++i) {
360     // Weight subbands
361     __asm __volatile(
362       ".set      push                                              \n\t"
363       ".set      noreorder                                         \n\t"
364       "lwc1      %[temp1],    0(%[p_hNl])                          \n\t"
365       "lwc1      %[temp2],    0(%[p_wC])                           \n\t"
366       "c.lt.s    %[hNlFb],    %[temp1]                             \n\t"
367       "bc1f      1f                                                \n\t"
368       " mul.s    %[temp3],    %[temp2],     %[hNlFb]               \n\t"
369       "sub.s     %[temp4],    %[one],       %[temp2]               \n\t"
370 #if !defined(MIPS32_R2_LE)
371       "mul.s     %[temp1],    %[temp1],     %[temp4]               \n\t"
372       "add.s     %[temp1],    %[temp3],     %[temp1]               \n\t"
373 #else  // #if !defined(MIPS32_R2_LE)
374       "madd.s    %[temp1],    %[temp3],     %[temp1],   %[temp4]   \n\t"
375 #endif  // #if !defined(MIPS32_R2_LE)
376       "swc1      %[temp1],    0(%[p_hNl])                          \n\t"
377      "1:                                                           \n\t"
378       "addiu     %[p_wC],     %[p_wC],      4                      \n\t"
379       ".set      pop                                               \n\t"
380       : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
381         [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
382       : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
383       : "memory");
384 
385     hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
386   }
387 }
388 
// MIPS FPU implementation of the suppression step.
//
// For every bin i in [0, PART_LEN1):
//   efw[0][i] =  efw[0][i] * hNl[i];
//   efw[1][i] = -(efw[1][i] * hNl[i]);
// i.e. both components are scaled by the gain and the second one is also
// sign-flipped. All three pointers are advanced inside the assembly, which is
// why the stores use a -4 offset.
void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1],
                             float efw[2][PART_LEN1]) {
  const float* p_hNl = &hNl[0];
  float* p_efw0 = &efw[0][0];
  float* p_efw1 = &efw[1][0];
  float temp1, temp2, temp3, temp4;

  for (int i = 0; i < PART_LEN1; ++i) {
    __asm __volatile(
      "lwc1      %[temp1],    0(%[p_hNl])              \n\t"
      "lwc1      %[temp3],    0(%[p_efw1])             \n\t"
      "lwc1      %[temp2],    0(%[p_efw0])             \n\t"
      "addiu     %[p_hNl],    %[p_hNl],     4          \n\t"
      "mul.s     %[temp3],    %[temp3],     %[temp1]   \n\t"
      "mul.s     %[temp2],    %[temp2],     %[temp1]   \n\t"
      "addiu     %[p_efw0],   %[p_efw0],    4          \n\t"
      "addiu     %[p_efw1],   %[p_efw1],    4          \n\t"
      "neg.s     %[temp4],    %[temp3]                 \n\t"
      "swc1      %[temp2],    -4(%[p_efw0])            \n\t"
      "swc1      %[temp4],    -4(%[p_efw1])            \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
        [p_hNl] "+r" (p_hNl)
      :
      : "memory");
  }
}

WebRtcAec_ScaleErrorSignal_mips(float mu,float error_threshold,float x_pow[PART_LEN1],float ef[2][PART_LEN1])421 void WebRtcAec_ScaleErrorSignal_mips(float mu,
422                                      float error_threshold,
423                                      float x_pow[PART_LEN1],
424                                      float ef[2][PART_LEN1]) {
425   int len = (PART_LEN1);
426   float* ef0 = ef[0];
427   float* ef1 = ef[1];
428   float fac1 = 1e-10f;
429   float err_th2 = error_threshold * error_threshold;
430   float f0, f1, f2;
431 #if !defined(MIPS32_R2_LE)
432   float f3;
433 #endif
434 
435   __asm __volatile(
436     ".set       push                                   \n\t"
437     ".set       noreorder                              \n\t"
438     "1:                                                \n\t"
439     "lwc1       %[f0],     0(%[x_pow])                 \n\t"
440     "lwc1       %[f1],     0(%[ef0])                   \n\t"
441     "lwc1       %[f2],     0(%[ef1])                   \n\t"
442     "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
443     "div.s      %[f1],     %[f1],       %[f0]          \n\t"
444     "div.s      %[f2],     %[f2],       %[f0]          \n\t"
445     "mul.s      %[f0],     %[f1],       %[f1]          \n\t"
446 #if defined(MIPS32_R2_LE)
447     "madd.s     %[f0],     %[f0],       %[f2],   %[f2] \n\t"
448 #else
449     "mul.s      %[f3],     %[f2],       %[f2]          \n\t"
450     "add.s      %[f0],     %[f0],       %[f3]          \n\t"
451 #endif
452     "c.le.s     %[f0],     %[err_th2]                  \n\t"
453     "nop                                               \n\t"
454     "bc1t       2f                                     \n\t"
455     " nop                                              \n\t"
456     "sqrt.s     %[f0],     %[f0]                       \n\t"
457     "add.s      %[f0],     %[f0],       %[fac1]        \n\t"
458     "div.s      %[f0],     %[err_th],   %[f0]          \n\t"
459     "mul.s      %[f1],     %[f1],       %[f0]          \n\t"
460     "mul.s      %[f2],     %[f2],       %[f0]          \n\t"
461     "2:                                                \n\t"
462     "mul.s      %[f1],     %[f1],       %[mu]          \n\t"
463     "mul.s      %[f2],     %[f2],       %[mu]          \n\t"
464     "swc1       %[f1],     0(%[ef0])                   \n\t"
465     "swc1       %[f2],     0(%[ef1])                   \n\t"
466     "addiu      %[len],    %[len],      -1             \n\t"
467     "addiu      %[x_pow],  %[x_pow],    4              \n\t"
468     "addiu      %[ef0],    %[ef0],      4              \n\t"
469     "bgtz       %[len],    1b                          \n\t"
470     " addiu     %[ef1],    %[ef1],      4              \n\t"
471     ".set       pop                                    \n\t"
472     : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
473 #if !defined(MIPS32_R2_LE)
474       [f3] "=&f" (f3),
475 #endif
476       [x_pow] "+r" (x_pow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
477       [len] "+r" (len)
478     : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
479       [err_th] "f" (error_threshold)
480     : "memory");
481 }
482 
WebRtcAec_InitAec_mips(void)483 void WebRtcAec_InitAec_mips(void) {
484   WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
485   WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
486   WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
487   WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips;
488   WebRtcAec_Suppress = WebRtcAec_Suppress_mips;
489 }
}  // namespace webrtc