1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 /*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15 #include "modules/audio_processing/aec/aec_core.h"
16
17 #include <math.h>
18
19 extern "C" {
20 #include "common_audio/signal_processing/include/signal_processing_library.h"
21 }
22 #include "modules/audio_processing/aec/aec_core_optimized_methods.h"
23 #include "modules/audio_processing/utility/ooura_fft.h"
24
25 namespace webrtc {
26
27 extern const float WebRtcAec_weightCurve[65];
28 extern const float WebRtcAec_overDriveCurve[65];
29
// Accumulates, for every filter partition, the complex product of the
// buffered far-end spectrum and the filter spectrum into y_fft:
//   y_fft[0][j] += xRe * hRe - xIm * hIm
//   y_fft[1][j] += xRe * hIm + xIm * hRe
// Index 0 of each array holds real parts and index 1 imaginary parts.
//
// num_partitions:       number of partitions to accumulate.
// x_fft_buf_block_pos:  partition index in x_fft_buf that pairs with filter
//                       partition 0; the index wraps at num_partitions.
// x_fft_buf, h_fft_buf: spectra, PART_LEN1 bins per partition.
// y_fft:                accumulator, updated in place (not cleared here).
void WebRtcAec_FilterFar_mips(
    int num_partitions,
    int x_fft_buf_block_pos,
    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float y_fft[2][PART_LEN1]) {
  for (int i = 0; i < num_partitions; i++) {
    int xPos = (i + x_fft_buf_block_pos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + x_fft_buf_block_pos >= num_partitions) {
      xPos -= num_partitions * (PART_LEN1);
    }
    float* yf0 = y_fft[0];
    float* yf1 = y_fft[1];
    float* aRe = x_fft_buf[0] + xPos;
    float* aIm = x_fft_buf[1] + xPos;
    float* bRe = h_fft_buf[0] + pos;
    float* bIm = h_fft_buf[1] + pos;
    // Scratch FP registers. f11/f12 are only referenced by the non-R2 code
    // path; every operand listed here is used by at least one path.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f11, f12;
    // The loop handles two bins per iteration; the code after the loop
    // handles the single bin that remains.
    int len = PART_LEN1 >> 1;

    __asm __volatile(
        ".set       push                                        \n\t"
        ".set       noreorder                                   \n\t"
        "1:                                                     \n\t"
        "lwc1       %[f0],      0(%[aRe])                       \n\t"
        "lwc1       %[f1],      0(%[bRe])                       \n\t"
        "lwc1       %[f2],      0(%[bIm])                       \n\t"
        "lwc1       %[f3],      0(%[aIm])                       \n\t"
        "lwc1       %[f4],      4(%[aRe])                       \n\t"
        "lwc1       %[f5],      4(%[bRe])                       \n\t"
        "lwc1       %[f6],      4(%[bIm])                       \n\t"
        "mul.s      %[f8],      %[f0],      %[f1]               \n\t"
        "mul.s      %[f0],      %[f0],      %[f2]               \n\t"
        "mul.s      %[f9],      %[f4],      %[f5]               \n\t"
        "mul.s      %[f4],      %[f4],      %[f6]               \n\t"
        "lwc1       %[f7],      4(%[aIm])                       \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f12],     %[f2],      %[f3]               \n\t"
        "mul.s      %[f1],      %[f3],      %[f1]               \n\t"
        "mul.s      %[f11],     %[f6],      %[f7]               \n\t"
        "addiu      %[aRe],     %[aRe],     8                   \n\t"
        "addiu      %[aIm],     %[aIm],     8                   \n\t"
        "addiu      %[len],     %[len],     -1                  \n\t"
        "sub.s      %[f8],      %[f8],      %[f12]              \n\t"
        "mul.s      %[f12],     %[f7],      %[f5]               \n\t"
        "lwc1       %[f2],      0(%[yf0])                       \n\t"
        "add.s      %[f1],      %[f0],      %[f1]               \n\t"
        "lwc1       %[f3],      0(%[yf1])                       \n\t"
        "sub.s      %[f9],      %[f9],      %[f11]              \n\t"
        "lwc1       %[f6],      4(%[yf0])                       \n\t"
        "add.s      %[f4],      %[f4],      %[f12]              \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        // Fused forms: nmsub.s d,r,s,t -> r - s*t; madd.s d,r,s,t -> r + s*t.
        "addiu      %[aRe],     %[aRe],     8                   \n\t"
        "addiu      %[aIm],     %[aIm],     8                   \n\t"
        "addiu      %[len],     %[len],     -1                  \n\t"
        "nmsub.s    %[f8],      %[f8],      %[f2],      %[f3]   \n\t"
        "lwc1       %[f2],      0(%[yf0])                       \n\t"
        "madd.s     %[f1],      %[f0],      %[f3],      %[f1]   \n\t"
        "lwc1       %[f3],      0(%[yf1])                       \n\t"
        "nmsub.s    %[f9],      %[f9],      %[f6],      %[f7]   \n\t"
        "lwc1       %[f6],      4(%[yf0])                       \n\t"
        "madd.s     %[f4],      %[f4],      %[f7],      %[f5]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "lwc1       %[f5],      4(%[yf1])                       \n\t"
        "add.s      %[f2],      %[f2],      %[f8]               \n\t"
        "addiu      %[bRe],     %[bRe],     8                   \n\t"
        "addiu      %[bIm],     %[bIm],     8                   \n\t"
        "add.s      %[f3],      %[f3],      %[f1]               \n\t"
        "add.s      %[f6],      %[f6],      %[f9]               \n\t"
        "add.s      %[f5],      %[f5],      %[f4]               \n\t"
        "swc1       %[f2],      0(%[yf0])                       \n\t"
        "swc1       %[f3],      0(%[yf1])                       \n\t"
        "swc1       %[f6],      4(%[yf0])                       \n\t"
        "swc1       %[f5],      4(%[yf1])                       \n\t"
        "addiu      %[yf0],     %[yf0],     8                   \n\t"
        "bgtz       %[len],     1b                              \n\t"
        " addiu     %[yf1],     %[yf1],     8                   \n\t"
        // Tail: the one bin left after the two-bins-per-iteration loop.
        "lwc1       %[f0],      0(%[aRe])                       \n\t"
        "lwc1       %[f1],      0(%[bRe])                       \n\t"
        "lwc1       %[f2],      0(%[bIm])                       \n\t"
        "lwc1       %[f3],      0(%[aIm])                       \n\t"
        "mul.s      %[f8],      %[f0],      %[f1]               \n\t"
        "mul.s      %[f0],      %[f0],      %[f2]               \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f12],     %[f2],      %[f3]               \n\t"
        "mul.s      %[f1],      %[f3],      %[f1]               \n\t"
        "sub.s      %[f8],      %[f8],      %[f12]              \n\t"
        "lwc1       %[f2],      0(%[yf0])                       \n\t"
        "add.s      %[f1],      %[f0],      %[f1]               \n\t"
        "lwc1       %[f3],      0(%[yf1])                       \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "nmsub.s    %[f8],      %[f8],      %[f2],      %[f3]   \n\t"
        "lwc1       %[f2],      0(%[yf0])                       \n\t"
        "madd.s     %[f1],      %[f0],      %[f3],      %[f1]   \n\t"
        "lwc1       %[f3],      0(%[yf1])                       \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "add.s      %[f2],      %[f2],      %[f8]               \n\t"
        "add.s      %[f3],      %[f3],      %[f1]               \n\t"
        "swc1       %[f2],      0(%[yf0])                       \n\t"
        "swc1       %[f3],      0(%[yf1])                       \n\t"
        ".set       pop                                         \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
          [f9] "=&f" (f9), [f11] "=&f" (f11), [f12] "=&f" (f12),
          [aRe] "+r" (aRe), [aIm] "+r" (aIm), [bRe] "+r" (bRe),
          [bIm] "+r" (bIm), [yf0] "+r" (yf0), [yf1] "+r" (yf1),
          [len] "+r" (len)
        :
        : "memory");
  }
}
145
// Updates every partition of the frequency-domain filter h_fft_buf from the
// far-end spectrum buffer and the error spectrum. Per partition:
//   1. fft[] = conj(x) * e for bins 0..PART_LEN-1, stored interleaved
//      (re, im); the real product of bin PART_LEN is stored in fft[1].
//   2. Inverse FFT, zero the second half of the buffer, scale the first half
//      by 2 / PART_LEN2, forward FFT.
//   3. Add the result to the partition's coefficients in h_fft_buf.
//
// ooura_fft:            FFT implementation used for steps 2.
// num_partitions:       number of filter partitions to update.
// x_fft_buf_block_pos:  partition index in x_fft_buf that pairs with filter
//                       partition 0; the index wraps at num_partitions.
// x_fft_buf:            far-end spectra ([0] real, [1] imaginary).
// e_fft:                error spectrum ([0] real, [1] imaginary).
// h_fft_buf:            filter spectra, updated in place.
void WebRtcAec_FilterAdaptation_mips(
    const OouraFft& ooura_fft,
    int num_partitions,
    int x_fft_buf_block_pos,
    float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1],
    float e_fft[2][PART_LEN1],
    float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) {
  // The scaling loop (8 iterations of 8 floats) and the update loop (31
  // iterations, byte offset 256 for bin PART_LEN) below are unrolled for this
  // exact partition length.
  static_assert(PART_LEN == 64,
                "MIPS filter adaptation is hand-unrolled for PART_LEN == 64");

  float fft[PART_LEN2];
  for (int i = 0; i < num_partitions; i++) {
    int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1);
    // Check for wrap
    if (i + x_fft_buf_block_pos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }

    const int pos = i * PART_LEN1;
    float* aRe = x_fft_buf[0] + xPos;
    float* aIm = x_fft_buf[1] + xPos;
    float* bRe = e_fft[0];
    float* bIm = e_fft[1];
    float* fft_tmp;

    // Scratch FP registers. f10/f11 are only referenced by the non-R2 path.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11;
    // Two bins are processed per loop iteration.
    int len = PART_LEN >> 1;

    // Step 1: fft[2j] = aRe*bRe + aIm*bIm, fft[2j+1] = aRe*bIm - aIm*bRe.
    __asm __volatile(
        ".set       push                                        \n\t"
        ".set       noreorder                                   \n\t"
        "addiu      %[fft_tmp], %[fft],     0                   \n\t"
        "1:                                                     \n\t"
        "lwc1       %[f0],      0(%[aRe])                       \n\t"
        "lwc1       %[f1],      0(%[bRe])                       \n\t"
        "lwc1       %[f2],      0(%[bIm])                       \n\t"
        "lwc1       %[f4],      4(%[aRe])                       \n\t"
        "lwc1       %[f5],      4(%[bRe])                       \n\t"
        "lwc1       %[f6],      4(%[bIm])                       \n\t"
        "addiu      %[aRe],     %[aRe],     8                   \n\t"
        "addiu      %[bRe],     %[bRe],     8                   \n\t"
        "mul.s      %[f8],      %[f0],      %[f1]               \n\t"
        "mul.s      %[f0],      %[f0],      %[f2]               \n\t"
        "lwc1       %[f3],      0(%[aIm])                       \n\t"
        "mul.s      %[f9],      %[f4],      %[f5]               \n\t"
        "lwc1       %[f7],      4(%[aIm])                       \n\t"
        "mul.s      %[f4],      %[f4],      %[f6]               \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f10],     %[f3],      %[f2]               \n\t"
        "mul.s      %[f1],      %[f3],      %[f1]               \n\t"
        "mul.s      %[f11],     %[f7],      %[f6]               \n\t"
        "mul.s      %[f5],      %[f7],      %[f5]               \n\t"
        "addiu      %[aIm],     %[aIm],     8                   \n\t"
        "addiu      %[bIm],     %[bIm],     8                   \n\t"
        "addiu      %[len],     %[len],     -1                  \n\t"
        "add.s      %[f8],      %[f8],      %[f10]              \n\t"
        "sub.s      %[f1],      %[f0],      %[f1]               \n\t"
        "add.s      %[f9],      %[f9],      %[f11]              \n\t"
        "sub.s      %[f5],      %[f4],      %[f5]               \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        // Fused forms: madd.s d,r,s,t -> r + s*t; nmsub.s d,r,s,t -> r - s*t.
        "addiu      %[aIm],     %[aIm],     8                   \n\t"
        "addiu      %[bIm],     %[bIm],     8                   \n\t"
        "addiu      %[len],     %[len],     -1                  \n\t"
        "madd.s     %[f8],      %[f8],      %[f3],      %[f2]   \n\t"
        "nmsub.s    %[f1],      %[f0],      %[f3],      %[f1]   \n\t"
        "madd.s     %[f9],      %[f9],      %[f7],      %[f6]   \n\t"
        "nmsub.s    %[f5],      %[f4],      %[f7],      %[f5]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "swc1       %[f8],      0(%[fft_tmp])                   \n\t"
        "swc1       %[f1],      4(%[fft_tmp])                   \n\t"
        "swc1       %[f9],      8(%[fft_tmp])                   \n\t"
        "swc1       %[f5],      12(%[fft_tmp])                  \n\t"
        "bgtz       %[len],     1b                              \n\t"
        " addiu     %[fft_tmp], %[fft_tmp], 16                  \n\t"
        // Bin PART_LEN: only the real product is kept, stored in fft[1].
        "lwc1       %[f0],      0(%[aRe])                       \n\t"
        "lwc1       %[f1],      0(%[bRe])                       \n\t"
        "lwc1       %[f2],      0(%[bIm])                       \n\t"
        "lwc1       %[f3],      0(%[aIm])                       \n\t"
        "mul.s      %[f8],      %[f0],      %[f1]               \n\t"
#if !defined(MIPS32_R2_LE)
        "mul.s      %[f10],     %[f3],      %[f2]               \n\t"
        "add.s      %[f8],      %[f8],      %[f10]              \n\t"
#else   // #if !defined(MIPS32_R2_LE)
        "madd.s     %[f8],      %[f8],      %[f3],      %[f2]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
        "swc1       %[f8],      4(%[fft])                       \n\t"
        ".set       pop                                         \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
          [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
          [aRe] "+r" (aRe), [aIm] "+r" (aIm),
          [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
          [len] "+r" (len)
        : [fft] "r" (fft)
        : "memory");

    // Step 2: back to the time domain, zero the second half, scale, and
    // return to the frequency domain.
    ooura_fft.InverseFft(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      // 8 iterations x 8 floats: scales fft[0..63] in place.
      __asm __volatile(
          ".set     push                                    \n\t"
          ".set     noreorder                               \n\t"
          "addiu    %[fft_tmp], %[fft],     0               \n\t"
          "addiu    %[len],     $zero,      8               \n\t"
          "1:                                               \n\t"
          "addiu    %[len],     %[len],     -1              \n\t"
          "lwc1     %[f0],      0(%[fft_tmp])               \n\t"
          "lwc1     %[f1],      4(%[fft_tmp])               \n\t"
          "lwc1     %[f2],      8(%[fft_tmp])               \n\t"
          "lwc1     %[f3],      12(%[fft_tmp])              \n\t"
          "mul.s    %[f0],      %[f0],      %[scale]        \n\t"
          "mul.s    %[f1],      %[f1],      %[scale]        \n\t"
          "mul.s    %[f2],      %[f2],      %[scale]        \n\t"
          "mul.s    %[f3],      %[f3],      %[scale]        \n\t"
          "lwc1     %[f4],      16(%[fft_tmp])              \n\t"
          "lwc1     %[f5],      20(%[fft_tmp])              \n\t"
          "lwc1     %[f6],      24(%[fft_tmp])              \n\t"
          "lwc1     %[f7],      28(%[fft_tmp])              \n\t"
          "mul.s    %[f4],      %[f4],      %[scale]        \n\t"
          "mul.s    %[f5],      %[f5],      %[scale]        \n\t"
          "mul.s    %[f6],      %[f6],      %[scale]        \n\t"
          "mul.s    %[f7],      %[f7],      %[scale]        \n\t"
          "swc1     %[f0],      0(%[fft_tmp])               \n\t"
          "swc1     %[f1],      4(%[fft_tmp])               \n\t"
          "swc1     %[f2],      8(%[fft_tmp])               \n\t"
          "swc1     %[f3],      12(%[fft_tmp])              \n\t"
          "swc1     %[f4],      16(%[fft_tmp])              \n\t"
          "swc1     %[f5],      20(%[fft_tmp])              \n\t"
          "swc1     %[f6],      24(%[fft_tmp])              \n\t"
          "swc1     %[f7],      28(%[fft_tmp])              \n\t"
          "bgtz     %[len],     1b                          \n\t"
          " addiu   %[fft_tmp], %[fft_tmp], 32              \n\t"
          ".set     pop                                     \n\t"
          : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
            [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
            [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
            [fft_tmp] "=&r" (fft_tmp)
          : [scale] "f" (scale), [fft] "r" (fft)
          : "memory");
    }
    ooura_fft.Fft(fft);

    // Step 3: add the update to this partition's filter coefficients.
    // aRe/aIm are reused here to walk h_fft_buf.
    aRe = h_fft_buf[0] + pos;
    aIm = h_fft_buf[1] + pos;
    __asm __volatile(
        ".set       push                                    \n\t"
        ".set       noreorder                               \n\t"
        "addiu      %[fft_tmp], %[fft],     0               \n\t"
        "addiu      %[len],     $zero,      31              \n\t"
        // First group: fft[0] goes to real bin 0 and fft[1] to the real
        // value 256 bytes (64 floats) further on; fft[2], fft[3] update bin 1.
        "lwc1       %[f0],      0(%[aRe])                   \n\t"
        "lwc1       %[f1],      0(%[fft_tmp])               \n\t"
        "lwc1       %[f2],      256(%[aRe])                 \n\t"
        "lwc1       %[f3],      4(%[fft_tmp])               \n\t"
        "lwc1       %[f4],      4(%[aRe])                   \n\t"
        "lwc1       %[f5],      8(%[fft_tmp])               \n\t"
        "lwc1       %[f6],      4(%[aIm])                   \n\t"
        "lwc1       %[f7],      12(%[fft_tmp])              \n\t"
        "add.s      %[f0],      %[f0],      %[f1]           \n\t"
        "add.s      %[f2],      %[f2],      %[f3]           \n\t"
        "add.s      %[f4],      %[f4],      %[f5]           \n\t"
        "add.s      %[f6],      %[f6],      %[f7]           \n\t"
        "addiu      %[fft_tmp], %[fft_tmp], 16              \n\t"
        "swc1       %[f0],      0(%[aRe])                   \n\t"
        "swc1       %[f2],      256(%[aRe])                 \n\t"
        "swc1       %[f4],      4(%[aRe])                   \n\t"
        "addiu      %[aRe],     %[aRe],     8               \n\t"
        "swc1       %[f6],      4(%[aIm])                   \n\t"
        "addiu      %[aIm],     %[aIm],     8               \n\t"
        // Remaining bins, two per iteration (re, im interleaved in fft).
        "1:                                                 \n\t"
        "lwc1       %[f0],      0(%[aRe])                   \n\t"
        "lwc1       %[f1],      0(%[fft_tmp])               \n\t"
        "lwc1       %[f2],      0(%[aIm])                   \n\t"
        "lwc1       %[f3],      4(%[fft_tmp])               \n\t"
        "lwc1       %[f4],      4(%[aRe])                   \n\t"
        "lwc1       %[f5],      8(%[fft_tmp])               \n\t"
        "lwc1       %[f6],      4(%[aIm])                   \n\t"
        "lwc1       %[f7],      12(%[fft_tmp])              \n\t"
        "add.s      %[f0],      %[f0],      %[f1]           \n\t"
        "add.s      %[f2],      %[f2],      %[f3]           \n\t"
        "add.s      %[f4],      %[f4],      %[f5]           \n\t"
        "add.s      %[f6],      %[f6],      %[f7]           \n\t"
        "addiu      %[len],     %[len],     -1              \n\t"
        "addiu      %[fft_tmp], %[fft_tmp], 16              \n\t"
        "swc1       %[f0],      0(%[aRe])                   \n\t"
        "swc1       %[f2],      0(%[aIm])                   \n\t"
        "swc1       %[f4],      4(%[aRe])                   \n\t"
        "addiu      %[aRe],     %[aRe],     8               \n\t"
        "swc1       %[f6],      4(%[aIm])                   \n\t"
        "bgtz       %[len],     1b                          \n\t"
        " addiu     %[aIm],     %[aIm],     8               \n\t"
        ".set       pop                                     \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
          [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
        : [fft] "r" (fft)
        : "memory");
  }
}
347
WebRtcAec_Overdrive_mips(float overdrive_scaling,float hNlFb,float hNl[PART_LEN1])348 void WebRtcAec_Overdrive_mips(float overdrive_scaling,
349 float hNlFb,
350 float hNl[PART_LEN1]) {
351 const float one = 1.0;
352 float* p_hNl;
353 const float* p_WebRtcAec_wC;
354 float temp1, temp2, temp3, temp4;
355
356 p_hNl = &hNl[0];
357 p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];
358
359 for (int i = 0; i < PART_LEN1; ++i) {
360 // Weight subbands
361 __asm __volatile(
362 ".set push \n\t"
363 ".set noreorder \n\t"
364 "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
365 "lwc1 %[temp2], 0(%[p_wC]) \n\t"
366 "c.lt.s %[hNlFb], %[temp1] \n\t"
367 "bc1f 1f \n\t"
368 " mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
369 "sub.s %[temp4], %[one], %[temp2] \n\t"
370 #if !defined(MIPS32_R2_LE)
371 "mul.s %[temp1], %[temp1], %[temp4] \n\t"
372 "add.s %[temp1], %[temp3], %[temp1] \n\t"
373 #else // #if !defined(MIPS32_R2_LE)
374 "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
375 #endif // #if !defined(MIPS32_R2_LE)
376 "swc1 %[temp1], 0(%[p_hNl]) \n\t"
377 "1: \n\t"
378 "addiu %[p_wC], %[p_wC], 4 \n\t"
379 ".set pop \n\t"
380 : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
381 [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
382 : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
383 : "memory");
384
385 hNl[i] = powf(hNl[i], overdrive_scaling * WebRtcAec_overDriveCurve[i]);
386 }
387 }
388
// Multiplies the spectrum efw by the per-bin gains hNl, in place, and negates
// the imaginary part:
//   efw[0][i] =  efw[0][i] * hNl[i]
//   efw[1][i] = -(efw[1][i] * hNl[i])
// for every i in [0, PART_LEN1).
void WebRtcAec_Suppress_mips(const float hNl[PART_LEN1],
                             float efw[2][PART_LEN1]) {
  // Cursors advanced by the assembly, one float per pass.
  const float* gain_ptr = hNl;
  float* real_ptr = efw[0];
  float* imag_ptr = efw[1];
  // Scratch FP registers: gain, scaled real, scaled imaginary, negated
  // imaginary.
  float gain, scaled_re, scaled_im, negated_im;

  int remaining = PART_LEN1;
  while (remaining > 0) {
    --remaining;
    __asm __volatile(
        "lwc1       %[temp1],   0(%[p_hNl])                 \n\t"
        "lwc1       %[temp3],   0(%[p_efw1])                \n\t"
        "lwc1       %[temp2],   0(%[p_efw0])                \n\t"
        "addiu      %[p_hNl],   %[p_hNl],   4               \n\t"
        "mul.s      %[temp3],   %[temp3],   %[temp1]        \n\t"
        "mul.s      %[temp2],   %[temp2],   %[temp1]        \n\t"
        "addiu      %[p_efw0],  %[p_efw0],  4               \n\t"
        "addiu      %[p_efw1],  %[p_efw1],  4               \n\t"
        "neg.s      %[temp4],   %[temp3]                    \n\t"
        // The pointers were already advanced, hence the -4 offsets.
        "swc1       %[temp2],   -4(%[p_efw0])               \n\t"
        "swc1       %[temp4],   -4(%[p_efw1])               \n\t"
        : [temp1] "=&f" (gain), [temp2] "=&f" (scaled_re),
          [temp3] "=&f" (scaled_im), [temp4] "=&f" (negated_im),
          [p_efw0] "+r" (real_ptr), [p_efw1] "+r" (imag_ptr),
          [p_hNl] "+r" (gain_ptr)
        :
        : "memory");
  }
}
420
// Normalizes and limits the error spectrum ef, in place. For every bin i in
// [0, PART_LEN1):
//   ef[.][i] /= (x_pow[i] + 1e-10f)
//   if |ef[i]|^2 > error_threshold^2:
//     ef[.][i] *= error_threshold / (|ef[i]| + 1e-10f)
//   ef[.][i] *= mu
// where ef[0] holds the real parts and ef[1] the imaginary parts.
void WebRtcAec_ScaleErrorSignal_mips(float mu,
                                     float error_threshold,
                                     float x_pow[PART_LEN1],
                                     float ef[2][PART_LEN1]) {
  // Small constant added to both divisors.
  float epsilon = 1e-10f;
  // The magnitude test is done on squared values, so that the square root is
  // only taken for bins that exceed the threshold.
  float threshold_sq = error_threshold * error_threshold;
  float* real_ptr = ef[0];
  float* imag_ptr = ef[1];
  int remaining = (PART_LEN1);
  // Scratch FP registers.
  float s0, s1, s2;
#if !defined(MIPS32_R2_LE)
  float s3;  // Holds im*im; only needed without the fused madd.s.
#endif

  __asm __volatile(
      ".set       push                                    \n\t"
      ".set       noreorder                               \n\t"
      "1:                                                 \n\t"
      "lwc1       %[f0],      0(%[x_pow])                 \n\t"
      "lwc1       %[f1],      0(%[ef0])                   \n\t"
      "lwc1       %[f2],      0(%[ef1])                   \n\t"
      "add.s      %[f0],      %[f0],      %[fac1]         \n\t"
      "div.s      %[f1],      %[f1],      %[f0]           \n\t"
      "div.s      %[f2],      %[f2],      %[f0]           \n\t"
      "mul.s      %[f0],      %[f1],      %[f1]           \n\t"
#if defined(MIPS32_R2_LE)
      "madd.s     %[f0],      %[f0],      %[f2],  %[f2]   \n\t"
#else
      "mul.s      %[f3],      %[f2],      %[f2]           \n\t"
      "add.s      %[f0],      %[f0],      %[f3]           \n\t"
#endif
      // Skip the limiting step when the squared magnitude is within bounds.
      "c.le.s     %[f0],      %[err_th2]                  \n\t"
      "nop                                                \n\t"
      "bc1t       2f                                      \n\t"
      " nop                                               \n\t"
      "sqrt.s     %[f0],      %[f0]                       \n\t"
      "add.s      %[f0],      %[f0],      %[fac1]         \n\t"
      "div.s      %[f0],      %[err_th],  %[f0]           \n\t"
      "mul.s      %[f1],      %[f1],      %[f0]           \n\t"
      "mul.s      %[f2],      %[f2],      %[f0]           \n\t"
      "2:                                                 \n\t"
      "mul.s      %[f1],      %[f1],      %[mu]           \n\t"
      "mul.s      %[f2],      %[f2],      %[mu]           \n\t"
      "swc1       %[f1],      0(%[ef0])                   \n\t"
      "swc1       %[f2],      0(%[ef1])                   \n\t"
      "addiu      %[len],     %[len],     -1              \n\t"
      "addiu      %[x_pow],   %[x_pow],   4               \n\t"
      "addiu      %[ef0],     %[ef0],     4               \n\t"
      "bgtz       %[len],     1b                          \n\t"
      " addiu     %[ef1],     %[ef1],     4               \n\t"
      ".set       pop                                     \n\t"
      : [f0] "=&f" (s0), [f1] "=&f" (s1), [f2] "=&f" (s2),
#if !defined(MIPS32_R2_LE)
        [f3] "=&f" (s3),
#endif
        [x_pow] "+r" (x_pow), [ef0] "+r" (real_ptr), [ef1] "+r" (imag_ptr),
        [len] "+r" (remaining)
      : [fac1] "f" (epsilon), [err_th2] "f" (threshold_sq), [mu] "f" (mu),
        [err_th] "f" (error_threshold)
      : "memory");
}
482
WebRtcAec_InitAec_mips(void)483 void WebRtcAec_InitAec_mips(void) {
484 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
485 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
486 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
487 WebRtcAec_Overdrive = WebRtcAec_Overdrive_mips;
488 WebRtcAec_Suppress = WebRtcAec_Suppress_mips;
489 }
490 } // namespace webrtc
491