1 /*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 /*
12 * The core AEC algorithm, which is presented with time-aligned signals.
13 */
14
15 #include "webrtc/modules/audio_processing/aec/aec_core.h"
16
17 #include <math.h>
18
19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
20 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
21 #include "webrtc/modules/audio_processing/aec/aec_rdft.h"
22
// Lookup tables (65 floats each) declared here; their definitions are not in
// this section of the file.
extern const float WebRtcAec_overDriveCurve[65];
extern const float WebRtcAec_weightCurve[65];

// Set to 1 to add comfort noise in the H band.
static const int flagHbandCn = 1;
26
// MIPS version of the comfort-noise generator.
//
// Adds randomly-phased noise, shaped by |noisePow| and weighted by
// sqrt(1 - lambda^2), to |efw|.  When aec->sampFreq is 32000 or 48000 (and
// flagHbandCn is 1) it also fills |comfortNoiseHband|.
//
//   aec               - &aec->seed is handed to the random generator;
//                       aec->sampFreq selects the H-band path.
//   efw               - efw[0][k] / efw[1][k], k = 0..PART_LEN; updated in
//                       place.
//   comfortNoiseHband - PART_LEN1 complex values, written only on the H-band
//                       path.
//   noisePow          - read at indices 1..PART_LEN.
//   lambda            - read at indices 0..PART_LEN.
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 complex_t* comfortNoiseHband,
                                 const float* noisePow,
                                 const float* lambda) {
  int i;
  float noise, tmp;
  int16_t randW16[PART_LEN];
  complex_t u[PART_LEN1];

  const float pi2 = 6.28318530717959f;
  const float pi2t = pi2 / 32768;

  // Fill randW16 with PART_LEN random 16-bit values; they are scaled by
  // 1/32768 wherever they are used below.
  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);

  int16_t* randWptr = randW16;
  float randTemp, randTemp2, randTemp3, randTemp4;
  int32_t tmp1s, tmp2s, tmp3s, tmp4s;

  // Four bins per pass: angle = randW16[k] * (2*pi / 32768), computed in
  // assembly, then u[k + 1] = (cos(angle), sin(angle)).
  for (i = 0; i < PART_LEN; i += 4) {
    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
      "lh       %[tmp1s], 0(%[randWptr]) \n\t"
      "lh       %[tmp2s], 2(%[randWptr]) \n\t"
      "lh       %[tmp3s], 4(%[randWptr]) \n\t"
      "lh       %[tmp4s], 6(%[randWptr]) \n\t"
      "mtc1     %[tmp1s], %[randTemp] \n\t"
      "mtc1     %[tmp2s], %[randTemp2] \n\t"
      "mtc1     %[tmp3s], %[randTemp3] \n\t"
      "mtc1     %[tmp4s], %[randTemp4] \n\t"
      "cvt.s.w  %[randTemp], %[randTemp] \n\t"
      "cvt.s.w  %[randTemp2], %[randTemp2] \n\t"
      "cvt.s.w  %[randTemp3], %[randTemp3] \n\t"
      "cvt.s.w  %[randTemp4], %[randTemp4] \n\t"
      "addiu    %[randWptr], %[randWptr], 8 \n\t"
      "mul.s    %[randTemp], %[randTemp], %[pi2t] \n\t"
      "mul.s    %[randTemp2], %[randTemp2], %[pi2t] \n\t"
      "mul.s    %[randTemp3], %[randTemp3], %[pi2t] \n\t"
      "mul.s    %[randTemp4], %[randTemp4], %[pi2t] \n\t"
      ".set     pop \n\t"
      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
        [tmp4s] "=&r" (tmp4s)
      : [pi2t] "f" (pi2t)
      : "memory"
    );

    u[i + 1][0] = cosf(randTemp);
    u[i + 1][1] = sinf(randTemp);
    u[i + 2][0] = cosf(randTemp2);
    u[i + 2][1] = sinf(randTemp2);
    u[i + 3][0] = cosf(randTemp3);
    u[i + 3][1] = sinf(randTemp3);
    u[i + 4][0] = cosf(randTemp4);
    u[i + 4][1] = sinf(randTemp4);
  }

  // Reject LF noise
  float* u_ptr = &u[1][0];
  float noise2, noise3, noise4;
  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;

  u[0][0] = 0;
  u[0][1] = 0;
  // Four bins per pass, k = 1..PART_LEN:
  //   u[k][0] =   sqrt(noisePow[k]) * u[k][0]
  //   u[k][1] = -(sqrt(noisePow[k]) * u[k][1])
  // Both u_ptr and noisePow are advanced by the assembly.
  for (i = 1; i < PART_LEN1; i += 4) {
    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
      "lwc1     %[noise], 4(%[noisePow]) \n\t"
      "lwc1     %[noise2], 8(%[noisePow]) \n\t"
      "lwc1     %[noise3], 12(%[noisePow]) \n\t"
      "lwc1     %[noise4], 16(%[noisePow]) \n\t"
      "sqrt.s   %[noise], %[noise] \n\t"
      "sqrt.s   %[noise2], %[noise2] \n\t"
      "sqrt.s   %[noise3], %[noise3] \n\t"
      "sqrt.s   %[noise4], %[noise4] \n\t"
      "lwc1     %[tmp1f], 0(%[u_ptr]) \n\t"
      "lwc1     %[tmp2f], 4(%[u_ptr]) \n\t"
      "lwc1     %[tmp3f], 8(%[u_ptr]) \n\t"
      "lwc1     %[tmp4f], 12(%[u_ptr]) \n\t"
      "lwc1     %[tmp5f], 16(%[u_ptr]) \n\t"
      "lwc1     %[tmp6f], 20(%[u_ptr]) \n\t"
      "lwc1     %[tmp7f], 24(%[u_ptr]) \n\t"
      "lwc1     %[tmp8f], 28(%[u_ptr]) \n\t"
      "addiu    %[noisePow], %[noisePow], 16 \n\t"
      "mul.s    %[tmp1f], %[tmp1f], %[noise] \n\t"
      "mul.s    %[tmp2f], %[tmp2f], %[noise] \n\t"
      "mul.s    %[tmp3f], %[tmp3f], %[noise2] \n\t"
      "mul.s    %[tmp4f], %[tmp4f], %[noise2] \n\t"
      "mul.s    %[tmp5f], %[tmp5f], %[noise3] \n\t"
      "mul.s    %[tmp6f], %[tmp6f], %[noise3] \n\t"
      "swc1     %[tmp1f], 0(%[u_ptr]) \n\t"
      "swc1     %[tmp3f], 8(%[u_ptr]) \n\t"
      "mul.s    %[tmp8f], %[tmp8f], %[noise4] \n\t"
      "mul.s    %[tmp7f], %[tmp7f], %[noise4] \n\t"
      "neg.s    %[tmp2f] \n\t"
      "neg.s    %[tmp4f] \n\t"
      "neg.s    %[tmp6f] \n\t"
      "neg.s    %[tmp8f] \n\t"
      "swc1     %[tmp5f], 16(%[u_ptr]) \n\t"
      "swc1     %[tmp7f], 24(%[u_ptr]) \n\t"
      "swc1     %[tmp2f], 4(%[u_ptr]) \n\t"
      "swc1     %[tmp4f], 12(%[u_ptr]) \n\t"
      "swc1     %[tmp6f], 20(%[u_ptr]) \n\t"
      "swc1     %[tmp8f], 28(%[u_ptr]) \n\t"
      "addiu    %[u_ptr], %[u_ptr], 32 \n\t"
      ".set     pop \n\t"
      : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
      :
      : "memory"
    );
  }
  u[PART_LEN][1] = 0;
  // The loop above moved noisePow forward by PART_LEN floats; rewind it.
  noisePow -= PART_LEN;

  u_ptr = &u[0][0];
  float* u_ptr_end = &u[PART_LEN][0];
  float* efw_ptr_0 = &efw[0][0];
  float* efw_ptr_1 = &efw[1][0];
  float tmp9f, tmp10f;
  const float tmp1c = 1.0;

  // Two bins (k, k + 1) per pass, k = 0..PART_LEN - 1.  For each bin whose
  // lambda is below 1:
  //   w = sqrt(1 - lambda[k]^2)
  //   efw[0][k] += w * u[k][0];  efw[1][k] += w * u[k][1]
  // Labels: 2 = both bins updated, 3 = only bin k, 4 = bin k skipped (bin
  // k + 1 still tested), 5 = pointer advance / loop test.
  // NOTE(review): unlike the C tail below, the assembly does not clamp
  // 1 - lambda^2 at 0 before the square root.
  __asm __volatile (
    ".set     push \n\t"
    ".set     noreorder \n\t"
   "1: \n\t"
    "lwc1     %[tmp1f], 0(%[lambda]) \n\t"
    "lwc1     %[tmp6f], 4(%[lambda]) \n\t"
    "addiu    %[lambda], %[lambda], 8 \n\t"
    "c.lt.s   %[tmp1f], %[tmp1c] \n\t"
    "bc1f     4f \n\t"
    " nop \n\t"
    "c.lt.s   %[tmp6f], %[tmp1c] \n\t"
    "bc1f     3f \n\t"
    " nop \n\t"
   "2: \n\t"
    "mul.s    %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
    "mul.s    %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
    "sub.s    %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
    "sub.s    %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
    "sqrt.s   %[tmp1f], %[tmp1f] \n\t"
    "sqrt.s   %[tmp6f], %[tmp6f] \n\t"
    "lwc1     %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "lwc1     %[tmp3f], 0(%[u_ptr]) \n\t"
    "lwc1     %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "lwc1     %[tmp8f], 8(%[u_ptr]) \n\t"
    "lwc1     %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "lwc1     %[tmp5f], 4(%[u_ptr]) \n\t"
    "lwc1     %[tmp9f], 4(%[efw_ptr_1]) \n\t"
    "lwc1     %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s    %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
    "add.s    %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
    "mul.s    %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
    "add.s    %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
    "mul.s    %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
    "add.s    %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
    "mul.s    %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
    "add.s    %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s   %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
    "madd.s   %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
    "madd.s   %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
    "madd.s   %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1     %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "swc1     %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "swc1     %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "b        5f \n\t"
    " swc1    %[tmp9f], 4(%[efw_ptr_1]) \n\t"
   "3: \n\t"
    "mul.s    %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
    "sub.s    %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
    "sqrt.s   %[tmp1f], %[tmp1f] \n\t"
    "lwc1     %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "lwc1     %[tmp3f], 0(%[u_ptr]) \n\t"
    "lwc1     %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "lwc1     %[tmp5f], 4(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s    %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
    "add.s    %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
    "mul.s    %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
    "add.s    %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s   %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
    "madd.s   %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1     %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "b        5f \n\t"
    " swc1    %[tmp4f], 0(%[efw_ptr_1]) \n\t"
   "4: \n\t"
    "c.lt.s   %[tmp6f], %[tmp1c] \n\t"
    "bc1f     5f \n\t"
    " nop \n\t"
    "mul.s    %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
    "sub.s    %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
    "sqrt.s   %[tmp6f], %[tmp6f] \n\t"
    "lwc1     %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "lwc1     %[tmp8f], 8(%[u_ptr]) \n\t"
    "lwc1     %[tmp9f], 4(%[efw_ptr_1]) \n\t"
    "lwc1     %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s    %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
    "add.s    %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
    "mul.s    %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
    "add.s    %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s   %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
    "madd.s   %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1     %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "swc1     %[tmp9f], 4(%[efw_ptr_1]) \n\t"
   "5: \n\t"
    "addiu    %[u_ptr], %[u_ptr], 16 \n\t"
    "addiu    %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
    "bne      %[u_ptr], %[u_ptr_end], 1b \n\t"
    " addiu   %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
    ".set     pop \n\t"
    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
    : "memory"
  );

  // The assembly moved lambda forward by PART_LEN floats; rewind it, then
  // handle the last bin (k = PART_LEN) in C.
  lambda -= PART_LEN;
  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
  efw[1][PART_LEN] += tmp * u[PART_LEN][1];

  // For H band comfort noise
  // TODO: don't compute noise and "tmp" twice. Use the previous results.
  if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
    // Averages run over the second half of the spectrum (i.e., 4->8khz).
    // The bin count is a compile-time constant, so no run-time counter is
    // needed.
    const int hbandFirstBin = PART_LEN1 >> 1;
    const float hbandNumBins = (float)(PART_LEN1 - hbandFirstBin);
    float noiseAvg = 0.0f;  // average noise scale
    float tmpAvg = 0.0f;    // average nlp scale

    for (i = hbandFirstBin; i < PART_LEN1; i++) {
      noiseAvg += sqrtf(noisePow[i]);
      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    }
    noiseAvg /= hbandNumBins;
    tmpAvg /= hbandNumBins;

    // Use average noise for H band
    // TODO: we should probably have a new random vector here.
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
      // Same random values as above, scaled by 1/32768 and then by 2*pi.
      tmp = pi2 * (((float)randW16[i - 1]) / 32768);

      u[i][0] = noiseAvg * (float)cos(tmp);
      u[i][1] = -noiseAvg * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;

    for (i = 0; i < PART_LEN1; i++) {
      // Use average NLP weight for H band
      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
    }
  }
}
322
// MIPS version of the far-end filter.
//
// For every partition i in [0, aec->num_partitions) this accumulates, for
// each bin j in [0, PART_LEN]:
//   yf[0][j] += aRe[j] * bRe[j] - aIm[j] * bIm[j]
//   yf[1][j] += aRe[j] * bIm[j] + aIm[j] * bRe[j]
// where a = aec->xfBuf at the (wrapped) block position and b = aec->wfBuf at
// partition i.  |yf| is only added to, never cleared, by this function.
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * (PART_LEN1);
    }
    float* yf0 = yf[0];
    float* yf1 = yf[1];
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = aec->wfBuf[0] + pos;
    float* bIm = aec->wfBuf[1] + pos;
    // Only temporaries that the template references are bound as operands;
    // f11 and f12 are used on the non-MIPS32_R2_LE path only.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f11, f12;
    // The loop handles two bins per pass; the single remaining bin is
    // processed by the straight-line code after the loop.
    int len = PART_LEN1 >> 1;

    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
     "1: \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[bRe]) \n\t"
      "lwc1     %[f2], 0(%[bIm]) \n\t"
      "lwc1     %[f3], 0(%[aIm]) \n\t"
      "lwc1     %[f4], 4(%[aRe]) \n\t"
      "lwc1     %[f5], 4(%[bRe]) \n\t"
      "lwc1     %[f6], 4(%[bIm]) \n\t"
      "mul.s    %[f8], %[f0], %[f1] \n\t"
      "mul.s    %[f0], %[f0], %[f2] \n\t"
      "mul.s    %[f9], %[f4], %[f5] \n\t"
      "mul.s    %[f4], %[f4], %[f6] \n\t"
      "lwc1     %[f7], 4(%[aIm]) \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s    %[f12], %[f2], %[f3] \n\t"
      "mul.s    %[f1], %[f3], %[f1] \n\t"
      "mul.s    %[f11], %[f6], %[f7] \n\t"
      "addiu    %[aRe], %[aRe], 8 \n\t"
      "addiu    %[aIm], %[aIm], 8 \n\t"
      "addiu    %[len], %[len], -1 \n\t"
      "sub.s    %[f8], %[f8], %[f12] \n\t"
      "mul.s    %[f12], %[f7], %[f5] \n\t"
      "lwc1     %[f2], 0(%[yf0]) \n\t"
      "add.s    %[f1], %[f0], %[f1] \n\t"
      "lwc1     %[f3], 0(%[yf1]) \n\t"
      "sub.s    %[f9], %[f9], %[f11] \n\t"
      "lwc1     %[f6], 4(%[yf0]) \n\t"
      "add.s    %[f4], %[f4], %[f12] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "addiu    %[aRe], %[aRe], 8 \n\t"
      "addiu    %[aIm], %[aIm], 8 \n\t"
      "addiu    %[len], %[len], -1 \n\t"
      "nmsub.s  %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1     %[f2], 0(%[yf0]) \n\t"
      "madd.s   %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1     %[f3], 0(%[yf1]) \n\t"
      "nmsub.s  %[f9], %[f9], %[f6], %[f7] \n\t"
      "lwc1     %[f6], 4(%[yf0]) \n\t"
      "madd.s   %[f4], %[f4], %[f7], %[f5] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "lwc1     %[f5], 4(%[yf1]) \n\t"
      "add.s    %[f2], %[f2], %[f8] \n\t"
      "addiu    %[bRe], %[bRe], 8 \n\t"
      "addiu    %[bIm], %[bIm], 8 \n\t"
      "add.s    %[f3], %[f3], %[f1] \n\t"
      "add.s    %[f6], %[f6], %[f9] \n\t"
      "add.s    %[f5], %[f5], %[f4] \n\t"
      "swc1     %[f2], 0(%[yf0]) \n\t"
      "swc1     %[f3], 0(%[yf1]) \n\t"
      "swc1     %[f6], 4(%[yf0]) \n\t"
      "swc1     %[f5], 4(%[yf1]) \n\t"
      "addiu    %[yf0], %[yf0], 8 \n\t"
      "bgtz     %[len], 1b \n\t"
      " addiu   %[yf1], %[yf1], 8 \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[bRe]) \n\t"
      "lwc1     %[f2], 0(%[bIm]) \n\t"
      "lwc1     %[f3], 0(%[aIm]) \n\t"
      "mul.s    %[f8], %[f0], %[f1] \n\t"
      "mul.s    %[f0], %[f0], %[f2] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s    %[f12], %[f2], %[f3] \n\t"
      "mul.s    %[f1], %[f3], %[f1] \n\t"
      "sub.s    %[f8], %[f8], %[f12] \n\t"
      "lwc1     %[f2], 0(%[yf0]) \n\t"
      "add.s    %[f1], %[f0], %[f1] \n\t"
      "lwc1     %[f3], 0(%[yf1]) \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "nmsub.s  %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1     %[f2], 0(%[yf0]) \n\t"
      "madd.s   %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1     %[f3], 0(%[yf1]) \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "add.s    %[f2], %[f2], %[f8] \n\t"
      "add.s    %[f3], %[f3], %[f1] \n\t"
      "swc1     %[f2], 0(%[yf0]) \n\t"
      "swc1     %[f3], 0(%[yf1]) \n\t"
      ".set     pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f11] "=&f" (f11), [f12] "=&f" (f12),
        [aRe] "+r" (aRe), [aIm] "+r" (aIm), [bRe] "+r" (bRe),
        [bIm] "+r" (bIm), [yf0] "+r" (yf0), [yf1] "+r" (yf1),
        [len] "+r" (len)
      :
      : "memory"
    );
  }
}
434
// MIPS version of the filter adaptation step.
//
// For every partition i in [0, aec->num_partitions):
//   1. Build an interleaved spectrum in |fft| from a = aec->xfBuf (wrapped
//      block position) and b = ef:
//        fft[2j]     = aRe[j] * bRe[j] + aIm[j] * bIm[j]
//        fft[2j + 1] = aRe[j] * bIm[j] - aIm[j] * bRe[j]    (j < PART_LEN)
//        fft[1]      = aRe[PART_LEN] * bRe[PART_LEN]
//                      + aIm[PART_LEN] * bIm[PART_LEN]
//   2. aec_rdft_inverse_128(fft), zero fft[PART_LEN ..], scale the first
//      part by 2 / PART_LEN2, aec_rdft_forward_128(fft).
//   3. Add the result to aec->wfBuf at partition i.
//
//   fft - caller-provided scratch buffer; overwritten on every partition.
//   ef  - ef[0][j] / ef[1][j]; read only.
//
// NOTE(review): the literals 8, 31 and the 256-byte offset in the assembly
// below correspond to PART_LEN == 64; PART_LEN is defined outside this file,
// so confirm before changing it.
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
                                     float* fft,
                                     float ef[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
    const int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * PART_LEN1;
    }

    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = ef[0];
    float* bIm = ef[1];
    float* fft_tmp;

    // Only temporaries that the templates reference are bound as operands;
    // f10 and f11 are used on the non-MIPS32_R2_LE path only.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11;
    // Two bins per pass.
    int len = PART_LEN >> 1;

    // Step 1: build the interleaved spectrum.  The store to 4(fft) after the
    // loop overwrites fft[1] with the bin-PART_LEN term.
    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
      "addiu    %[fft_tmp], %[fft], 0 \n\t"
     "1: \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[bRe]) \n\t"
      "lwc1     %[f2], 0(%[bIm]) \n\t"
      "lwc1     %[f4], 4(%[aRe]) \n\t"
      "lwc1     %[f5], 4(%[bRe]) \n\t"
      "lwc1     %[f6], 4(%[bIm]) \n\t"
      "addiu    %[aRe], %[aRe], 8 \n\t"
      "addiu    %[bRe], %[bRe], 8 \n\t"
      "mul.s    %[f8], %[f0], %[f1] \n\t"
      "mul.s    %[f0], %[f0], %[f2] \n\t"
      "lwc1     %[f3], 0(%[aIm]) \n\t"
      "mul.s    %[f9], %[f4], %[f5] \n\t"
      "lwc1     %[f7], 4(%[aIm]) \n\t"
      "mul.s    %[f4], %[f4], %[f6] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s    %[f10], %[f3], %[f2] \n\t"
      "mul.s    %[f1], %[f3], %[f1] \n\t"
      "mul.s    %[f11], %[f7], %[f6] \n\t"
      "mul.s    %[f5], %[f7], %[f5] \n\t"
      "addiu    %[aIm], %[aIm], 8 \n\t"
      "addiu    %[bIm], %[bIm], 8 \n\t"
      "addiu    %[len], %[len], -1 \n\t"
      "add.s    %[f8], %[f8], %[f10] \n\t"
      "sub.s    %[f1], %[f0], %[f1] \n\t"
      "add.s    %[f9], %[f9], %[f11] \n\t"
      "sub.s    %[f5], %[f4], %[f5] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "addiu    %[aIm], %[aIm], 8 \n\t"
      "addiu    %[bIm], %[bIm], 8 \n\t"
      "addiu    %[len], %[len], -1 \n\t"
      "madd.s   %[f8], %[f8], %[f3], %[f2] \n\t"
      "nmsub.s  %[f1], %[f0], %[f3], %[f1] \n\t"
      "madd.s   %[f9], %[f9], %[f7], %[f6] \n\t"
      "nmsub.s  %[f5], %[f4], %[f7], %[f5] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1     %[f8], 0(%[fft_tmp]) \n\t"
      "swc1     %[f1], 4(%[fft_tmp]) \n\t"
      "swc1     %[f9], 8(%[fft_tmp]) \n\t"
      "swc1     %[f5], 12(%[fft_tmp]) \n\t"
      "bgtz     %[len], 1b \n\t"
      " addiu   %[fft_tmp], %[fft_tmp], 16 \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[bRe]) \n\t"
      "lwc1     %[f2], 0(%[bIm]) \n\t"
      "lwc1     %[f3], 0(%[aIm]) \n\t"
      "mul.s    %[f8], %[f0], %[f1] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s    %[f10], %[f3], %[f2] \n\t"
      "add.s    %[f8], %[f8], %[f10] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "madd.s   %[f8], %[f8], %[f3], %[f2] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1     %[f8], 4(%[fft]) \n\t"
      ".set     pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
        [aRe] "+r" (aRe), [aIm] "+r" (aIm),
        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
        [len] "+r" (len)
      : [fft] "r" (fft)
      : "memory"
    );

    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling: 8 passes of 8 floats each, every value multiplied by
    // |scale|.
    {
      float scale = 2.0f / PART_LEN2;
      __asm __volatile (
        ".set     push \n\t"
        ".set     noreorder \n\t"
        "addiu    %[fft_tmp], %[fft], 0 \n\t"
        "addiu    %[len], $zero, 8 \n\t"
       "1: \n\t"
        "addiu    %[len], %[len], -1 \n\t"
        "lwc1     %[f0], 0(%[fft_tmp]) \n\t"
        "lwc1     %[f1], 4(%[fft_tmp]) \n\t"
        "lwc1     %[f2], 8(%[fft_tmp]) \n\t"
        "lwc1     %[f3], 12(%[fft_tmp]) \n\t"
        "mul.s    %[f0], %[f0], %[scale] \n\t"
        "mul.s    %[f1], %[f1], %[scale] \n\t"
        "mul.s    %[f2], %[f2], %[scale] \n\t"
        "mul.s    %[f3], %[f3], %[scale] \n\t"
        "lwc1     %[f4], 16(%[fft_tmp]) \n\t"
        "lwc1     %[f5], 20(%[fft_tmp]) \n\t"
        "lwc1     %[f6], 24(%[fft_tmp]) \n\t"
        "lwc1     %[f7], 28(%[fft_tmp]) \n\t"
        "mul.s    %[f4], %[f4], %[scale] \n\t"
        "mul.s    %[f5], %[f5], %[scale] \n\t"
        "mul.s    %[f6], %[f6], %[scale] \n\t"
        "mul.s    %[f7], %[f7], %[scale] \n\t"
        "swc1     %[f0], 0(%[fft_tmp]) \n\t"
        "swc1     %[f1], 4(%[fft_tmp]) \n\t"
        "swc1     %[f2], 8(%[fft_tmp]) \n\t"
        "swc1     %[f3], 12(%[fft_tmp]) \n\t"
        "swc1     %[f4], 16(%[fft_tmp]) \n\t"
        "swc1     %[f5], 20(%[fft_tmp]) \n\t"
        "swc1     %[f6], 24(%[fft_tmp]) \n\t"
        "swc1     %[f7], 28(%[fft_tmp]) \n\t"
        "bgtz     %[len], 1b \n\t"
        " addiu   %[fft_tmp], %[fft_tmp], 32 \n\t"
        ".set     pop \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
          [fft_tmp] "=&r" (fft_tmp)
        : [scale] "f" (scale), [fft] "r" (fft)
        : "memory"
      );
    }
    aec_rdft_forward_128(fft);
    aRe = aec->wfBuf[0] + pos;
    aIm = aec->wfBuf[1] + pos;
    // Step 3: accumulate into wfBuf.  The prologue handles fft[0] (added at
    // 0(aRe)), fft[1] (added at byte offset 256 from aRe, i.e. 64 floats
    // further on) and bin 1; the loop then runs 31 passes of two bins each:
    //   aRe[j] += fft[2j];  aIm[j] += fft[2j + 1]
    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
      "addiu    %[fft_tmp], %[fft], 0 \n\t"
      "addiu    %[len], $zero, 31 \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[fft_tmp]) \n\t"
      "lwc1     %[f2], 256(%[aRe]) \n\t"
      "lwc1     %[f3], 4(%[fft_tmp]) \n\t"
      "lwc1     %[f4], 4(%[aRe]) \n\t"
      "lwc1     %[f5], 8(%[fft_tmp]) \n\t"
      "lwc1     %[f6], 4(%[aIm]) \n\t"
      "lwc1     %[f7], 12(%[fft_tmp]) \n\t"
      "add.s    %[f0], %[f0], %[f1] \n\t"
      "add.s    %[f2], %[f2], %[f3] \n\t"
      "add.s    %[f4], %[f4], %[f5] \n\t"
      "add.s    %[f6], %[f6], %[f7] \n\t"
      "addiu    %[fft_tmp], %[fft_tmp], 16 \n\t"
      "swc1     %[f0], 0(%[aRe]) \n\t"
      "swc1     %[f2], 256(%[aRe]) \n\t"
      "swc1     %[f4], 4(%[aRe]) \n\t"
      "addiu    %[aRe], %[aRe], 8 \n\t"
      "swc1     %[f6], 4(%[aIm]) \n\t"
      "addiu    %[aIm], %[aIm], 8 \n\t"
     "1: \n\t"
      "lwc1     %[f0], 0(%[aRe]) \n\t"
      "lwc1     %[f1], 0(%[fft_tmp]) \n\t"
      "lwc1     %[f2], 0(%[aIm]) \n\t"
      "lwc1     %[f3], 4(%[fft_tmp]) \n\t"
      "lwc1     %[f4], 4(%[aRe]) \n\t"
      "lwc1     %[f5], 8(%[fft_tmp]) \n\t"
      "lwc1     %[f6], 4(%[aIm]) \n\t"
      "lwc1     %[f7], 12(%[fft_tmp]) \n\t"
      "add.s    %[f0], %[f0], %[f1] \n\t"
      "add.s    %[f2], %[f2], %[f3] \n\t"
      "add.s    %[f4], %[f4], %[f5] \n\t"
      "add.s    %[f6], %[f6], %[f7] \n\t"
      "addiu    %[len], %[len], -1 \n\t"
      "addiu    %[fft_tmp], %[fft_tmp], 16 \n\t"
      "swc1     %[f0], 0(%[aRe]) \n\t"
      "swc1     %[f2], 0(%[aIm]) \n\t"
      "swc1     %[f4], 4(%[aRe]) \n\t"
      "addiu    %[aRe], %[aRe], 8 \n\t"
      "swc1     %[f6], 4(%[aIm]) \n\t"
      "bgtz     %[len], 1b \n\t"
      " addiu   %[aIm], %[aIm], 8 \n\t"
      ".set     pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
      : [fft] "r" (fft)
      : "memory"
    );
  }
}
634
// MIPS version of overdrive-and-suppress.
//
// For each bin i in [0, PART_LEN]:
//   1. If hNl[i] > hNlFb, blend it towards hNlFb:
//        hNl[i] = w * hNlFb + (1 - w) * hNl[i],  w = WebRtcAec_weightCurve[i]
//   2. hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i])
//   3. efw[0][i] *= hNl[i];  efw[1][i] = -(efw[1][i] * hNl[i])
//
//   aec   - only aec->overDriveSm is read.
//   hNl   - per-bin gains; updated in place.
//   hNlFb - threshold / blend target for step 1.
//   efw   - spectrum scaled in place; the [1] component is also negated.
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
                                         float hNl[PART_LEN1],
                                         const float hNlFb,
                                         float efw[2][PART_LEN1]) {
  int i;
  const float one = 1.0;
  float* p_hNl = &hNl[0];
  float* p_efw0 = &efw[0][0];
  float* p_efw1 = &efw[1][0];
  // The weight curve is only read, so keep the pointer const-qualified
  // instead of casting the qualifier away.
  const float* p_WebRtcAec_wC = &WebRtcAec_weightCurve[0];
  float temp1, temp2, temp3, temp4;

  for (i = 0; i < PART_LEN1; i++) {
    // Weight subbands.  The mul.s in the branch delay slot runs on both
    // paths; its result is only consumed when the branch is not taken.
    __asm __volatile (
      ".set     push \n\t"
      ".set     noreorder \n\t"
      "lwc1     %[temp1], 0(%[p_hNl]) \n\t"
      "lwc1     %[temp2], 0(%[p_wC]) \n\t"
      "c.lt.s   %[hNlFb], %[temp1] \n\t"
      "bc1f     1f \n\t"
      " mul.s   %[temp3], %[temp2], %[hNlFb] \n\t"
      "sub.s    %[temp4], %[one], %[temp2] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s    %[temp1], %[temp1], %[temp4] \n\t"
      "add.s    %[temp1], %[temp3], %[temp1] \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "madd.s   %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1     %[temp1], 0(%[p_hNl]) \n\t"
     "1: \n\t"
      "addiu    %[p_wC], %[p_wC], 4 \n\t"
      ".set     pop \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
      : "memory"
    );

    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Suppress: scale both components of efw by hNl[i], negate the second
    // one, and advance all three pointers by one float.
    __asm __volatile (
      "lwc1     %[temp1], 0(%[p_hNl]) \n\t"
      "lwc1     %[temp3], 0(%[p_efw1]) \n\t"
      "lwc1     %[temp2], 0(%[p_efw0]) \n\t"
      "addiu    %[p_hNl], %[p_hNl], 4 \n\t"
      "mul.s    %[temp3], %[temp3], %[temp1] \n\t"
      "mul.s    %[temp2], %[temp2], %[temp1] \n\t"
      "addiu    %[p_efw0], %[p_efw0], 4 \n\t"
      "addiu    %[p_efw1], %[p_efw1], 4 \n\t"
      "neg.s    %[temp4], %[temp3] \n\t"
      "swc1     %[temp2], -4(%[p_efw0]) \n\t"
      "swc1     %[temp4], -4(%[p_efw1]) \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
        [p_hNl] "+r" (p_hNl)
      :
      : "memory"
    );
  }
}
701
// MIPS version of the error-signal scaling.
//
// For each bin k in [0, PART_LEN]:
//   ef[0][k] /= xPow[k] + 1e-10;  ef[1][k] /= xPow[k] + 1e-10
//   if (ef[0][k]^2 + ef[1][k]^2 > error_threshold^2)
//     both components are multiplied by
//       error_threshold / (sqrt(ef[0][k]^2 + ef[1][k]^2) + 1e-10)
//   both components are multiplied by mu
// mu and error_threshold come from kExtendedMu / kExtendedErrorThreshold when
// aec->extended_filter_enabled is set, otherwise from aec->normal_mu /
// aec->normal_error_threshold.  |ef| is updated in place.
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
  float mu = aec->normal_mu;
  float error_threshold = aec->normal_error_threshold;
  if (aec->extended_filter_enabled) {
    mu = kExtendedMu;
    error_threshold = kExtendedErrorThreshold;
  }

  // The magnitude test is done on squared values, so square the threshold
  // once up front.
  const float threshold_sq = error_threshold * error_threshold;
  const float eps = 1e-10f;

  float* x_pow = aec->xPow;
  float* ef_a = ef[0];
  float* ef_b = ef[1];
  int remaining = (PART_LEN1);

  float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
  float f3;
#endif

  // One bin per pass; label 2 is the join point that skips the limiting step
  // when the squared magnitude is within the threshold.
  __asm __volatile (
    ".set     push \n\t"
    ".set     noreorder \n\t"
   "1: \n\t"
    "lwc1     %[f0], 0(%[xPow]) \n\t"
    "lwc1     %[f1], 0(%[ef0]) \n\t"
    "lwc1     %[f2], 0(%[ef1]) \n\t"
    "add.s    %[f0], %[f0], %[fac1] \n\t"
    "div.s    %[f1], %[f1], %[f0] \n\t"
    "div.s    %[f2], %[f2], %[f0] \n\t"
    "mul.s    %[f0], %[f1], %[f1] \n\t"
#if defined(MIPS32_R2_LE)
    "madd.s   %[f0], %[f0], %[f2], %[f2] \n\t"
#else
    "mul.s    %[f3], %[f2], %[f2] \n\t"
    "add.s    %[f0], %[f0], %[f3] \n\t"
#endif
    "c.le.s   %[f0], %[err_th2] \n\t"
    "nop \n\t"
    "bc1t     2f \n\t"
    " nop \n\t"
    "sqrt.s   %[f0], %[f0] \n\t"
    "add.s    %[f0], %[f0], %[fac1] \n\t"
    "div.s    %[f0], %[err_th], %[f0] \n\t"
    "mul.s    %[f1], %[f1], %[f0] \n\t"
    "mul.s    %[f2], %[f2], %[f0] \n\t"
   "2: \n\t"
    "mul.s    %[f1], %[f1], %[mu] \n\t"
    "mul.s    %[f2], %[f2], %[mu] \n\t"
    "swc1     %[f1], 0(%[ef0]) \n\t"
    "swc1     %[f2], 0(%[ef1]) \n\t"
    "addiu    %[len], %[len], -1 \n\t"
    "addiu    %[xPow], %[xPow], 4 \n\t"
    "addiu    %[ef0], %[ef0], 4 \n\t"
    "bgtz     %[len], 1b \n\t"
    " addiu   %[ef1], %[ef1], 4 \n\t"
    ".set     pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
#if !defined(MIPS32_R2_LE)
      [f3] "=&f" (f3),
#endif
      [xPow] "+r" (x_pow), [ef0] "+r" (ef_a), [ef1] "+r" (ef_b),
      [len] "+r" (remaining)
    : [fac1] "f" (eps), [err_th2] "f" (threshold_sq), [mu] "f" (mu),
      [err_th] "f" (error_threshold)
    : "memory"
  );
}
766
WebRtcAec_InitAec_mips(void)767 void WebRtcAec_InitAec_mips(void) {
768 WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
769 WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
770 WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
771 WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
772 WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
773 }
774
775