1 /*
2  * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
3  * Copyright Takuya OOURA, 1996-2001
4  *
5  * You may use, copy, modify and distribute this code for any purpose (include
6  * commercial use) and without fee. Please refer to this package when you modify
7  * this code.
8  *
9  * Changes by the WebRTC authors:
10  *    - Trivial type modifications.
11  *    - Minimal code subset to do rdft of length 128.
12  *    - Optimizations because of known length.
13  *
14  *  All changes are covered by the WebRTC license and IP grant:
15  *  Use of this source code is governed by a BSD-style license
16  *  that can be found in the LICENSE file in the root of the source
17  *  tree. An additional intellectual property rights grant can be found
18  *  in the file PATENTS.  All contributing project authors may
19  *  be found in the AUTHORS file in the root of the source tree.
20  */
21 
22 #include "aec_rdft.h"
23 
24 #include <math.h>
25 
26 #include "cpu_features_wrapper.h"
27 #include "typedefs.h"
28 
29 // constants shared by all paths (C, SSE2).
30 float rdft_w[64];
31 // constants used by the C path.
32 float rdft_wk3ri_first[32];
33 float rdft_wk3ri_second[32];
34 // constants used by SSE2 but initialized in C path.
35 ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
36 ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
37 ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
38 ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
39 ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
40 ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
41 ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
42 
43 static int ip[16];
44 
bitrv2_32or128(int n,int * ip,float * a)45 static void bitrv2_32or128(int n, int *ip, float *a) {
46   // n is 32 or 128
47   int j, j1, k, k1, m, m2;
48   float xr, xi, yr, yi;
49 
50   ip[0] = 0;
51   {
52     int l = n;
53     m = 1;
54     while ((m << 3) < l) {
55       l >>= 1;
56       for (j = 0; j < m; j++) {
57         ip[m + j] = ip[j] + l;
58       }
59       m <<= 1;
60     }
61   }
62   m2 = 2 * m;
63   for (k = 0; k < m; k++) {
64     for (j = 0; j < k; j++) {
65       j1 = 2 * j + ip[k];
66       k1 = 2 * k + ip[j];
67       xr = a[j1];
68       xi = a[j1 + 1];
69       yr = a[k1];
70       yi = a[k1 + 1];
71       a[j1] = yr;
72       a[j1 + 1] = yi;
73       a[k1] = xr;
74       a[k1 + 1] = xi;
75       j1 += m2;
76       k1 += 2 * m2;
77       xr = a[j1];
78       xi = a[j1 + 1];
79       yr = a[k1];
80       yi = a[k1 + 1];
81       a[j1] = yr;
82       a[j1 + 1] = yi;
83       a[k1] = xr;
84       a[k1 + 1] = xi;
85       j1 += m2;
86       k1 -= m2;
87       xr = a[j1];
88       xi = a[j1 + 1];
89       yr = a[k1];
90       yi = a[k1 + 1];
91       a[j1] = yr;
92       a[j1 + 1] = yi;
93       a[k1] = xr;
94       a[k1 + 1] = xi;
95       j1 += m2;
96       k1 += 2 * m2;
97       xr = a[j1];
98       xi = a[j1 + 1];
99       yr = a[k1];
100       yi = a[k1 + 1];
101       a[j1] = yr;
102       a[j1 + 1] = yi;
103       a[k1] = xr;
104       a[k1 + 1] = xi;
105     }
106     j1 = 2 * k + m2 + ip[k];
107     k1 = j1 + m2;
108     xr = a[j1];
109     xi = a[j1 + 1];
110     yr = a[k1];
111     yi = a[k1 + 1];
112     a[j1] = yr;
113     a[j1 + 1] = yi;
114     a[k1] = xr;
115     a[k1 + 1] = xi;
116   }
117 }
118 
makewt_32(void)119 static void makewt_32(void) {
120   const int nw = 32;
121   int j, nwh;
122   float delta, x, y;
123 
124   ip[0] = nw;
125   ip[1] = 1;
126   nwh = nw >> 1;
127   delta = atanf(1.0f) / nwh;
128   rdft_w[0] = 1;
129   rdft_w[1] = 0;
130   rdft_w[nwh] = cosf(delta * nwh);
131   rdft_w[nwh + 1] = rdft_w[nwh];
132   for (j = 2; j < nwh; j += 2) {
133     x = cosf(delta * j);
134     y = sinf(delta * j);
135     rdft_w[j] = x;
136     rdft_w[j + 1] = y;
137     rdft_w[nw - j] = y;
138     rdft_w[nw - j + 1] = x;
139   }
140   bitrv2_32or128(nw, ip + 2, rdft_w);
141 
142   // pre-calculate constants used by cft1st_128 and cftmdl_128...
143   cftmdl_wk1r[0] = rdft_w[2];
144   cftmdl_wk1r[1] = rdft_w[2];
145   cftmdl_wk1r[2] = rdft_w[2];
146   cftmdl_wk1r[3] = -rdft_w[2];
147   {
148     int k1;
149 
150     for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) {
151       const int k2 = 2 * k1;
152       const float wk2r = rdft_w[k1 + 0];
153       const float wk2i = rdft_w[k1 + 1];
154       float wk1r, wk1i;
155       // ... scalar version.
156       wk1r = rdft_w[k2 + 0];
157       wk1i = rdft_w[k2 + 1];
158       rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i;
159       rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i;
160       wk1r = rdft_w[k2 + 2];
161       wk1i = rdft_w[k2 + 3];
162       rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i;
163       rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i;
164       // ... vector version.
165       rdft_wk1r[k2 + 0] = rdft_w[k2 + 0];
166       rdft_wk1r[k2 + 1] = rdft_w[k2 + 0];
167       rdft_wk1r[k2 + 2] = rdft_w[k2 + 2];
168       rdft_wk1r[k2 + 3] = rdft_w[k2 + 2];
169       rdft_wk2r[k2 + 0] = rdft_w[k1 + 0];
170       rdft_wk2r[k2 + 1] = rdft_w[k1 + 0];
171       rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1];
172       rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1];
173       rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0];
174       rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0];
175       rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0];
176       rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0];
177       rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1];
178       rdft_wk1i[k2 + 1] = rdft_w[k2 + 1];
179       rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3];
180       rdft_wk1i[k2 + 3] = rdft_w[k2 + 3];
181       rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1];
182       rdft_wk2i[k2 + 1] = rdft_w[k1 + 1];
183       rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0];
184       rdft_wk2i[k2 + 3] = rdft_w[k1 + 0];
185       rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1];
186       rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1];
187       rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1];
188       rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1];
189     }
190   }
191 }
192 
makect_32(void)193 static void makect_32(void) {
194   float *c = rdft_w + 32;
195   const int nc = 32;
196   int j, nch;
197   float delta;
198 
199   ip[1] = nc;
200   nch = nc >> 1;
201   delta = atanf(1.0f) / nch;
202   c[0] = cosf(delta * nch);
203   c[nch] = 0.5f * c[0];
204   for (j = 1; j < nch; j++) {
205     c[j] = 0.5f * cosf(delta * j);
206     c[nc - j] = 0.5f * sinf(delta * j);
207   }
208 }
209 
cft1st_128_C(float * a)210 static void cft1st_128_C(float *a) {
211   const int n = 128;
212   int j, k1, k2;
213   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
214   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
215 
216   x0r = a[0] + a[2];
217   x0i = a[1] + a[3];
218   x1r = a[0] - a[2];
219   x1i = a[1] - a[3];
220   x2r = a[4] + a[6];
221   x2i = a[5] + a[7];
222   x3r = a[4] - a[6];
223   x3i = a[5] - a[7];
224   a[0] = x0r + x2r;
225   a[1] = x0i + x2i;
226   a[4] = x0r - x2r;
227   a[5] = x0i - x2i;
228   a[2] = x1r - x3i;
229   a[3] = x1i + x3r;
230   a[6] = x1r + x3i;
231   a[7] = x1i - x3r;
232   wk1r = rdft_w[2];
233   x0r = a[8] + a[10];
234   x0i = a[9] + a[11];
235   x1r = a[8] - a[10];
236   x1i = a[9] - a[11];
237   x2r = a[12] + a[14];
238   x2i = a[13] + a[15];
239   x3r = a[12] - a[14];
240   x3i = a[13] - a[15];
241   a[8] = x0r + x2r;
242   a[9] = x0i + x2i;
243   a[12] = x2i - x0i;
244   a[13] = x0r - x2r;
245   x0r = x1r - x3i;
246   x0i = x1i + x3r;
247   a[10] = wk1r * (x0r - x0i);
248   a[11] = wk1r * (x0r + x0i);
249   x0r = x3i + x1r;
250   x0i = x3r - x1i;
251   a[14] = wk1r * (x0i - x0r);
252   a[15] = wk1r * (x0i + x0r);
253   k1 = 0;
254   for (j = 16; j < n; j += 16) {
255     k1 += 2;
256     k2 = 2 * k1;
257     wk2r = rdft_w[k1 + 0];
258     wk2i = rdft_w[k1 + 1];
259     wk1r = rdft_w[k2 + 0];
260     wk1i = rdft_w[k2 + 1];
261     wk3r = rdft_wk3ri_first[k1 + 0];
262     wk3i = rdft_wk3ri_first[k1 + 1];
263     x0r = a[j + 0] + a[j + 2];
264     x0i = a[j + 1] + a[j + 3];
265     x1r = a[j + 0] - a[j + 2];
266     x1i = a[j + 1] - a[j + 3];
267     x2r = a[j + 4] + a[j + 6];
268     x2i = a[j + 5] + a[j + 7];
269     x3r = a[j + 4] - a[j + 6];
270     x3i = a[j + 5] - a[j + 7];
271     a[j + 0] = x0r + x2r;
272     a[j + 1] = x0i + x2i;
273     x0r -= x2r;
274     x0i -= x2i;
275     a[j + 4] = wk2r * x0r - wk2i * x0i;
276     a[j + 5] = wk2r * x0i + wk2i * x0r;
277     x0r = x1r - x3i;
278     x0i = x1i + x3r;
279     a[j + 2] = wk1r * x0r - wk1i * x0i;
280     a[j + 3] = wk1r * x0i + wk1i * x0r;
281     x0r = x1r + x3i;
282     x0i = x1i - x3r;
283     a[j + 6] = wk3r * x0r - wk3i * x0i;
284     a[j + 7] = wk3r * x0i + wk3i * x0r;
285     wk1r = rdft_w[k2 + 2];
286     wk1i = rdft_w[k2 + 3];
287     wk3r = rdft_wk3ri_second[k1 + 0];
288     wk3i = rdft_wk3ri_second[k1 + 1];
289     x0r = a[j + 8] + a[j + 10];
290     x0i = a[j + 9] + a[j + 11];
291     x1r = a[j + 8] - a[j + 10];
292     x1i = a[j + 9] - a[j + 11];
293     x2r = a[j + 12] + a[j + 14];
294     x2i = a[j + 13] + a[j + 15];
295     x3r = a[j + 12] - a[j + 14];
296     x3i = a[j + 13] - a[j + 15];
297     a[j + 8] = x0r + x2r;
298     a[j + 9] = x0i + x2i;
299     x0r -= x2r;
300     x0i -= x2i;
301     a[j + 12] = -wk2i * x0r - wk2r * x0i;
302     a[j + 13] = -wk2i * x0i + wk2r * x0r;
303     x0r = x1r - x3i;
304     x0i = x1i + x3r;
305     a[j + 10] = wk1r * x0r - wk1i * x0i;
306     a[j + 11] = wk1r * x0i + wk1i * x0r;
307     x0r = x1r + x3i;
308     x0i = x1i - x3r;
309     a[j + 14] = wk3r * x0r - wk3i * x0i;
310     a[j + 15] = wk3r * x0i + wk3i * x0r;
311   }
312 }
313 
cftmdl_128_C(float * a)314 static void cftmdl_128_C(float *a) {
315   const int l = 8;
316   const int n = 128;
317   const int m = 32;
318   int j0, j1, j2, j3, k, k1, k2, m2;
319   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
320   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
321 
322   for (j0 = 0; j0 < l; j0 += 2) {
323     j1 = j0 +  8;
324     j2 = j0 + 16;
325     j3 = j0 + 24;
326     x0r = a[j0 + 0] + a[j1 + 0];
327     x0i = a[j0 + 1] + a[j1 + 1];
328     x1r = a[j0 + 0] - a[j1 + 0];
329     x1i = a[j0 + 1] - a[j1 + 1];
330     x2r = a[j2 + 0] + a[j3 + 0];
331     x2i = a[j2 + 1] + a[j3 + 1];
332     x3r = a[j2 + 0] - a[j3 + 0];
333     x3i = a[j2 + 1] - a[j3 + 1];
334     a[j0 + 0] = x0r + x2r;
335     a[j0 + 1] = x0i + x2i;
336     a[j2 + 0] = x0r - x2r;
337     a[j2 + 1] = x0i - x2i;
338     a[j1 + 0] = x1r - x3i;
339     a[j1 + 1] = x1i + x3r;
340     a[j3 + 0] = x1r + x3i;
341     a[j3 + 1] = x1i - x3r;
342   }
343   wk1r = rdft_w[2];
344   for (j0 = m; j0 < l + m; j0 += 2) {
345     j1 = j0 +  8;
346     j2 = j0 + 16;
347     j3 = j0 + 24;
348     x0r = a[j0 + 0] + a[j1 + 0];
349     x0i = a[j0 + 1] + a[j1 + 1];
350     x1r = a[j0 + 0] - a[j1 + 0];
351     x1i = a[j0 + 1] - a[j1 + 1];
352     x2r = a[j2 + 0] + a[j3 + 0];
353     x2i = a[j2 + 1] + a[j3 + 1];
354     x3r = a[j2 + 0] - a[j3 + 0];
355     x3i = a[j2 + 1] - a[j3 + 1];
356     a[j0 + 0] = x0r + x2r;
357     a[j0 + 1] = x0i + x2i;
358     a[j2 + 0] = x2i - x0i;
359     a[j2 + 1] = x0r - x2r;
360     x0r = x1r - x3i;
361     x0i = x1i + x3r;
362     a[j1 + 0] = wk1r * (x0r - x0i);
363     a[j1 + 1] = wk1r * (x0r + x0i);
364     x0r = x3i + x1r;
365     x0i = x3r - x1i;
366     a[j3 + 0] = wk1r * (x0i - x0r);
367     a[j3 + 1] = wk1r * (x0i + x0r);
368   }
369   k1 = 0;
370   m2 = 2 * m;
371   for (k = m2; k < n; k += m2) {
372     k1 += 2;
373     k2 = 2 * k1;
374     wk2r = rdft_w[k1 + 0];
375     wk2i = rdft_w[k1 + 1];
376     wk1r = rdft_w[k2 + 0];
377     wk1i = rdft_w[k2 + 1];
378     wk3r = rdft_wk3ri_first[k1 + 0];
379     wk3i = rdft_wk3ri_first[k1 + 1];
380     for (j0 = k; j0 < l + k; j0 += 2) {
381       j1 = j0 +  8;
382       j2 = j0 + 16;
383       j3 = j0 + 24;
384       x0r = a[j0 + 0] + a[j1 + 0];
385       x0i = a[j0 + 1] + a[j1 + 1];
386       x1r = a[j0 + 0] - a[j1 + 0];
387       x1i = a[j0 + 1] - a[j1 + 1];
388       x2r = a[j2 + 0] + a[j3 + 0];
389       x2i = a[j2 + 1] + a[j3 + 1];
390       x3r = a[j2 + 0] - a[j3 + 0];
391       x3i = a[j2 + 1] - a[j3 + 1];
392       a[j0 + 0] = x0r + x2r;
393       a[j0 + 1] = x0i + x2i;
394       x0r -= x2r;
395       x0i -= x2i;
396       a[j2 + 0] = wk2r * x0r - wk2i * x0i;
397       a[j2 + 1] = wk2r * x0i + wk2i * x0r;
398       x0r = x1r - x3i;
399       x0i = x1i + x3r;
400       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
401       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
402       x0r = x1r + x3i;
403       x0i = x1i - x3r;
404       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
405       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
406     }
407     wk1r = rdft_w[k2 + 2];
408     wk1i = rdft_w[k2 + 3];
409     wk3r = rdft_wk3ri_second[k1 + 0];
410     wk3i = rdft_wk3ri_second[k1 + 1];
411     for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
412       j1 = j0 +  8;
413       j2 = j0 + 16;
414       j3 = j0 + 24;
415       x0r = a[j0 + 0] + a[j1 + 0];
416       x0i = a[j0 + 1] + a[j1 + 1];
417       x1r = a[j0 + 0] - a[j1 + 0];
418       x1i = a[j0 + 1] - a[j1 + 1];
419       x2r = a[j2 + 0] + a[j3 + 0];
420       x2i = a[j2 + 1] + a[j3 + 1];
421       x3r = a[j2 + 0] - a[j3 + 0];
422       x3i = a[j2 + 1] - a[j3 + 1];
423       a[j0 + 0] = x0r + x2r;
424       a[j0 + 1] = x0i + x2i;
425       x0r -= x2r;
426       x0i -= x2i;
427       a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
428       a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
429       x0r = x1r - x3i;
430       x0i = x1i + x3r;
431       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
432       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
433       x0r = x1r + x3i;
434       x0i = x1i - x3r;
435       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
436       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
437     }
438   }
439 }
440 
cftfsub_128(float * a)441 static void cftfsub_128(float *a) {
442   int j, j1, j2, j3, l;
443   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
444 
445   cft1st_128(a);
446   cftmdl_128(a);
447   l = 32;
448   for (j = 0; j < l; j += 2) {
449     j1 = j + l;
450     j2 = j1 + l;
451     j3 = j2 + l;
452     x0r = a[j] + a[j1];
453     x0i = a[j + 1] + a[j1 + 1];
454     x1r = a[j] - a[j1];
455     x1i = a[j + 1] - a[j1 + 1];
456     x2r = a[j2] + a[j3];
457     x2i = a[j2 + 1] + a[j3 + 1];
458     x3r = a[j2] - a[j3];
459     x3i = a[j2 + 1] - a[j3 + 1];
460     a[j] = x0r + x2r;
461     a[j + 1] = x0i + x2i;
462     a[j2] = x0r - x2r;
463     a[j2 + 1] = x0i - x2i;
464     a[j1] = x1r - x3i;
465     a[j1 + 1] = x1i + x3r;
466     a[j3] = x1r + x3i;
467     a[j3 + 1] = x1i - x3r;
468   }
469 }
470 
cftbsub_128(float * a)471 static void cftbsub_128(float *a) {
472   int j, j1, j2, j3, l;
473   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
474 
475   cft1st_128(a);
476   cftmdl_128(a);
477   l = 32;
478 
479   for (j = 0; j < l; j += 2) {
480     j1 = j + l;
481     j2 = j1 + l;
482     j3 = j2 + l;
483     x0r = a[j] + a[j1];
484     x0i = -a[j + 1] - a[j1 + 1];
485     x1r = a[j] - a[j1];
486     x1i = -a[j + 1] + a[j1 + 1];
487     x2r = a[j2] + a[j3];
488     x2i = a[j2 + 1] + a[j3 + 1];
489     x3r = a[j2] - a[j3];
490     x3i = a[j2 + 1] - a[j3 + 1];
491     a[j] = x0r + x2r;
492     a[j + 1] = x0i - x2i;
493     a[j2] = x0r - x2r;
494     a[j2 + 1] = x0i + x2i;
495     a[j1] = x1r - x3i;
496     a[j1 + 1] = x1i - x3r;
497     a[j3] = x1r + x3i;
498     a[j3 + 1] = x1i + x3r;
499   }
500 }
501 
rftfsub_128_C(float * a)502 static void rftfsub_128_C(float *a) {
503   const float *c = rdft_w + 32;
504   int j1, j2, k1, k2;
505   float wkr, wki, xr, xi, yr, yi;
506 
507   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
508     k2 = 128 - j2;
509     k1 =  32 - j1;
510     wkr = 0.5f - c[k1];
511     wki = c[j1];
512     xr = a[j2 + 0] - a[k2 + 0];
513     xi = a[j2 + 1] + a[k2 + 1];
514     yr = wkr * xr - wki * xi;
515     yi = wkr * xi + wki * xr;
516     a[j2 + 0] -= yr;
517     a[j2 + 1] -= yi;
518     a[k2 + 0] += yr;
519     a[k2 + 1] -= yi;
520   }
521 }
522 
rftbsub_128_C(float * a)523 static void rftbsub_128_C(float *a) {
524   const float *c = rdft_w + 32;
525   int j1, j2, k1, k2;
526   float wkr, wki, xr, xi, yr, yi;
527 
528   a[1] = -a[1];
529   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
530     k2 = 128 - j2;
531     k1 =  32 - j1;
532     wkr = 0.5f - c[k1];
533     wki = c[j1];
534     xr = a[j2 + 0] - a[k2 + 0];
535     xi = a[j2 + 1] + a[k2 + 1];
536     yr = wkr * xr + wki * xi;
537     yi = wkr * xi - wki * xr;
538     a[j2 + 0] = a[j2 + 0] - yr;
539     a[j2 + 1] = yi - a[j2 + 1];
540     a[k2 + 0] = yr + a[k2 + 0];
541     a[k2 + 1] = yi - a[k2 + 1];
542   }
543   a[65] = -a[65];
544 }
545 
aec_rdft_forward_128(float * a)546 void aec_rdft_forward_128(float *a) {
547   const int n = 128;
548   float xi;
549 
550   bitrv2_32or128(n, ip + 2, a);
551   cftfsub_128(a);
552   rftfsub_128(a);
553   xi = a[0] - a[1];
554   a[0] += a[1];
555   a[1] = xi;
556 }
557 
aec_rdft_inverse_128(float * a)558 void aec_rdft_inverse_128(float *a) {
559   const int n = 128;
560 
561   a[1] = 0.5f * (a[0] - a[1]);
562   a[0] -= a[1];
563   rftbsub_128(a);
564   bitrv2_32or128(n, ip + 2, a);
565   cftbsub_128(a);
566 }
567 
568 // code path selection
569 rft_sub_128_t cft1st_128;
570 rft_sub_128_t cftmdl_128;
571 rft_sub_128_t rftfsub_128;
572 rft_sub_128_t rftbsub_128;
573 
aec_rdft_init(void)574 void aec_rdft_init(void) {
575   cft1st_128 = cft1st_128_C;
576   cftmdl_128 = cftmdl_128_C;
577   rftfsub_128 = rftfsub_128_C;
578   rftbsub_128 = rftbsub_128_C;
579 #ifdef SSE2_OPS
580 #if defined(WEBRTC_ARCH_X86_FAMILY)
581   if (WebRtc_GetCPUInfo(kSSE2)) {
582     aec_rdft_init_sse2();
583   }
584 #endif
585 #endif
586   // init library constants.
587   makewt_32();
588   makect_32();
589 }
590