1 /*
2  * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
3  * Copyright Takuya OOURA, 1996-2001
4  *
5  * You may use, copy, modify and distribute this code for any purpose (include
6  * commercial use) and without fee. Please refer to this package when you modify
7  * this code.
8  *
9  * Changes:
10  * Trivial type modifications by the WebRTC authors.
11  */
12 
13 /*
14 Fast Fourier/Cosine/Sine Transform
15     dimension   :one
16     data length :power of 2
17     decimation  :frequency
18     radix       :4, 2
19     data        :inplace
20     table       :use
21 functions
22     cdft: Complex Discrete Fourier Transform
23     rdft: Real Discrete Fourier Transform
24     ddct: Discrete Cosine Transform
25     ddst: Discrete Sine Transform
26     dfct: Cosine Transform of RDFT (Real Symmetric DFT)
27     dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
28 function prototypes
29     void cdft(int, int, float *, int *, float *);
30     void rdft(size_t, int, float *, size_t *, float *);
31     void ddct(int, int, float *, int *, float *);
32     void ddst(int, int, float *, int *, float *);
33     void dfct(int, float *, float *, int *, float *);
34     void dfst(int, float *, float *, int *, float *);
35 
36 
37 -------- Complex DFT (Discrete Fourier Transform) --------
38     [definition]
39         <case1>
40             X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
41         <case2>
42             X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
43         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
44     [usage]
45         <case1>
46             ip[0] = 0; // first time only
47             cdft(2*n, 1, a, ip, w);
48         <case2>
49             ip[0] = 0; // first time only
50             cdft(2*n, -1, a, ip, w);
51     [parameters]
52         2*n            :data length (int)
53                         n >= 1, n = power of 2
54         a[0...2*n-1]   :input/output data (float *)
55                         input data
56                             a[2*j] = Re(x[j]),
57                             a[2*j+1] = Im(x[j]), 0<=j<n
58                         output data
59                             a[2*k] = Re(X[k]),
60                             a[2*k+1] = Im(X[k]), 0<=k<n
61         ip[0...*]      :work area for bit reversal (int *)
62                         length of ip >= 2+sqrt(n)
63                         strictly,
64                         length of ip >=
65                             2+(1<<(int)(log(n+0.5)/log(2))/2).
66                         ip[0],ip[1] are pointers of the cos/sin table.
67         w[0...n/2-1]   :cos/sin table (float *)
68                         w[],ip[] are initialized if ip[0] == 0.
69     [remark]
70         Inverse of
71             cdft(2*n, -1, a, ip, w);
72         is
73             cdft(2*n, 1, a, ip, w);
74             for (j = 0; j <= 2 * n - 1; j++) {
75                 a[j] *= 1.0 / n;
76             }
77         .
78 
79 
80 -------- Real DFT / Inverse of Real DFT --------
81     [definition]
82         <case1> RDFT
83             R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
84             I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
85         <case2> IRDFT (excluding scale)
86             a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
87                    sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
88                    sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
89     [usage]
90         <case1>
91             ip[0] = 0; // first time only
92             rdft(n, 1, a, ip, w);
93         <case2>
94             ip[0] = 0; // first time only
95             rdft(n, -1, a, ip, w);
96     [parameters]
97         n              :data length (size_t)
98                         n >= 2, n = power of 2
99         a[0...n-1]     :input/output data (float *)
100                         <case1>
101                             output data
102                                 a[2*k] = R[k], 0<=k<n/2
103                                 a[2*k+1] = I[k], 0<k<n/2
104                                 a[1] = R[n/2]
105                         <case2>
106                             input data
107                                 a[2*j] = R[j], 0<=j<n/2
108                                 a[2*j+1] = I[j], 0<j<n/2
109                                 a[1] = R[n/2]
110         ip[0...*]      :work area for bit reversal (size_t *)
111                         length of ip >= 2+sqrt(n/2)
112                         strictly,
113                         length of ip >=
114                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
115                         ip[0],ip[1] are pointers of the cos/sin table.
116         w[0...n/2-1]   :cos/sin table (float *)
117                         w[],ip[] are initialized if ip[0] == 0.
118     [remark]
119         Inverse of
120             rdft(n, 1, a, ip, w);
121         is
122             rdft(n, -1, a, ip, w);
123             for (j = 0; j <= n - 1; j++) {
124                 a[j] *= 2.0 / n;
125             }
126         .
127 
128 
129 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
130     [definition]
131         <case1> IDCT (excluding scale)
132             C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
133         <case2> DCT
134             C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
135     [usage]
136         <case1>
137             ip[0] = 0; // first time only
138             ddct(n, 1, a, ip, w);
139         <case2>
140             ip[0] = 0; // first time only
141             ddct(n, -1, a, ip, w);
142     [parameters]
143         n              :data length (int)
144                         n >= 2, n = power of 2
145         a[0...n-1]     :input/output data (float *)
146                         output data
147                             a[k] = C[k], 0<=k<n
148         ip[0...*]      :work area for bit reversal (int *)
149                         length of ip >= 2+sqrt(n/2)
150                         strictly,
151                         length of ip >=
152                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
153                         ip[0],ip[1] are pointers of the cos/sin table.
154         w[0...n*5/4-1] :cos/sin table (float *)
155                         w[],ip[] are initialized if ip[0] == 0.
156     [remark]
157         Inverse of
158             ddct(n, -1, a, ip, w);
159         is
160             a[0] *= 0.5;
161             ddct(n, 1, a, ip, w);
162             for (j = 0; j <= n - 1; j++) {
163                 a[j] *= 2.0 / n;
164             }
165         .
166 
167 
168 -------- DST (Discrete Sine Transform) / Inverse of DST --------
169     [definition]
170         <case1> IDST (excluding scale)
171             S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
172         <case2> DST
173             S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
174     [usage]
175         <case1>
176             ip[0] = 0; // first time only
177             ddst(n, 1, a, ip, w);
178         <case2>
179             ip[0] = 0; // first time only
180             ddst(n, -1, a, ip, w);
181     [parameters]
182         n              :data length (int)
183                         n >= 2, n = power of 2
184         a[0...n-1]     :input/output data (float *)
185                         <case1>
186                             input data
187                                 a[j] = A[j], 0<j<n
188                                 a[0] = A[n]
189                             output data
190                                 a[k] = S[k], 0<=k<n
191                         <case2>
192                             output data
193                                 a[k] = S[k], 0<k<n
194                                 a[0] = S[n]
195         ip[0...*]      :work area for bit reversal (int *)
196                         length of ip >= 2+sqrt(n/2)
197                         strictly,
198                         length of ip >=
199                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
200                         ip[0],ip[1] are pointers of the cos/sin table.
201         w[0...n*5/4-1] :cos/sin table (float *)
202                         w[],ip[] are initialized if ip[0] == 0.
203     [remark]
204         Inverse of
205             ddst(n, -1, a, ip, w);
206         is
207             a[0] *= 0.5;
208             ddst(n, 1, a, ip, w);
209             for (j = 0; j <= n - 1; j++) {
210                 a[j] *= 2.0 / n;
211             }
212         .
213 
214 
215 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
216     [definition]
217         C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
218     [usage]
219         ip[0] = 0; // first time only
220         dfct(n, a, t, ip, w);
221     [parameters]
222         n              :data length - 1 (int)
223                         n >= 2, n = power of 2
224         a[0...n]       :input/output data (float *)
225                         output data
226                             a[k] = C[k], 0<=k<=n
227         t[0...n/2]     :work area (float *)
228         ip[0...*]      :work area for bit reversal (int *)
229                         length of ip >= 2+sqrt(n/4)
230                         strictly,
231                         length of ip >=
232                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
233                         ip[0],ip[1] are pointers of the cos/sin table.
234         w[0...n*5/8-1] :cos/sin table (float *)
235                         w[],ip[] are initialized if ip[0] == 0.
236     [remark]
237         Inverse of
238             a[0] *= 0.5;
239             a[n] *= 0.5;
240             dfct(n, a, t, ip, w);
241         is
242             a[0] *= 0.5;
243             a[n] *= 0.5;
244             dfct(n, a, t, ip, w);
245             for (j = 0; j <= n; j++) {
246                 a[j] *= 2.0 / n;
247             }
248         .
249 
250 
251 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
252     [definition]
253         S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
254     [usage]
255         ip[0] = 0; // first time only
256         dfst(n, a, t, ip, w);
257     [parameters]
258         n              :data length + 1 (int)
259                         n >= 2, n = power of 2
260         a[0...n-1]     :input/output data (float *)
261                         output data
262                             a[k] = S[k], 0<k<n
263                         (a[0] is used for work area)
264         t[0...n/2-1]   :work area (float *)
265         ip[0...*]      :work area for bit reversal (int *)
266                         length of ip >= 2+sqrt(n/4)
267                         strictly,
268                         length of ip >=
269                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
270                         ip[0],ip[1] are pointers of the cos/sin table.
271         w[0...n*5/8-1] :cos/sin table (float *)
272                         w[],ip[] are initialized if ip[0] == 0.
273     [remark]
274         Inverse of
275             dfst(n, a, t, ip, w);
276         is
277             dfst(n, a, t, ip, w);
278             for (j = 1; j <= n - 1; j++) {
279                 a[j] *= 2.0 / n;
280             }
281         .
282 
283 
284 Appendix :
285     The cos/sin table is recalculated when the larger table required.
286     w[] and ip[] are compatible with all routines.
287 */
288 
289 #include <stddef.h>
290 
291 static void makewt(size_t nw, size_t *ip, float *w);
292 static void makect(size_t nc, size_t *ip, float *c);
293 static void bitrv2(size_t n, size_t *ip, float *a);
294 #if 0  // Not used.
295 static void bitrv2conj(int n, int *ip, float *a);
296 #endif
297 static void cftfsub(size_t n, float *a, float *w);
298 static void cftbsub(size_t n, float *a, float *w);
299 static void cft1st(size_t n, float *a, float *w);
300 static void cftmdl(size_t n, size_t l, float *a, float *w);
301 static void rftfsub(size_t n, float *a, size_t nc, float *c);
302 static void rftbsub(size_t n, float *a, size_t nc, float *c);
303 #if 0  // Not used.
304 static void dctsub(int n, float *a, int nc, float *c)
305 static void dstsub(int n, float *a, int nc, float *c)
306 #endif
307 
308 
309 #if 0  // Not used.
310 void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w)
311 {
312     if (n > (ip[0] << 2)) {
313         makewt(n >> 2, ip, w);
314     }
315     if (n > 4) {
316         if (isgn >= 0) {
317             bitrv2(n, ip + 2, a);
318             cftfsub(n, a, w);
319         } else {
320             bitrv2conj(n, ip + 2, a);
321             cftbsub(n, a, w);
322         }
323     } else if (n == 4) {
324         cftfsub(n, a, w);
325     }
326 }
327 #endif
328 
329 
WebRtc_rdft(size_t n,int isgn,float * a,size_t * ip,float * w)330 void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w)
331 {
332     size_t nw, nc;
333     float xi;
334 
335     nw = ip[0];
336     if (n > (nw << 2)) {
337         nw = n >> 2;
338         makewt(nw, ip, w);
339     }
340     nc = ip[1];
341     if (n > (nc << 2)) {
342         nc = n >> 2;
343         makect(nc, ip, w + nw);
344     }
345     if (isgn >= 0) {
346         if (n > 4) {
347             bitrv2(n, ip + 2, a);
348             cftfsub(n, a, w);
349             rftfsub(n, a, nc, w + nw);
350         } else if (n == 4) {
351             cftfsub(n, a, w);
352         }
353         xi = a[0] - a[1];
354         a[0] += a[1];
355         a[1] = xi;
356     } else {
357         a[1] = 0.5f * (a[0] - a[1]);
358         a[0] -= a[1];
359         if (n > 4) {
360             rftbsub(n, a, nc, w + nw);
361             bitrv2(n, ip + 2, a);
362             cftbsub(n, a, w);
363         } else if (n == 4) {
364             cftfsub(n, a, w);
365         }
366     }
367 }
368 
369 #if 0  // Not used.
370 static void ddct(int n, int isgn, float *a, int *ip, float *w)
371 {
372     int j, nw, nc;
373     float xr;
374 
375     nw = ip[0];
376     if (n > (nw << 2)) {
377         nw = n >> 2;
378         makewt(nw, ip, w);
379     }
380     nc = ip[1];
381     if (n > nc) {
382         nc = n;
383         makect(nc, ip, w + nw);
384     }
385     if (isgn < 0) {
386         xr = a[n - 1];
387         for (j = n - 2; j >= 2; j -= 2) {
388             a[j + 1] = a[j] - a[j - 1];
389             a[j] += a[j - 1];
390         }
391         a[1] = a[0] - xr;
392         a[0] += xr;
393         if (n > 4) {
394             rftbsub(n, a, nc, w + nw);
395             bitrv2(n, ip + 2, a);
396             cftbsub(n, a, w);
397         } else if (n == 4) {
398             cftfsub(n, a, w);
399         }
400     }
401     dctsub(n, a, nc, w + nw);
402     if (isgn >= 0) {
403         if (n > 4) {
404             bitrv2(n, ip + 2, a);
405             cftfsub(n, a, w);
406             rftfsub(n, a, nc, w + nw);
407         } else if (n == 4) {
408             cftfsub(n, a, w);
409         }
410         xr = a[0] - a[1];
411         a[0] += a[1];
412         for (j = 2; j < n; j += 2) {
413             a[j - 1] = a[j] - a[j + 1];
414             a[j] += a[j + 1];
415         }
416         a[n - 1] = xr;
417     }
418 }
419 
420 
421 static void ddst(int n, int isgn, float *a, int *ip, float *w)
422 {
423     int j, nw, nc;
424     float xr;
425 
426     nw = ip[0];
427     if (n > (nw << 2)) {
428         nw = n >> 2;
429         makewt(nw, ip, w);
430     }
431     nc = ip[1];
432     if (n > nc) {
433         nc = n;
434         makect(nc, ip, w + nw);
435     }
436     if (isgn < 0) {
437         xr = a[n - 1];
438         for (j = n - 2; j >= 2; j -= 2) {
439             a[j + 1] = -a[j] - a[j - 1];
440             a[j] -= a[j - 1];
441         }
442         a[1] = a[0] + xr;
443         a[0] -= xr;
444         if (n > 4) {
445             rftbsub(n, a, nc, w + nw);
446             bitrv2(n, ip + 2, a);
447             cftbsub(n, a, w);
448         } else if (n == 4) {
449             cftfsub(n, a, w);
450         }
451     }
452     dstsub(n, a, nc, w + nw);
453     if (isgn >= 0) {
454         if (n > 4) {
455             bitrv2(n, ip + 2, a);
456             cftfsub(n, a, w);
457             rftfsub(n, a, nc, w + nw);
458         } else if (n == 4) {
459             cftfsub(n, a, w);
460         }
461         xr = a[0] - a[1];
462         a[0] += a[1];
463         for (j = 2; j < n; j += 2) {
464             a[j - 1] = -a[j] - a[j + 1];
465             a[j] -= a[j + 1];
466         }
467         a[n - 1] = -xr;
468     }
469 }
470 
471 
472 static void dfct(int n, float *a, float *t, int *ip, float *w)
473 {
474     int j, k, l, m, mh, nw, nc;
475     float xr, xi, yr, yi;
476 
477     nw = ip[0];
478     if (n > (nw << 3)) {
479         nw = n >> 3;
480         makewt(nw, ip, w);
481     }
482     nc = ip[1];
483     if (n > (nc << 1)) {
484         nc = n >> 1;
485         makect(nc, ip, w + nw);
486     }
487     m = n >> 1;
488     yi = a[m];
489     xi = a[0] + a[n];
490     a[0] -= a[n];
491     t[0] = xi - yi;
492     t[m] = xi + yi;
493     if (n > 2) {
494         mh = m >> 1;
495         for (j = 1; j < mh; j++) {
496             k = m - j;
497             xr = a[j] - a[n - j];
498             xi = a[j] + a[n - j];
499             yr = a[k] - a[n - k];
500             yi = a[k] + a[n - k];
501             a[j] = xr;
502             a[k] = yr;
503             t[j] = xi - yi;
504             t[k] = xi + yi;
505         }
506         t[mh] = a[mh] + a[n - mh];
507         a[mh] -= a[n - mh];
508         dctsub(m, a, nc, w + nw);
509         if (m > 4) {
510             bitrv2(m, ip + 2, a);
511             cftfsub(m, a, w);
512             rftfsub(m, a, nc, w + nw);
513         } else if (m == 4) {
514             cftfsub(m, a, w);
515         }
516         a[n - 1] = a[0] - a[1];
517         a[1] = a[0] + a[1];
518         for (j = m - 2; j >= 2; j -= 2) {
519             a[2 * j + 1] = a[j] + a[j + 1];
520             a[2 * j - 1] = a[j] - a[j + 1];
521         }
522         l = 2;
523         m = mh;
524         while (m >= 2) {
525             dctsub(m, t, nc, w + nw);
526             if (m > 4) {
527                 bitrv2(m, ip + 2, t);
528                 cftfsub(m, t, w);
529                 rftfsub(m, t, nc, w + nw);
530             } else if (m == 4) {
531                 cftfsub(m, t, w);
532             }
533             a[n - l] = t[0] - t[1];
534             a[l] = t[0] + t[1];
535             k = 0;
536             for (j = 2; j < m; j += 2) {
537                 k += l << 2;
538                 a[k - l] = t[j] - t[j + 1];
539                 a[k + l] = t[j] + t[j + 1];
540             }
541             l <<= 1;
542             mh = m >> 1;
543             for (j = 0; j < mh; j++) {
544                 k = m - j;
545                 t[j] = t[m + k] - t[m + j];
546                 t[k] = t[m + k] + t[m + j];
547             }
548             t[mh] = t[m + mh];
549             m = mh;
550         }
551         a[l] = t[0];
552         a[n] = t[2] - t[1];
553         a[0] = t[2] + t[1];
554     } else {
555         a[1] = a[0];
556         a[2] = t[0];
557         a[0] = t[1];
558     }
559 }
560 
561 static void dfst(int n, float *a, float *t, int *ip, float *w)
562 {
563     int j, k, l, m, mh, nw, nc;
564     float xr, xi, yr, yi;
565 
566     nw = ip[0];
567     if (n > (nw << 3)) {
568         nw = n >> 3;
569         makewt(nw, ip, w);
570     }
571     nc = ip[1];
572     if (n > (nc << 1)) {
573         nc = n >> 1;
574         makect(nc, ip, w + nw);
575     }
576     if (n > 2) {
577         m = n >> 1;
578         mh = m >> 1;
579         for (j = 1; j < mh; j++) {
580             k = m - j;
581             xr = a[j] + a[n - j];
582             xi = a[j] - a[n - j];
583             yr = a[k] + a[n - k];
584             yi = a[k] - a[n - k];
585             a[j] = xr;
586             a[k] = yr;
587             t[j] = xi + yi;
588             t[k] = xi - yi;
589         }
590         t[0] = a[mh] - a[n - mh];
591         a[mh] += a[n - mh];
592         a[0] = a[m];
593         dstsub(m, a, nc, w + nw);
594         if (m > 4) {
595             bitrv2(m, ip + 2, a);
596             cftfsub(m, a, w);
597             rftfsub(m, a, nc, w + nw);
598         } else if (m == 4) {
599             cftfsub(m, a, w);
600         }
601         a[n - 1] = a[1] - a[0];
602         a[1] = a[0] + a[1];
603         for (j = m - 2; j >= 2; j -= 2) {
604             a[2 * j + 1] = a[j] - a[j + 1];
605             a[2 * j - 1] = -a[j] - a[j + 1];
606         }
607         l = 2;
608         m = mh;
609         while (m >= 2) {
610             dstsub(m, t, nc, w + nw);
611             if (m > 4) {
612                 bitrv2(m, ip + 2, t);
613                 cftfsub(m, t, w);
614                 rftfsub(m, t, nc, w + nw);
615             } else if (m == 4) {
616                 cftfsub(m, t, w);
617             }
618             a[n - l] = t[1] - t[0];
619             a[l] = t[0] + t[1];
620             k = 0;
621             for (j = 2; j < m; j += 2) {
622                 k += l << 2;
623                 a[k - l] = -t[j] - t[j + 1];
624                 a[k + l] = t[j] - t[j + 1];
625             }
626             l <<= 1;
627             mh = m >> 1;
628             for (j = 1; j < mh; j++) {
629                 k = m - j;
630                 t[j] = t[m + k] + t[m + j];
631                 t[k] = t[m + k] - t[m + j];
632             }
633             t[0] = t[m + mh];
634             m = mh;
635         }
636         a[l] = t[0];
637     }
638     a[0] = 0;
639 }
640 #endif  // Not used.
641 
642 
643 /* -------- initializing routines -------- */
644 
645 
646 #include <math.h>
647 
makewt(size_t nw,size_t * ip,float * w)648 static void makewt(size_t nw, size_t *ip, float *w)
649 {
650     size_t j, nwh;
651     float delta, x, y;
652 
653     ip[0] = nw;
654     ip[1] = 1;
655     if (nw > 2) {
656         nwh = nw >> 1;
657         delta = atanf(1.0f) / nwh;
658         w[0] = 1;
659         w[1] = 0;
660         w[nwh] = (float)cos(delta * nwh);
661         w[nwh + 1] = w[nwh];
662         if (nwh > 2) {
663             for (j = 2; j < nwh; j += 2) {
664                 x = (float)cos(delta * j);
665                 y = (float)sin(delta * j);
666                 w[j] = x;
667                 w[j + 1] = y;
668                 w[nw - j] = y;
669                 w[nw - j + 1] = x;
670             }
671             bitrv2(nw, ip + 2, w);
672         }
673     }
674 }
675 
676 
makect(size_t nc,size_t * ip,float * c)677 static void makect(size_t nc, size_t *ip, float *c)
678 {
679     size_t j, nch;
680     float delta;
681 
682     ip[1] = nc;
683     if (nc > 1) {
684         nch = nc >> 1;
685         delta = atanf(1.0f) / nch;
686         c[0] = (float)cos(delta * nch);
687         c[nch] = 0.5f * c[0];
688         for (j = 1; j < nch; j++) {
689             c[j] = 0.5f * (float)cos(delta * j);
690             c[nc - j] = 0.5f * (float)sin(delta * j);
691         }
692     }
693 }
694 
695 
696 /* -------- child routines -------- */
697 
698 
bitrv2(size_t n,size_t * ip,float * a)699 static void bitrv2(size_t n, size_t *ip, float *a)
700 {
701     size_t j, j1, k, k1, l, m, m2;
702     float xr, xi, yr, yi;
703 
704     ip[0] = 0;
705     l = n;
706     m = 1;
707     while ((m << 3) < l) {
708         l >>= 1;
709         for (j = 0; j < m; j++) {
710             ip[m + j] = ip[j] + l;
711         }
712         m <<= 1;
713     }
714     m2 = 2 * m;
715     if ((m << 3) == l) {
716         for (k = 0; k < m; k++) {
717             for (j = 0; j < k; j++) {
718                 j1 = 2 * j + ip[k];
719                 k1 = 2 * k + ip[j];
720                 xr = a[j1];
721                 xi = a[j1 + 1];
722                 yr = a[k1];
723                 yi = a[k1 + 1];
724                 a[j1] = yr;
725                 a[j1 + 1] = yi;
726                 a[k1] = xr;
727                 a[k1 + 1] = xi;
728                 j1 += m2;
729                 k1 += 2 * m2;
730                 xr = a[j1];
731                 xi = a[j1 + 1];
732                 yr = a[k1];
733                 yi = a[k1 + 1];
734                 a[j1] = yr;
735                 a[j1 + 1] = yi;
736                 a[k1] = xr;
737                 a[k1 + 1] = xi;
738                 j1 += m2;
739                 k1 -= m2;
740                 xr = a[j1];
741                 xi = a[j1 + 1];
742                 yr = a[k1];
743                 yi = a[k1 + 1];
744                 a[j1] = yr;
745                 a[j1 + 1] = yi;
746                 a[k1] = xr;
747                 a[k1 + 1] = xi;
748                 j1 += m2;
749                 k1 += 2 * m2;
750                 xr = a[j1];
751                 xi = a[j1 + 1];
752                 yr = a[k1];
753                 yi = a[k1 + 1];
754                 a[j1] = yr;
755                 a[j1 + 1] = yi;
756                 a[k1] = xr;
757                 a[k1 + 1] = xi;
758             }
759             j1 = 2 * k + m2 + ip[k];
760             k1 = j1 + m2;
761             xr = a[j1];
762             xi = a[j1 + 1];
763             yr = a[k1];
764             yi = a[k1 + 1];
765             a[j1] = yr;
766             a[j1 + 1] = yi;
767             a[k1] = xr;
768             a[k1 + 1] = xi;
769         }
770     } else {
771         for (k = 1; k < m; k++) {
772             for (j = 0; j < k; j++) {
773                 j1 = 2 * j + ip[k];
774                 k1 = 2 * k + ip[j];
775                 xr = a[j1];
776                 xi = a[j1 + 1];
777                 yr = a[k1];
778                 yi = a[k1 + 1];
779                 a[j1] = yr;
780                 a[j1 + 1] = yi;
781                 a[k1] = xr;
782                 a[k1 + 1] = xi;
783                 j1 += m2;
784                 k1 += m2;
785                 xr = a[j1];
786                 xi = a[j1 + 1];
787                 yr = a[k1];
788                 yi = a[k1 + 1];
789                 a[j1] = yr;
790                 a[j1 + 1] = yi;
791                 a[k1] = xr;
792                 a[k1 + 1] = xi;
793             }
794         }
795     }
796 }
797 
798 #if 0  // Not used.
799 static void bitrv2conj(int n, int *ip, float *a)
800 {
801     int j, j1, k, k1, l, m, m2;
802     float xr, xi, yr, yi;
803 
804     ip[0] = 0;
805     l = n;
806     m = 1;
807     while ((m << 3) < l) {
808         l >>= 1;
809         for (j = 0; j < m; j++) {
810             ip[m + j] = ip[j] + l;
811         }
812         m <<= 1;
813     }
814     m2 = 2 * m;
815     if ((m << 3) == l) {
816         for (k = 0; k < m; k++) {
817             for (j = 0; j < k; j++) {
818                 j1 = 2 * j + ip[k];
819                 k1 = 2 * k + ip[j];
820                 xr = a[j1];
821                 xi = -a[j1 + 1];
822                 yr = a[k1];
823                 yi = -a[k1 + 1];
824                 a[j1] = yr;
825                 a[j1 + 1] = yi;
826                 a[k1] = xr;
827                 a[k1 + 1] = xi;
828                 j1 += m2;
829                 k1 += 2 * m2;
830                 xr = a[j1];
831                 xi = -a[j1 + 1];
832                 yr = a[k1];
833                 yi = -a[k1 + 1];
834                 a[j1] = yr;
835                 a[j1 + 1] = yi;
836                 a[k1] = xr;
837                 a[k1 + 1] = xi;
838                 j1 += m2;
839                 k1 -= m2;
840                 xr = a[j1];
841                 xi = -a[j1 + 1];
842                 yr = a[k1];
843                 yi = -a[k1 + 1];
844                 a[j1] = yr;
845                 a[j1 + 1] = yi;
846                 a[k1] = xr;
847                 a[k1 + 1] = xi;
848                 j1 += m2;
849                 k1 += 2 * m2;
850                 xr = a[j1];
851                 xi = -a[j1 + 1];
852                 yr = a[k1];
853                 yi = -a[k1 + 1];
854                 a[j1] = yr;
855                 a[j1 + 1] = yi;
856                 a[k1] = xr;
857                 a[k1 + 1] = xi;
858             }
859             k1 = 2 * k + ip[k];
860             a[k1 + 1] = -a[k1 + 1];
861             j1 = k1 + m2;
862             k1 = j1 + m2;
863             xr = a[j1];
864             xi = -a[j1 + 1];
865             yr = a[k1];
866             yi = -a[k1 + 1];
867             a[j1] = yr;
868             a[j1 + 1] = yi;
869             a[k1] = xr;
870             a[k1 + 1] = xi;
871             k1 += m2;
872             a[k1 + 1] = -a[k1 + 1];
873         }
874     } else {
875         a[1] = -a[1];
876         a[m2 + 1] = -a[m2 + 1];
877         for (k = 1; k < m; k++) {
878             for (j = 0; j < k; j++) {
879                 j1 = 2 * j + ip[k];
880                 k1 = 2 * k + ip[j];
881                 xr = a[j1];
882                 xi = -a[j1 + 1];
883                 yr = a[k1];
884                 yi = -a[k1 + 1];
885                 a[j1] = yr;
886                 a[j1 + 1] = yi;
887                 a[k1] = xr;
888                 a[k1 + 1] = xi;
889                 j1 += m2;
890                 k1 += m2;
891                 xr = a[j1];
892                 xi = -a[j1 + 1];
893                 yr = a[k1];
894                 yi = -a[k1 + 1];
895                 a[j1] = yr;
896                 a[j1 + 1] = yi;
897                 a[k1] = xr;
898                 a[k1 + 1] = xi;
899             }
900             k1 = 2 * k + ip[k];
901             a[k1 + 1] = -a[k1 + 1];
902             a[k1 + m2 + 1] = -a[k1 + m2 + 1];
903         }
904     }
905 }
906 #endif
907 
cftfsub(size_t n,float * a,float * w)908 static void cftfsub(size_t n, float *a, float *w)
909 {
910     size_t j, j1, j2, j3, l;
911     float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
912 
913     l = 2;
914     if (n > 8) {
915         cft1st(n, a, w);
916         l = 8;
917         while ((l << 2) < n) {
918             cftmdl(n, l, a, w);
919             l <<= 2;
920         }
921     }
922     if ((l << 2) == n) {
923         for (j = 0; j < l; j += 2) {
924             j1 = j + l;
925             j2 = j1 + l;
926             j3 = j2 + l;
927             x0r = a[j] + a[j1];
928             x0i = a[j + 1] + a[j1 + 1];
929             x1r = a[j] - a[j1];
930             x1i = a[j + 1] - a[j1 + 1];
931             x2r = a[j2] + a[j3];
932             x2i = a[j2 + 1] + a[j3 + 1];
933             x3r = a[j2] - a[j3];
934             x3i = a[j2 + 1] - a[j3 + 1];
935             a[j] = x0r + x2r;
936             a[j + 1] = x0i + x2i;
937             a[j2] = x0r - x2r;
938             a[j2 + 1] = x0i - x2i;
939             a[j1] = x1r - x3i;
940             a[j1 + 1] = x1i + x3r;
941             a[j3] = x1r + x3i;
942             a[j3 + 1] = x1i - x3r;
943         }
944     } else {
945         for (j = 0; j < l; j += 2) {
946             j1 = j + l;
947             x0r = a[j] - a[j1];
948             x0i = a[j + 1] - a[j1 + 1];
949             a[j] += a[j1];
950             a[j + 1] += a[j1 + 1];
951             a[j1] = x0r;
952             a[j1 + 1] = x0i;
953         }
954     }
955 }
956 
957 
cftbsub(size_t n,float * a,float * w)958 static void cftbsub(size_t n, float *a, float *w)
959 {
960     size_t j, j1, j2, j3, l;
961     float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
962 
963     l = 2;
964     if (n > 8) {
965         cft1st(n, a, w);
966         l = 8;
967         while ((l << 2) < n) {
968             cftmdl(n, l, a, w);
969             l <<= 2;
970         }
971     }
972     if ((l << 2) == n) {
973         for (j = 0; j < l; j += 2) {
974             j1 = j + l;
975             j2 = j1 + l;
976             j3 = j2 + l;
977             x0r = a[j] + a[j1];
978             x0i = -a[j + 1] - a[j1 + 1];
979             x1r = a[j] - a[j1];
980             x1i = -a[j + 1] + a[j1 + 1];
981             x2r = a[j2] + a[j3];
982             x2i = a[j2 + 1] + a[j3 + 1];
983             x3r = a[j2] - a[j3];
984             x3i = a[j2 + 1] - a[j3 + 1];
985             a[j] = x0r + x2r;
986             a[j + 1] = x0i - x2i;
987             a[j2] = x0r - x2r;
988             a[j2 + 1] = x0i + x2i;
989             a[j1] = x1r - x3i;
990             a[j1 + 1] = x1i - x3r;
991             a[j3] = x1r + x3i;
992             a[j3 + 1] = x1i + x3r;
993         }
994     } else {
995         for (j = 0; j < l; j += 2) {
996             j1 = j + l;
997             x0r = a[j] - a[j1];
998             x0i = -a[j + 1] + a[j1 + 1];
999             a[j] += a[j1];
1000             a[j + 1] = -a[j + 1] - a[j1 + 1];
1001             a[j1] = x0r;
1002             a[j1 + 1] = x0i;
1003         }
1004     }
1005 }
1006 
1007 
cft1st(size_t n,float * a,float * w)1008 static void cft1st(size_t n, float *a, float *w)
1009 {
1010     size_t j, k1, k2;
1011     float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1012     float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1013 
1014     x0r = a[0] + a[2];
1015     x0i = a[1] + a[3];
1016     x1r = a[0] - a[2];
1017     x1i = a[1] - a[3];
1018     x2r = a[4] + a[6];
1019     x2i = a[5] + a[7];
1020     x3r = a[4] - a[6];
1021     x3i = a[5] - a[7];
1022     a[0] = x0r + x2r;
1023     a[1] = x0i + x2i;
1024     a[4] = x0r - x2r;
1025     a[5] = x0i - x2i;
1026     a[2] = x1r - x3i;
1027     a[3] = x1i + x3r;
1028     a[6] = x1r + x3i;
1029     a[7] = x1i - x3r;
1030     wk1r = w[2];
1031     x0r = a[8] + a[10];
1032     x0i = a[9] + a[11];
1033     x1r = a[8] - a[10];
1034     x1i = a[9] - a[11];
1035     x2r = a[12] + a[14];
1036     x2i = a[13] + a[15];
1037     x3r = a[12] - a[14];
1038     x3i = a[13] - a[15];
1039     a[8] = x0r + x2r;
1040     a[9] = x0i + x2i;
1041     a[12] = x2i - x0i;
1042     a[13] = x0r - x2r;
1043     x0r = x1r - x3i;
1044     x0i = x1i + x3r;
1045     a[10] = wk1r * (x0r - x0i);
1046     a[11] = wk1r * (x0r + x0i);
1047     x0r = x3i + x1r;
1048     x0i = x3r - x1i;
1049     a[14] = wk1r * (x0i - x0r);
1050     a[15] = wk1r * (x0i + x0r);
1051     k1 = 0;
1052     for (j = 16; j < n; j += 16) {
1053         k1 += 2;
1054         k2 = 2 * k1;
1055         wk2r = w[k1];
1056         wk2i = w[k1 + 1];
1057         wk1r = w[k2];
1058         wk1i = w[k2 + 1];
1059         wk3r = wk1r - 2 * wk2i * wk1i;
1060         wk3i = 2 * wk2i * wk1r - wk1i;
1061         x0r = a[j] + a[j + 2];
1062         x0i = a[j + 1] + a[j + 3];
1063         x1r = a[j] - a[j + 2];
1064         x1i = a[j + 1] - a[j + 3];
1065         x2r = a[j + 4] + a[j + 6];
1066         x2i = a[j + 5] + a[j + 7];
1067         x3r = a[j + 4] - a[j + 6];
1068         x3i = a[j + 5] - a[j + 7];
1069         a[j] = x0r + x2r;
1070         a[j + 1] = x0i + x2i;
1071         x0r -= x2r;
1072         x0i -= x2i;
1073         a[j + 4] = wk2r * x0r - wk2i * x0i;
1074         a[j + 5] = wk2r * x0i + wk2i * x0r;
1075         x0r = x1r - x3i;
1076         x0i = x1i + x3r;
1077         a[j + 2] = wk1r * x0r - wk1i * x0i;
1078         a[j + 3] = wk1r * x0i + wk1i * x0r;
1079         x0r = x1r + x3i;
1080         x0i = x1i - x3r;
1081         a[j + 6] = wk3r * x0r - wk3i * x0i;
1082         a[j + 7] = wk3r * x0i + wk3i * x0r;
1083         wk1r = w[k2 + 2];
1084         wk1i = w[k2 + 3];
1085         wk3r = wk1r - 2 * wk2r * wk1i;
1086         wk3i = 2 * wk2r * wk1r - wk1i;
1087         x0r = a[j + 8] + a[j + 10];
1088         x0i = a[j + 9] + a[j + 11];
1089         x1r = a[j + 8] - a[j + 10];
1090         x1i = a[j + 9] - a[j + 11];
1091         x2r = a[j + 12] + a[j + 14];
1092         x2i = a[j + 13] + a[j + 15];
1093         x3r = a[j + 12] - a[j + 14];
1094         x3i = a[j + 13] - a[j + 15];
1095         a[j + 8] = x0r + x2r;
1096         a[j + 9] = x0i + x2i;
1097         x0r -= x2r;
1098         x0i -= x2i;
1099         a[j + 12] = -wk2i * x0r - wk2r * x0i;
1100         a[j + 13] = -wk2i * x0i + wk2r * x0r;
1101         x0r = x1r - x3i;
1102         x0i = x1i + x3r;
1103         a[j + 10] = wk1r * x0r - wk1i * x0i;
1104         a[j + 11] = wk1r * x0i + wk1i * x0r;
1105         x0r = x1r + x3i;
1106         x0i = x1i - x3r;
1107         a[j + 14] = wk3r * x0r - wk3i * x0i;
1108         a[j + 15] = wk3r * x0i + wk3i * x0r;
1109     }
1110 }
1111 
1112 
cftmdl(size_t n,size_t l,float * a,float * w)1113 static void cftmdl(size_t n, size_t l, float *a, float *w)
1114 {
1115     size_t j, j1, j2, j3, k, k1, k2, m, m2;
1116     float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
1117     float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
1118 
1119     m = l << 2;
1120     for (j = 0; j < l; j += 2) {
1121         j1 = j + l;
1122         j2 = j1 + l;
1123         j3 = j2 + l;
1124         x0r = a[j] + a[j1];
1125         x0i = a[j + 1] + a[j1 + 1];
1126         x1r = a[j] - a[j1];
1127         x1i = a[j + 1] - a[j1 + 1];
1128         x2r = a[j2] + a[j3];
1129         x2i = a[j2 + 1] + a[j3 + 1];
1130         x3r = a[j2] - a[j3];
1131         x3i = a[j2 + 1] - a[j3 + 1];
1132         a[j] = x0r + x2r;
1133         a[j + 1] = x0i + x2i;
1134         a[j2] = x0r - x2r;
1135         a[j2 + 1] = x0i - x2i;
1136         a[j1] = x1r - x3i;
1137         a[j1 + 1] = x1i + x3r;
1138         a[j3] = x1r + x3i;
1139         a[j3 + 1] = x1i - x3r;
1140     }
1141     wk1r = w[2];
1142     for (j = m; j < l + m; j += 2) {
1143         j1 = j + l;
1144         j2 = j1 + l;
1145         j3 = j2 + l;
1146         x0r = a[j] + a[j1];
1147         x0i = a[j + 1] + a[j1 + 1];
1148         x1r = a[j] - a[j1];
1149         x1i = a[j + 1] - a[j1 + 1];
1150         x2r = a[j2] + a[j3];
1151         x2i = a[j2 + 1] + a[j3 + 1];
1152         x3r = a[j2] - a[j3];
1153         x3i = a[j2 + 1] - a[j3 + 1];
1154         a[j] = x0r + x2r;
1155         a[j + 1] = x0i + x2i;
1156         a[j2] = x2i - x0i;
1157         a[j2 + 1] = x0r - x2r;
1158         x0r = x1r - x3i;
1159         x0i = x1i + x3r;
1160         a[j1] = wk1r * (x0r - x0i);
1161         a[j1 + 1] = wk1r * (x0r + x0i);
1162         x0r = x3i + x1r;
1163         x0i = x3r - x1i;
1164         a[j3] = wk1r * (x0i - x0r);
1165         a[j3 + 1] = wk1r * (x0i + x0r);
1166     }
1167     k1 = 0;
1168     m2 = 2 * m;
1169     for (k = m2; k < n; k += m2) {
1170         k1 += 2;
1171         k2 = 2 * k1;
1172         wk2r = w[k1];
1173         wk2i = w[k1 + 1];
1174         wk1r = w[k2];
1175         wk1i = w[k2 + 1];
1176         wk3r = wk1r - 2 * wk2i * wk1i;
1177         wk3i = 2 * wk2i * wk1r - wk1i;
1178         for (j = k; j < l + k; j += 2) {
1179             j1 = j + l;
1180             j2 = j1 + l;
1181             j3 = j2 + l;
1182             x0r = a[j] + a[j1];
1183             x0i = a[j + 1] + a[j1 + 1];
1184             x1r = a[j] - a[j1];
1185             x1i = a[j + 1] - a[j1 + 1];
1186             x2r = a[j2] + a[j3];
1187             x2i = a[j2 + 1] + a[j3 + 1];
1188             x3r = a[j2] - a[j3];
1189             x3i = a[j2 + 1] - a[j3 + 1];
1190             a[j] = x0r + x2r;
1191             a[j + 1] = x0i + x2i;
1192             x0r -= x2r;
1193             x0i -= x2i;
1194             a[j2] = wk2r * x0r - wk2i * x0i;
1195             a[j2 + 1] = wk2r * x0i + wk2i * x0r;
1196             x0r = x1r - x3i;
1197             x0i = x1i + x3r;
1198             a[j1] = wk1r * x0r - wk1i * x0i;
1199             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1200             x0r = x1r + x3i;
1201             x0i = x1i - x3r;
1202             a[j3] = wk3r * x0r - wk3i * x0i;
1203             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1204         }
1205         wk1r = w[k2 + 2];
1206         wk1i = w[k2 + 3];
1207         wk3r = wk1r - 2 * wk2r * wk1i;
1208         wk3i = 2 * wk2r * wk1r - wk1i;
1209         for (j = k + m; j < l + (k + m); j += 2) {
1210             j1 = j + l;
1211             j2 = j1 + l;
1212             j3 = j2 + l;
1213             x0r = a[j] + a[j1];
1214             x0i = a[j + 1] + a[j1 + 1];
1215             x1r = a[j] - a[j1];
1216             x1i = a[j + 1] - a[j1 + 1];
1217             x2r = a[j2] + a[j3];
1218             x2i = a[j2 + 1] + a[j3 + 1];
1219             x3r = a[j2] - a[j3];
1220             x3i = a[j2 + 1] - a[j3 + 1];
1221             a[j] = x0r + x2r;
1222             a[j + 1] = x0i + x2i;
1223             x0r -= x2r;
1224             x0i -= x2i;
1225             a[j2] = -wk2i * x0r - wk2r * x0i;
1226             a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
1227             x0r = x1r - x3i;
1228             x0i = x1i + x3r;
1229             a[j1] = wk1r * x0r - wk1i * x0i;
1230             a[j1 + 1] = wk1r * x0i + wk1i * x0r;
1231             x0r = x1r + x3i;
1232             x0i = x1i - x3r;
1233             a[j3] = wk3r * x0r - wk3i * x0i;
1234             a[j3 + 1] = wk3r * x0i + wk3i * x0r;
1235         }
1236     }
1237 }
1238 
1239 
rftfsub(size_t n,float * a,size_t nc,float * c)1240 static void rftfsub(size_t n, float *a, size_t nc, float *c)
1241 {
1242     size_t j, k, kk, ks, m;
1243     float wkr, wki, xr, xi, yr, yi;
1244 
1245     m = n >> 1;
1246     ks = 2 * nc / m;
1247     kk = 0;
1248     for (j = 2; j < m; j += 2) {
1249         k = n - j;
1250         kk += ks;
1251         wkr = 0.5f - c[nc - kk];
1252         wki = c[kk];
1253         xr = a[j] - a[k];
1254         xi = a[j + 1] + a[k + 1];
1255         yr = wkr * xr - wki * xi;
1256         yi = wkr * xi + wki * xr;
1257         a[j] -= yr;
1258         a[j + 1] -= yi;
1259         a[k] += yr;
1260         a[k + 1] -= yi;
1261     }
1262 }
1263 
1264 
rftbsub(size_t n,float * a,size_t nc,float * c)1265 static void rftbsub(size_t n, float *a, size_t nc, float *c)
1266 {
1267     size_t j, k, kk, ks, m;
1268     float wkr, wki, xr, xi, yr, yi;
1269 
1270     a[1] = -a[1];
1271     m = n >> 1;
1272     ks = 2 * nc / m;
1273     kk = 0;
1274     for (j = 2; j < m; j += 2) {
1275         k = n - j;
1276         kk += ks;
1277         wkr = 0.5f - c[nc - kk];
1278         wki = c[kk];
1279         xr = a[j] - a[k];
1280         xi = a[j + 1] + a[k + 1];
1281         yr = wkr * xr + wki * xi;
1282         yi = wkr * xi - wki * xr;
1283         a[j] -= yr;
1284         a[j + 1] = yi - a[j + 1];
1285         a[k] += yr;
1286         a[k + 1] = yi - a[k + 1];
1287     }
1288     a[m + 1] = -a[m + 1];
1289 }
1290 
1291 #if 0  // Not used.
1292 static void dctsub(int n, float *a, int nc, float *c)
1293 {
1294     int j, k, kk, ks, m;
1295     float wkr, wki, xr;
1296 
1297     m = n >> 1;
1298     ks = nc / n;
1299     kk = 0;
1300     for (j = 1; j < m; j++) {
1301         k = n - j;
1302         kk += ks;
1303         wkr = c[kk] - c[nc - kk];
1304         wki = c[kk] + c[nc - kk];
1305         xr = wki * a[j] - wkr * a[k];
1306         a[j] = wkr * a[j] + wki * a[k];
1307         a[k] = xr;
1308     }
1309     a[m] *= c[0];
1310 }
1311 
1312 
1313 static void dstsub(int n, float *a, int nc, float *c)
1314 {
1315     int j, k, kk, ks, m;
1316     float wkr, wki, xr;
1317 
1318     m = n >> 1;
1319     ks = nc / n;
1320     kk = 0;
1321     for (j = 1; j < m; j++) {
1322         k = n - j;
1323         kk += ks;
1324         wkr = c[kk] - c[nc - kk];
1325         wki = c[kk] + c[nc - kk];
1326         xr = wki * a[k] - wkr * a[j];
1327         a[k] = wkr * a[k] + wki * a[j];
1328         a[j] = xr;
1329     }
1330     a[m] *= c[0];
1331 }
1332 #endif  // Not used.
1333