1 ///////////////////////////////////////////////////////////////////////////////
2 // BSD 3-Clause License
3 //
4 // Copyright (c) 2018-2020, The Regents of the University of California
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright notice, this
11 //   list of conditions and the following disclaimer.
12 //
13 // * Redistributions in binary form must reproduce the above copyright notice,
14 //   this list of conditions and the following disclaimer in the documentation
15 //   and/or other materials provided with the distribution.
16 //
17 // * Neither the name of the copyright holder nor the names of its
18 //   contributors may be used to endorse or promote products derived from
19 //   this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE
25 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
26 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ///////////////////////////////////////////////////////////////////////////////
33 
34 /*
35 Fast Fourier/Cosine/Sine Transform
36     dimension   :one
37     data length :power of 2
38     decimation  :frequency
39     radix       :split-radix
40     data        :inplace
41     table       :use
42 functions
43     cdft: Complex Discrete Fourier Transform
44     rdft: Real Discrete Fourier Transform
45     ddct: Discrete Cosine Transform
46     ddst: Discrete Sine Transform
47     dfct: Cosine Transform of RDFT (Real Symmetric DFT)
48     dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
49 function prototypes
50     void cdft(int, int, float   *, int *, float   *);
51     void rdft(int, int, float   *, int *, float   *);
52     void ddct(int, int, float   *, int *, float   *);
53     void ddst(int, int, float   *, int *, float   *);
54     void dfct(int, float   *, float   *, int *, float   *);
55     void dfst(int, float   *, float   *, int *, float   *);
56 macro definitions
57     USE_CDFT_PTHREADS : default=not defined
58         CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
59         CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
60     USE_CDFT_WINTHREADS : default=not defined
61         CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
62         CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
63 
64 
65 -------- Complex DFT (Discrete Fourier Transform) --------
66     [definition]
67         <case1>
68             X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
69         <case2>
70             X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
71         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
72     [usage]
73         <case1>
74             ip[0] = 0; // first time only
75             cdft(2*n, 1, a, ip, w);
76         <case2>
77             ip[0] = 0; // first time only
78             cdft(2*n, -1, a, ip, w);
79     [parameters]
80         2*n            :data length (int)
81                         n >= 1, n = power of 2
82         a[0...2*n-1]   :input/output data (float   *)
83                         input data
84                             a[2*j] = Re(x[j]),
85                             a[2*j+1] = Im(x[j]), 0<=j<n
86                         output data
87                             a[2*k] = Re(X[k]),
88                             a[2*k+1] = Im(X[k]), 0<=k<n
89         ip[0...*]      :work area for bit reversal (int *)
90                         length of ip >= 2+sqrt(n)
91                         strictly,
92                         length of ip >=
93                             2+(1<<(int)(log(n+0.5)/log(2))/2).
94                         ip[0],ip[1] are pointers of the cos/sin table.
95         w[0...n/2-1]   :cos/sin table (float   *)
96                         w[],ip[] are initialized if ip[0] == 0.
97     [remark]
98         Inverse of
99             cdft(2*n, -1, a, ip, w);
100         is
101             cdft(2*n, 1, a, ip, w);
102             for (j = 0; j <= 2 * n - 1; j++) {
103                 a[j] *= 1.0 / n;
104             }
105         .
106 
107 
108 -------- Real DFT / Inverse of Real DFT --------
109     [definition]
110         <case1> RDFT
111             R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
112             I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
113         <case2> IRDFT (excluding scale)
114             a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
115                    sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
116                    sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
117     [usage]
118         <case1>
119             ip[0] = 0; // first time only
120             rdft(n, 1, a, ip, w);
121         <case2>
122             ip[0] = 0; // first time only
123             rdft(n, -1, a, ip, w);
124     [parameters]
125         n              :data length (int)
126                         n >= 2, n = power of 2
127         a[0...n-1]     :input/output data (float   *)
128                         <case1>
129                             output data
130                                 a[2*k] = R[k], 0<=k<n/2
131                                 a[2*k+1] = I[k], 0<k<n/2
132                                 a[1] = R[n/2]
133                         <case2>
134                             input data
135                                 a[2*j] = R[j], 0<=j<n/2
136                                 a[2*j+1] = I[j], 0<j<n/2
137                                 a[1] = R[n/2]
138         ip[0...*]      :work area for bit reversal (int *)
139                         length of ip >= 2+sqrt(n/2)
140                         strictly,
141                         length of ip >=
142                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
143                         ip[0],ip[1] are pointers of the cos/sin table.
144         w[0...n/2-1]   :cos/sin table (float   *)
145                         w[],ip[] are initialized if ip[0] == 0.
146     [remark]
147         Inverse of
148             rdft(n, 1, a, ip, w);
149         is
150             rdft(n, -1, a, ip, w);
151             for (j = 0; j <= n - 1; j++) {
152                 a[j] *= 2.0 / n;
153             }
154         .
155 
156 
157 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
158     [definition]
159         <case1> IDCT (excluding scale)
160             C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
161         <case2> DCT
162             C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
163     [usage]
164         <case1>
165             ip[0] = 0; // first time only
166             ddct(n, 1, a, ip, w);
167         <case2>
168             ip[0] = 0; // first time only
169             ddct(n, -1, a, ip, w);
170     [parameters]
171         n              :data length (int)
172                         n >= 2, n = power of 2
173         a[0...n-1]     :input/output data (float   *)
174                         output data
175                             a[k] = C[k], 0<=k<n
176         ip[0...*]      :work area for bit reversal (int *)
177                         length of ip >= 2+sqrt(n/2)
178                         strictly,
179                         length of ip >=
180                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
181                         ip[0],ip[1] are pointers of the cos/sin table.
182         w[0...n*5/4-1] :cos/sin table (float   *)
183                         w[],ip[] are initialized if ip[0] == 0.
184     [remark]
185         Inverse of
186             ddct(n, -1, a, ip, w);
187         is
188             a[0] *= 0.5;
189             ddct(n, 1, a, ip, w);
190             for (j = 0; j <= n - 1; j++) {
191                 a[j] *= 2.0 / n;
192             }
193         .
194 
195 
196 -------- DST (Discrete Sine Transform) / Inverse of DST --------
197     [definition]
198         <case1> IDST (excluding scale)
199             S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
200         <case2> DST
201             S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
202     [usage]
203         <case1>
204             ip[0] = 0; // first time only
205             ddst(n, 1, a, ip, w);
206         <case2>
207             ip[0] = 0; // first time only
208             ddst(n, -1, a, ip, w);
209     [parameters]
210         n              :data length (int)
211                         n >= 2, n = power of 2
212         a[0...n-1]     :input/output data (float   *)
213                         <case1>
214                             input data
215                                 a[j] = A[j], 0<j<n
216                                 a[0] = A[n]
217                             output data
218                                 a[k] = S[k], 0<=k<n
219                         <case2>
220                             output data
221                                 a[k] = S[k], 0<k<n
222                                 a[0] = S[n]
223         ip[0...*]      :work area for bit reversal (int *)
224                         length of ip >= 2+sqrt(n/2)
225                         strictly,
226                         length of ip >=
227                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
228                         ip[0],ip[1] are pointers of the cos/sin table.
229         w[0...n*5/4-1] :cos/sin table (float   *)
230                         w[],ip[] are initialized if ip[0] == 0.
231     [remark]
232         Inverse of
233             ddst(n, -1, a, ip, w);
234         is
235             a[0] *= 0.5;
236             ddst(n, 1, a, ip, w);
237             for (j = 0; j <= n - 1; j++) {
238                 a[j] *= 2.0 / n;
239             }
240         .
241 
242 
243 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
244     [definition]
245         C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
246     [usage]
247         ip[0] = 0; // first time only
248         dfct(n, a, t, ip, w);
249     [parameters]
250         n              :data length - 1 (int)
251                         n >= 2, n = power of 2
252         a[0...n]       :input/output data (float   *)
253                         output data
254                             a[k] = C[k], 0<=k<=n
255         t[0...n/2]     :work area (float   *)
256         ip[0...*]      :work area for bit reversal (int *)
257                         length of ip >= 2+sqrt(n/4)
258                         strictly,
259                         length of ip >=
260                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
261                         ip[0],ip[1] are pointers of the cos/sin table.
262         w[0...n*5/8-1] :cos/sin table (float   *)
263                         w[],ip[] are initialized if ip[0] == 0.
264     [remark]
265         Inverse of
266             a[0] *= 0.5;
267             a[n] *= 0.5;
268             dfct(n, a, t, ip, w);
269         is
270             a[0] *= 0.5;
271             a[n] *= 0.5;
272             dfct(n, a, t, ip, w);
273             for (j = 0; j <= n; j++) {
274                 a[j] *= 2.0 / n;
275             }
276         .
277 
278 
279 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
280     [definition]
281         S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
282     [usage]
283         ip[0] = 0; // first time only
284         dfst(n, a, t, ip, w);
285     [parameters]
286         n              :data length + 1 (int)
287                         n >= 2, n = power of 2
288         a[0...n-1]     :input/output data (float   *)
289                         output data
290                             a[k] = S[k], 0<k<n
291                         (a[0] is used for work area)
292         t[0...n/2-1]   :work area (float   *)
293         ip[0...*]      :work area for bit reversal (int *)
294                         length of ip >= 2+sqrt(n/4)
295                         strictly,
296                         length of ip >=
297                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
298                         ip[0],ip[1] are pointers of the cos/sin table.
299         w[0...n*5/8-1] :cos/sin table (float   *)
300                         w[],ip[] are initialized if ip[0] == 0.
301     [remark]
302         Inverse of
303             dfst(n, a, t, ip, w);
304         is
305             dfst(n, a, t, ip, w);
306             for (j = 1; j <= n - 1; j++) {
307                 a[j] *= 2.0 / n;
308             }
309         .
310 
311 
312 Appendix :
313     The cos/sin table is recalculated when the larger table required.
314     w[] and ip[] are compatible with all routines.
315 */
316 
317 #include <cmath>
318 #include <iostream>
319 
320 namespace gpl {
321 
322 
cdft(int n,int isgn,float * a,int * ip,float * w)323 void cdft(int n, int isgn, float *a, int *ip, float *w) {
324   void makewt(int nw, int *ip, float *w);
325   void cftfsub(int n, float *a, int *ip, int nw, float *w);
326   void cftbsub(int n, float *a, int *ip, int nw, float *w);
327   int nw;
328 
329   nw = ip[0];
330   if(n > (nw << 2)) {
331     nw = n >> 2;
332     makewt(nw, ip, w);
333   }
334   if(isgn >= 0) {
335     cftfsub(n, a, ip, nw, w);
336   }
337   else {
338     cftbsub(n, a, ip, nw, w);
339   }
340 }
341 
rdft(int n,int isgn,float * a,int * ip,float * w)342 void rdft(int n, int isgn, float *a, int *ip, float *w) {
343   void makewt(int nw, int *ip, float *w);
344   void makect(int nc, int *ip, float *c);
345   void cftfsub(int n, float *a, int *ip, int nw, float *w);
346   void cftbsub(int n, float *a, int *ip, int nw, float *w);
347   void rftfsub(int n, float *a, int nc, float *c);
348   void rftbsub(int n, float *a, int nc, float *c);
349   int nw, nc;
350   float xi;
351 
352   nw = ip[0];
353   if(n > (nw << 2)) {
354     nw = n >> 2;
355     makewt(nw, ip, w);
356   }
357   nc = ip[1];
358   if(n > (nc << 2)) {
359     nc = n >> 2;
360     makect(nc, ip, w + nw);
361   }
362   if(isgn >= 0) {
363     if(n > 4) {
364       cftfsub(n, a, ip, nw, w);
365       rftfsub(n, a, nc, w + nw);
366     }
367     else if(n == 4) {
368       cftfsub(n, a, ip, nw, w);
369     }
370     xi = a[0] - a[1];
371     a[0] += a[1];
372     a[1] = xi;
373   }
374   else {
375     a[1] = 0.5 * (a[0] - a[1]);
376     a[0] -= a[1];
377     if(n > 4) {
378       rftbsub(n, a, nc, w + nw);
379       cftbsub(n, a, ip, nw, w);
380     }
381     else if(n == 4) {
382       cftbsub(n, a, ip, nw, w);
383     }
384   }
385 }
386 
ddct(int n,int isgn,float * a,int * ip,float * w)387 void ddct(int n, int isgn, float *a, int *ip, float *w) {
388   void makewt(int nw, int *ip, float *w);
389   void makect(int nc, int *ip, float *c);
390   void cftfsub(int n, float *a, int *ip, int nw, float *w);
391   void cftbsub(int n, float *a, int *ip, int nw, float *w);
392   void rftfsub(int n, float *a, int nc, float *c);
393   void rftbsub(int n, float *a, int nc, float *c);
394   void dctsub(int n, float *a, int nc, float *c);
395   int j, nw, nc;
396   float xr;
397 
398   nw = ip[0];
399   if(n > (nw << 2)) {
400     nw = n >> 2;
401     makewt(nw, ip, w);
402   }
403   nc = ip[1];
404   if(n > nc) {
405     nc = n;
406     makect(nc, ip, w + nw);
407   }
408   if(isgn < 0) {
409     xr = a[n - 1];
410 
411     for(j = n - 2; j >= 2; j -= 2) {
412       a[j + 1] = a[j] - a[j - 1];
413       a[j] += a[j - 1];
414     }
415     a[1] = a[0] - xr;
416     a[0] += xr;
417     if(n > 4) {
418       rftbsub(n, a, nc, w + nw);
419       cftbsub(n, a, ip, nw, w);
420     }
421     else if(n == 4) {
422       cftbsub(n, a, ip, nw, w);
423     }
424   }
425   dctsub(n, a, nc, w + nw);
426   if(isgn >= 0) {
427     if(n > 4) {
428       cftfsub(n, a, ip, nw, w);
429       rftfsub(n, a, nc, w + nw);
430     }
431     else if(n == 4) {
432       cftfsub(n, a, ip, nw, w);
433     }
434     xr = a[0] - a[1];
435     a[0] += a[1];
436     for(j = 2; j < n; j += 2) {
437       a[j - 1] = a[j] - a[j + 1];
438       a[j] += a[j + 1];
439     }
440     a[n - 1] = xr;
441   }
442 }
443 
ddst(int n,int isgn,float * a,int * ip,float * w)444 void ddst(int n, int isgn, float *a, int *ip, float *w) {
445   void makewt(int nw, int *ip, float *w);
446   void makect(int nc, int *ip, float *c);
447   void cftfsub(int n, float *a, int *ip, int nw, float *w);
448   void cftbsub(int n, float *a, int *ip, int nw, float *w);
449   void rftfsub(int n, float *a, int nc, float *c);
450   void rftbsub(int n, float *a, int nc, float *c);
451   void dstsub(int n, float *a, int nc, float *c);
452   int j, nw, nc;
453   float xr;
454 
455   nw = ip[0];
456   if(n > (nw << 2)) {
457     nw = n >> 2;
458     makewt(nw, ip, w);
459   }
460   nc = ip[1];
461   if(n > nc) {
462     nc = n;
463     makect(nc, ip, w + nw);
464   }
465   if(isgn < 0) {
466     xr = a[n - 1];
467     for(j = n - 2; j >= 2; j -= 2) {
468       a[j + 1] = -a[j] - a[j - 1];
469       a[j] -= a[j - 1];
470     }
471     a[1] = a[0] + xr;
472     a[0] -= xr;
473     if(n > 4) {
474       rftbsub(n, a, nc, w + nw);
475       cftbsub(n, a, ip, nw, w);
476     }
477     else if(n == 4) {
478       cftbsub(n, a, ip, nw, w);
479     }
480   }
481   dstsub(n, a, nc, w + nw);
482   if(isgn >= 0) {
483     if(n > 4) {
484       cftfsub(n, a, ip, nw, w);
485       rftfsub(n, a, nc, w + nw);
486     }
487     else if(n == 4) {
488       cftfsub(n, a, ip, nw, w);
489     }
490     xr = a[0] - a[1];
491     a[0] += a[1];
492     for(j = 2; j < n; j += 2) {
493       a[j - 1] = -a[j] - a[j + 1];
494       a[j] -= a[j + 1];
495     }
496     a[n - 1] = -xr;
497   }
498 }
499 
dfct(int n,float * a,float * t,int * ip,float * w)500 void dfct(int n, float *a, float *t, int *ip, float *w) {
501   void makewt(int nw, int *ip, float *w);
502   void makect(int nc, int *ip, float *c);
503   void cftfsub(int n, float *a, int *ip, int nw, float *w);
504   void rftfsub(int n, float *a, int nc, float *c);
505   void dctsub(int n, float *a, int nc, float *c);
506   int j, k, l, m, mh, nw, nc;
507   float xr, xi, yr, yi;
508 
509   nw = ip[0];
510   if(n > (nw << 3)) {
511     nw = n >> 3;
512     makewt(nw, ip, w);
513   }
514   nc = ip[1];
515   if(n > (nc << 1)) {
516     nc = n >> 1;
517     makect(nc, ip, w + nw);
518   }
519   m = n >> 1;
520   yi = a[m];
521   xi = a[0] + a[n];
522   a[0] -= a[n];
523   t[0] = xi - yi;
524   t[m] = xi + yi;
525   if(n > 2) {
526     mh = m >> 1;
527     for(j = 1; j < mh; j++) {
528       k = m - j;
529       xr = a[j] - a[n - j];
530       xi = a[j] + a[n - j];
531       yr = a[k] - a[n - k];
532       yi = a[k] + a[n - k];
533       a[j] = xr;
534       a[k] = yr;
535       t[j] = xi - yi;
536       t[k] = xi + yi;
537     }
538     t[mh] = a[mh] + a[n - mh];
539     a[mh] -= a[n - mh];
540     dctsub(m, a, nc, w + nw);
541     if(m > 4) {
542       cftfsub(m, a, ip, nw, w);
543       rftfsub(m, a, nc, w + nw);
544     }
545     else if(m == 4) {
546       cftfsub(m, a, ip, nw, w);
547     }
548     a[n - 1] = a[0] - a[1];
549     a[1] = a[0] + a[1];
550     for(j = m - 2; j >= 2; j -= 2) {
551       a[2 * j + 1] = a[j] + a[j + 1];
552       a[2 * j - 1] = a[j] - a[j + 1];
553     }
554     l = 2;
555     m = mh;
556     while(m >= 2) {
557       dctsub(m, t, nc, w + nw);
558       if(m > 4) {
559         cftfsub(m, t, ip, nw, w);
560         rftfsub(m, t, nc, w + nw);
561       }
562       else if(m == 4) {
563         cftfsub(m, t, ip, nw, w);
564       }
565       a[n - l] = t[0] - t[1];
566       a[l] = t[0] + t[1];
567       k = 0;
568       for(j = 2; j < m; j += 2) {
569         k += l << 2;
570         a[k - l] = t[j] - t[j + 1];
571         a[k + l] = t[j] + t[j + 1];
572       }
573       l <<= 1;
574       mh = m >> 1;
575       for(j = 0; j < mh; j++) {
576         k = m - j;
577         t[j] = t[m + k] - t[m + j];
578         t[k] = t[m + k] + t[m + j];
579       }
580       t[mh] = t[m + mh];
581       m = mh;
582     }
583     a[l] = t[0];
584     a[n] = t[2] - t[1];
585     a[0] = t[2] + t[1];
586   }
587   else {
588     a[1] = a[0];
589     a[2] = t[0];
590     a[0] = t[1];
591   }
592 }
593 
dfst(int n,float * a,float * t,int * ip,float * w)594 void dfst(int n, float *a, float *t, int *ip, float *w) {
595   void makewt(int nw, int *ip, float *w);
596   void makect(int nc, int *ip, float *c);
597   void cftfsub(int n, float *a, int *ip, int nw, float *w);
598   void rftfsub(int n, float *a, int nc, float *c);
599   void dstsub(int n, float *a, int nc, float *c);
600   int j, k, l, m, mh, nw, nc;
601   float xr, xi, yr, yi;
602 
603   nw = ip[0];
604   if(n > (nw << 3)) {
605     nw = n >> 3;
606     makewt(nw, ip, w);
607   }
608   nc = ip[1];
609   if(n > (nc << 1)) {
610     nc = n >> 1;
611     makect(nc, ip, w + nw);
612   }
613   if(n > 2) {
614     m = n >> 1;
615     mh = m >> 1;
616     for(j = 1; j < mh; j++) {
617       k = m - j;
618       xr = a[j] + a[n - j];
619       xi = a[j] - a[n - j];
620       yr = a[k] + a[n - k];
621       yi = a[k] - a[n - k];
622       a[j] = xr;
623       a[k] = yr;
624       t[j] = xi + yi;
625       t[k] = xi - yi;
626     }
627     t[0] = a[mh] - a[n - mh];
628     a[mh] += a[n - mh];
629     a[0] = a[m];
630     dstsub(m, a, nc, w + nw);
631     if(m > 4) {
632       cftfsub(m, a, ip, nw, w);
633       rftfsub(m, a, nc, w + nw);
634     }
635     else if(m == 4) {
636       cftfsub(m, a, ip, nw, w);
637     }
638     a[n - 1] = a[1] - a[0];
639     a[1] = a[0] + a[1];
640     for(j = m - 2; j >= 2; j -= 2) {
641       a[2 * j + 1] = a[j] - a[j + 1];
642       a[2 * j - 1] = -a[j] - a[j + 1];
643     }
644     l = 2;
645     m = mh;
646     while(m >= 2) {
647       dstsub(m, t, nc, w + nw);
648       if(m > 4) {
649         cftfsub(m, t, ip, nw, w);
650         rftfsub(m, t, nc, w + nw);
651       }
652       else if(m == 4) {
653         cftfsub(m, t, ip, nw, w);
654       }
655       a[n - l] = t[1] - t[0];
656       a[l] = t[0] + t[1];
657       k = 0;
658       for(j = 2; j < m; j += 2) {
659         k += l << 2;
660         a[k - l] = -t[j] - t[j + 1];
661         a[k + l] = t[j] - t[j + 1];
662       }
663       l <<= 1;
664       mh = m >> 1;
665       for(j = 1; j < mh; j++) {
666         k = m - j;
667         t[j] = t[m + k] + t[m + j];
668         t[k] = t[m + k] - t[m + j];
669       }
670       t[0] = t[m + mh];
671       m = mh;
672     }
673     a[l] = t[0];
674   }
675   a[0] = 0;
676 }
677 
678 /* -------- initializing routines -------- */
679 
680 
makewt(int nw,int * ip,float * w)681 void makewt(int nw, int *ip, float *w) {
682   void makeipt(int nw, int *ip);
683   int j, nwh, nw0, nw1;
684   float delta, wn4r, wk1r, wk1i, wk3r, wk3i;
685 
686   ip[0] = nw;
687   ip[1] = 1;
688   if(nw > 2) {
689     nwh = nw >> 1;
690     delta = atan(1.0) / nwh;
691     wn4r = cos(delta * nwh);
692     w[0] = 1;
693     w[1] = wn4r;
694     if(nwh == 4) {
695       w[2] = cos(delta * 2);
696       w[3] = sin(delta * 2);
697     }
698     else if(nwh > 4) {
699       makeipt(nw, ip);
700       w[2] = 0.5 / cos(delta * 2);
701       w[3] = 0.5 / cos(delta * 6);
702       for(j = 4; j < nwh; j += 4) {
703         w[j] = cos(delta * j);
704         w[j + 1] = sin(delta * j);
705         w[j + 2] = cos(3 * delta * j);
706         w[j + 3] = -sin(3 * delta * j);
707       }
708     }
709     nw0 = 0;
710     while(nwh > 2) {
711       nw1 = nw0 + nwh;
712       nwh >>= 1;
713       w[nw1] = 1;
714       w[nw1 + 1] = wn4r;
715       if(nwh == 4) {
716         wk1r = w[nw0 + 4];
717         wk1i = w[nw0 + 5];
718         w[nw1 + 2] = wk1r;
719         w[nw1 + 3] = wk1i;
720       }
721       else if(nwh > 4) {
722         wk1r = w[nw0 + 4];
723         wk3r = w[nw0 + 6];
724         w[nw1 + 2] = 0.5 / wk1r;
725         w[nw1 + 3] = 0.5 / wk3r;
726         for(j = 4; j < nwh; j += 4) {
727           wk1r = w[nw0 + 2 * j];
728           wk1i = w[nw0 + 2 * j + 1];
729           wk3r = w[nw0 + 2 * j + 2];
730           wk3i = w[nw0 + 2 * j + 3];
731           w[nw1 + j] = wk1r;
732           w[nw1 + j + 1] = wk1i;
733           w[nw1 + j + 2] = wk3r;
734           w[nw1 + j + 3] = wk3i;
735         }
736       }
737       nw0 = nw1;
738     }
739   }
740 }
741 
makeipt(int nw,int * ip)742 void makeipt(int nw, int *ip) {
743   int j, l, m, m2, p, q;
744 
745   ip[2] = 0;
746   ip[3] = 16;
747   m = 2;
748   for(l = nw; l > 32; l >>= 2) {
749     m2 = m << 1;
750     q = m2 << 3;
751     for(j = m; j < m2; j++) {
752       p = ip[j] << 2;
753       ip[m + j] = p;
754       ip[m2 + j] = p + q;
755     }
756     m = m2;
757   }
758 }
759 
makect(int nc,int * ip,float * c)760 void makect(int nc, int *ip, float *c) {
761   int j, nch;
762   float delta;
763 
764   ip[1] = nc;
765   if(nc > 1) {
766     nch = nc >> 1;
767     delta = atan(1.0) / nch;
768     c[0] = cos(delta * nch);
769     c[nch] = 0.5 * c[0];
770     for(j = 1; j < nch; j++) {
771       c[j] = 0.5 * cos(delta * j);
772       c[nc - j] = 0.5 * sin(delta * j);
773     }
774   }
775 }
776 
777 /* -------- child routines -------- */
778 
779 #ifdef USE_CDFT_PTHREADS
780 #define USE_CDFT_THREADS
781 #ifndef CDFT_THREADS_BEGIN_N
782 #define CDFT_THREADS_BEGIN_N 8192
783 #endif
784 #ifndef CDFT_4THREADS_BEGIN_N
785 #define CDFT_4THREADS_BEGIN_N 65536
786 #endif
787 #include <pthread.h>
788 #include <stdio.h>
789 #include <stdlib.h>
790 #define cdft_thread_t pthread_t
791 #define cdft_thread_create(thp, func, argp)                  \
792   {                                                          \
793     if(pthread_create(thp, NULL, func, (void *)argp) != 0) { \
794       fprintf(stderr, "cdft thread error\n");                \
795       exit(1);                                               \
796     }                                                        \
797   }
798 #define cdft_thread_wait(th)                  \
799   {                                           \
800     if(pthread_join(th, NULL) != 0) {         \
801       fprintf(stderr, "cdft thread error\n"); \
802       exit(1);                                \
803     }                                         \
804   }
805 #endif /* USE_CDFT_PTHREADS */
806 
807 #ifdef USE_CDFT_WINTHREADS
808 #define USE_CDFT_THREADS
809 #ifndef CDFT_THREADS_BEGIN_N
810 #define CDFT_THREADS_BEGIN_N 32768
811 #endif
812 #ifndef CDFT_4THREADS_BEGIN_N
813 #define CDFT_4THREADS_BEGIN_N 524288
814 #endif
815 #include <windows.h>
816 #include <stdio.h>
817 #include <stdlib.h>
818 #define cdft_thread_t HANDLE
819 #define cdft_thread_create(thp, func, argp)                                    \
820   {                                                                            \
821     DWORD thid;                                                                \
822     *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, \
823                           0, &thid);                                           \
824     if(*(thp) == 0) {                                                          \
825       fprintf(stderr, "cdft thread error\n");                                  \
826       exit(1);                                                                 \
827     }                                                                          \
828   }
829 #define cdft_thread_wait(th)           \
830   {                                    \
831     WaitForSingleObject(th, INFINITE); \
832     CloseHandle(th);                   \
833   }
834 #endif /* USE_CDFT_WINTHREADS */
835 
cftfsub(int n,float * a,int * ip,int nw,float * w)836 void cftfsub(int n, float *a, int *ip, int nw, float *w) {
837   void bitrv2(int n, int *ip, float *a);
838   void bitrv216(float * a);
839   void bitrv208(float * a);
840   void cftf1st(int n, float *a, float *w);
841   void cftrec4(int n, float *a, int nw, float *w);
842   void cftleaf(int n, int isplt, float *a, int nw, float *w);
843   void cftfx41(int n, float *a, int nw, float *w);
844   void cftf161(float * a, float * w);
845   void cftf081(float * a, float * w);
846   void cftf040(float * a);
847   void cftx020(float * a);
848 #ifdef USE_CDFT_THREADS
849   void cftrec4_th(int n, float *a, int nw, float *w);
850 #endif /* USE_CDFT_THREADS */
851 
852   if(n > 8) {
853     if(n > 32) {
854       cftf1st(n, a, &w[nw - (n >> 2)]);
855 #ifdef USE_CDFT_THREADS
856       if(n > CDFT_THREADS_BEGIN_N) {
857         cftrec4_th(n, a, nw, w);
858       }
859       else
860 #endif /* USE_CDFT_THREADS */
861           if(n > 512) {
862         cftrec4(n, a, nw, w);
863       }
864       else if(n > 128) {
865         cftleaf(n, 1, a, nw, w);
866       }
867       else {
868         cftfx41(n, a, nw, w);
869       }
870       bitrv2(n, ip, a);
871     }
872     else if(n == 32) {
873       cftf161(a, &w[nw - 8]);
874       bitrv216(a);
875     }
876     else {
877       cftf081(a, w);
878       bitrv208(a);
879     }
880   }
881   else if(n == 8) {
882     cftf040(a);
883   }
884   else if(n == 4) {
885     cftx020(a);
886   }
887 }
888 
cftbsub(int n,float * a,int * ip,int nw,float * w)889 void cftbsub(int n, float *a, int *ip, int nw, float *w) {
890   void bitrv2conj(int n, int *ip, float *a);
891   void bitrv216neg(float * a);
892   void bitrv208neg(float * a);
893   void cftb1st(int n, float *a, float *w);
894   void cftrec4(int n, float *a, int nw, float *w);
895   void cftleaf(int n, int isplt, float *a, int nw, float *w);
896   void cftfx41(int n, float *a, int nw, float *w);
897   void cftf161(float * a, float * w);
898   void cftf081(float * a, float * w);
899   void cftb040(float * a);
900   void cftx020(float * a);
901 #ifdef USE_CDFT_THREADS
902   void cftrec4_th(int n, float *a, int nw, float *w);
903 #endif /* USE_CDFT_THREADS */
904 
905   if(n > 8) {
906     if(n > 32) {
907       cftb1st(n, a, &w[nw - (n >> 2)]);
908 #ifdef USE_CDFT_THREADS
909       if(n > CDFT_THREADS_BEGIN_N) {
910         cftrec4_th(n, a, nw, w);
911       }
912       else
913 #endif /* USE_CDFT_THREADS */
914           if(n > 512) {
915         cftrec4(n, a, nw, w);
916       }
917       else if(n > 128) {
918         cftleaf(n, 1, a, nw, w);
919       }
920       else {
921         cftfx41(n, a, nw, w);
922       }
923       bitrv2conj(n, ip, a);
924     }
925     else if(n == 32) {
926       cftf161(a, &w[nw - 8]);
927       bitrv216neg(a);
928     }
929     else {
930       cftf081(a, w);
931       bitrv208neg(a);
932     }
933   }
934   else if(n == 8) {
935     cftb040(a);
936   }
937   else if(n == 4) {
938     cftx020(a);
939   }
940 }
941 
bitrv2(int n,int * ip,float * a)942 void bitrv2(int n, int *ip, float *a) {
943   int j, j1, k, k1, l, m, nh, nm;
944   float xr, xi, yr, yi;
945 
946   m = 1;
947   for(l = n >> 2; l > 8; l >>= 2) {
948     m <<= 1;
949   }
950   nh = n >> 1;
951   nm = 4 * m;
952   if(l == 8) {
953     for(k = 0; k < m; k++) {
954       for(j = 0; j < k; j++) {
955         j1 = 4 * j + 2 * ip[m + k];
956         k1 = 4 * k + 2 * ip[m + j];
957         xr = a[j1];
958         xi = a[j1 + 1];
959         yr = a[k1];
960         yi = a[k1 + 1];
961         a[j1] = yr;
962         a[j1 + 1] = yi;
963         a[k1] = xr;
964         a[k1 + 1] = xi;
965         j1 += nm;
966         k1 += 2 * nm;
967         xr = a[j1];
968         xi = a[j1 + 1];
969         yr = a[k1];
970         yi = a[k1 + 1];
971         a[j1] = yr;
972         a[j1 + 1] = yi;
973         a[k1] = xr;
974         a[k1 + 1] = xi;
975         j1 += nm;
976         k1 -= nm;
977         xr = a[j1];
978         xi = a[j1 + 1];
979         yr = a[k1];
980         yi = a[k1 + 1];
981         a[j1] = yr;
982         a[j1 + 1] = yi;
983         a[k1] = xr;
984         a[k1 + 1] = xi;
985         j1 += nm;
986         k1 += 2 * nm;
987         xr = a[j1];
988         xi = a[j1 + 1];
989         yr = a[k1];
990         yi = a[k1 + 1];
991         a[j1] = yr;
992         a[j1 + 1] = yi;
993         a[k1] = xr;
994         a[k1 + 1] = xi;
995         j1 += nh;
996         k1 += 2;
997         xr = a[j1];
998         xi = a[j1 + 1];
999         yr = a[k1];
1000         yi = a[k1 + 1];
1001         a[j1] = yr;
1002         a[j1 + 1] = yi;
1003         a[k1] = xr;
1004         a[k1 + 1] = xi;
1005         j1 -= nm;
1006         k1 -= 2 * nm;
1007         xr = a[j1];
1008         xi = a[j1 + 1];
1009         yr = a[k1];
1010         yi = a[k1 + 1];
1011         a[j1] = yr;
1012         a[j1 + 1] = yi;
1013         a[k1] = xr;
1014         a[k1 + 1] = xi;
1015         j1 -= nm;
1016         k1 += nm;
1017         xr = a[j1];
1018         xi = a[j1 + 1];
1019         yr = a[k1];
1020         yi = a[k1 + 1];
1021         a[j1] = yr;
1022         a[j1 + 1] = yi;
1023         a[k1] = xr;
1024         a[k1 + 1] = xi;
1025         j1 -= nm;
1026         k1 -= 2 * nm;
1027         xr = a[j1];
1028         xi = a[j1 + 1];
1029         yr = a[k1];
1030         yi = a[k1 + 1];
1031         a[j1] = yr;
1032         a[j1 + 1] = yi;
1033         a[k1] = xr;
1034         a[k1 + 1] = xi;
1035         j1 += 2;
1036         k1 += nh;
1037         xr = a[j1];
1038         xi = a[j1 + 1];
1039         yr = a[k1];
1040         yi = a[k1 + 1];
1041         a[j1] = yr;
1042         a[j1 + 1] = yi;
1043         a[k1] = xr;
1044         a[k1 + 1] = xi;
1045         j1 += nm;
1046         k1 += 2 * nm;
1047         xr = a[j1];
1048         xi = a[j1 + 1];
1049         yr = a[k1];
1050         yi = a[k1 + 1];
1051         a[j1] = yr;
1052         a[j1 + 1] = yi;
1053         a[k1] = xr;
1054         a[k1 + 1] = xi;
1055         j1 += nm;
1056         k1 -= nm;
1057         xr = a[j1];
1058         xi = a[j1 + 1];
1059         yr = a[k1];
1060         yi = a[k1 + 1];
1061         a[j1] = yr;
1062         a[j1 + 1] = yi;
1063         a[k1] = xr;
1064         a[k1 + 1] = xi;
1065         j1 += nm;
1066         k1 += 2 * nm;
1067         xr = a[j1];
1068         xi = a[j1 + 1];
1069         yr = a[k1];
1070         yi = a[k1 + 1];
1071         a[j1] = yr;
1072         a[j1 + 1] = yi;
1073         a[k1] = xr;
1074         a[k1 + 1] = xi;
1075         j1 -= nh;
1076         k1 -= 2;
1077         xr = a[j1];
1078         xi = a[j1 + 1];
1079         yr = a[k1];
1080         yi = a[k1 + 1];
1081         a[j1] = yr;
1082         a[j1 + 1] = yi;
1083         a[k1] = xr;
1084         a[k1 + 1] = xi;
1085         j1 -= nm;
1086         k1 -= 2 * nm;
1087         xr = a[j1];
1088         xi = a[j1 + 1];
1089         yr = a[k1];
1090         yi = a[k1 + 1];
1091         a[j1] = yr;
1092         a[j1 + 1] = yi;
1093         a[k1] = xr;
1094         a[k1 + 1] = xi;
1095         j1 -= nm;
1096         k1 += nm;
1097         xr = a[j1];
1098         xi = a[j1 + 1];
1099         yr = a[k1];
1100         yi = a[k1 + 1];
1101         a[j1] = yr;
1102         a[j1 + 1] = yi;
1103         a[k1] = xr;
1104         a[k1 + 1] = xi;
1105         j1 -= nm;
1106         k1 -= 2 * nm;
1107         xr = a[j1];
1108         xi = a[j1 + 1];
1109         yr = a[k1];
1110         yi = a[k1 + 1];
1111         a[j1] = yr;
1112         a[j1 + 1] = yi;
1113         a[k1] = xr;
1114         a[k1 + 1] = xi;
1115       }
1116       k1 = 4 * k + 2 * ip[m + k];
1117       j1 = k1 + 2;
1118       k1 += nh;
1119       xr = a[j1];
1120       xi = a[j1 + 1];
1121       yr = a[k1];
1122       yi = a[k1 + 1];
1123       a[j1] = yr;
1124       a[j1 + 1] = yi;
1125       a[k1] = xr;
1126       a[k1 + 1] = xi;
1127       j1 += nm;
1128       k1 += 2 * nm;
1129       xr = a[j1];
1130       xi = a[j1 + 1];
1131       yr = a[k1];
1132       yi = a[k1 + 1];
1133       a[j1] = yr;
1134       a[j1 + 1] = yi;
1135       a[k1] = xr;
1136       a[k1 + 1] = xi;
1137       j1 += nm;
1138       k1 -= nm;
1139       xr = a[j1];
1140       xi = a[j1 + 1];
1141       yr = a[k1];
1142       yi = a[k1 + 1];
1143       a[j1] = yr;
1144       a[j1 + 1] = yi;
1145       a[k1] = xr;
1146       a[k1 + 1] = xi;
1147       j1 -= 2;
1148       k1 -= nh;
1149       xr = a[j1];
1150       xi = a[j1 + 1];
1151       yr = a[k1];
1152       yi = a[k1 + 1];
1153       a[j1] = yr;
1154       a[j1 + 1] = yi;
1155       a[k1] = xr;
1156       a[k1 + 1] = xi;
1157       j1 += nh + 2;
1158       k1 += nh + 2;
1159       xr = a[j1];
1160       xi = a[j1 + 1];
1161       yr = a[k1];
1162       yi = a[k1 + 1];
1163       a[j1] = yr;
1164       a[j1 + 1] = yi;
1165       a[k1] = xr;
1166       a[k1 + 1] = xi;
1167       j1 -= nh - nm;
1168       k1 += 2 * nm - 2;
1169       xr = a[j1];
1170       xi = a[j1 + 1];
1171       yr = a[k1];
1172       yi = a[k1 + 1];
1173       a[j1] = yr;
1174       a[j1 + 1] = yi;
1175       a[k1] = xr;
1176       a[k1 + 1] = xi;
1177     }
1178   }
1179   else {
1180     for(k = 0; k < m; k++) {
1181       for(j = 0; j < k; j++) {
1182         j1 = 4 * j + ip[m + k];
1183         k1 = 4 * k + ip[m + j];
1184         xr = a[j1];
1185         xi = a[j1 + 1];
1186         yr = a[k1];
1187         yi = a[k1 + 1];
1188         a[j1] = yr;
1189         a[j1 + 1] = yi;
1190         a[k1] = xr;
1191         a[k1 + 1] = xi;
1192         j1 += nm;
1193         k1 += nm;
1194         xr = a[j1];
1195         xi = a[j1 + 1];
1196         yr = a[k1];
1197         yi = a[k1 + 1];
1198         a[j1] = yr;
1199         a[j1 + 1] = yi;
1200         a[k1] = xr;
1201         a[k1 + 1] = xi;
1202         j1 += nh;
1203         k1 += 2;
1204         xr = a[j1];
1205         xi = a[j1 + 1];
1206         yr = a[k1];
1207         yi = a[k1 + 1];
1208         a[j1] = yr;
1209         a[j1 + 1] = yi;
1210         a[k1] = xr;
1211         a[k1 + 1] = xi;
1212         j1 -= nm;
1213         k1 -= nm;
1214         xr = a[j1];
1215         xi = a[j1 + 1];
1216         yr = a[k1];
1217         yi = a[k1 + 1];
1218         a[j1] = yr;
1219         a[j1 + 1] = yi;
1220         a[k1] = xr;
1221         a[k1 + 1] = xi;
1222         j1 += 2;
1223         k1 += nh;
1224         xr = a[j1];
1225         xi = a[j1 + 1];
1226         yr = a[k1];
1227         yi = a[k1 + 1];
1228         a[j1] = yr;
1229         a[j1 + 1] = yi;
1230         a[k1] = xr;
1231         a[k1 + 1] = xi;
1232         j1 += nm;
1233         k1 += nm;
1234         xr = a[j1];
1235         xi = a[j1 + 1];
1236         yr = a[k1];
1237         yi = a[k1 + 1];
1238         a[j1] = yr;
1239         a[j1 + 1] = yi;
1240         a[k1] = xr;
1241         a[k1 + 1] = xi;
1242         j1 -= nh;
1243         k1 -= 2;
1244         xr = a[j1];
1245         xi = a[j1 + 1];
1246         yr = a[k1];
1247         yi = a[k1 + 1];
1248         a[j1] = yr;
1249         a[j1 + 1] = yi;
1250         a[k1] = xr;
1251         a[k1 + 1] = xi;
1252         j1 -= nm;
1253         k1 -= nm;
1254         xr = a[j1];
1255         xi = a[j1 + 1];
1256         yr = a[k1];
1257         yi = a[k1 + 1];
1258         a[j1] = yr;
1259         a[j1 + 1] = yi;
1260         a[k1] = xr;
1261         a[k1 + 1] = xi;
1262       }
1263       k1 = 4 * k + ip[m + k];
1264       j1 = k1 + 2;
1265       k1 += nh;
1266       xr = a[j1];
1267       xi = a[j1 + 1];
1268       yr = a[k1];
1269       yi = a[k1 + 1];
1270       a[j1] = yr;
1271       a[j1 + 1] = yi;
1272       a[k1] = xr;
1273       a[k1 + 1] = xi;
1274       j1 += nm;
1275       k1 += nm;
1276       xr = a[j1];
1277       xi = a[j1 + 1];
1278       yr = a[k1];
1279       yi = a[k1 + 1];
1280       a[j1] = yr;
1281       a[j1 + 1] = yi;
1282       a[k1] = xr;
1283       a[k1 + 1] = xi;
1284     }
1285   }
1286 }
1287 
bitrv2conj(int n,int * ip,float * a)1288 void bitrv2conj(int n, int *ip, float *a) {
1289   int j, j1, k, k1, l, m, nh, nm;
1290   float xr, xi, yr, yi;
1291 
1292   m = 1;
1293   for(l = n >> 2; l > 8; l >>= 2) {
1294     m <<= 1;
1295   }
1296   nh = n >> 1;
1297   nm = 4 * m;
1298   if(l == 8) {
1299     for(k = 0; k < m; k++) {
1300       for(j = 0; j < k; j++) {
1301         j1 = 4 * j + 2 * ip[m + k];
1302         k1 = 4 * k + 2 * ip[m + j];
1303         xr = a[j1];
1304         xi = -a[j1 + 1];
1305         yr = a[k1];
1306         yi = -a[k1 + 1];
1307         a[j1] = yr;
1308         a[j1 + 1] = yi;
1309         a[k1] = xr;
1310         a[k1 + 1] = xi;
1311         j1 += nm;
1312         k1 += 2 * nm;
1313         xr = a[j1];
1314         xi = -a[j1 + 1];
1315         yr = a[k1];
1316         yi = -a[k1 + 1];
1317         a[j1] = yr;
1318         a[j1 + 1] = yi;
1319         a[k1] = xr;
1320         a[k1 + 1] = xi;
1321         j1 += nm;
1322         k1 -= nm;
1323         xr = a[j1];
1324         xi = -a[j1 + 1];
1325         yr = a[k1];
1326         yi = -a[k1 + 1];
1327         a[j1] = yr;
1328         a[j1 + 1] = yi;
1329         a[k1] = xr;
1330         a[k1 + 1] = xi;
1331         j1 += nm;
1332         k1 += 2 * nm;
1333         xr = a[j1];
1334         xi = -a[j1 + 1];
1335         yr = a[k1];
1336         yi = -a[k1 + 1];
1337         a[j1] = yr;
1338         a[j1 + 1] = yi;
1339         a[k1] = xr;
1340         a[k1 + 1] = xi;
1341         j1 += nh;
1342         k1 += 2;
1343         xr = a[j1];
1344         xi = -a[j1 + 1];
1345         yr = a[k1];
1346         yi = -a[k1 + 1];
1347         a[j1] = yr;
1348         a[j1 + 1] = yi;
1349         a[k1] = xr;
1350         a[k1 + 1] = xi;
1351         j1 -= nm;
1352         k1 -= 2 * nm;
1353         xr = a[j1];
1354         xi = -a[j1 + 1];
1355         yr = a[k1];
1356         yi = -a[k1 + 1];
1357         a[j1] = yr;
1358         a[j1 + 1] = yi;
1359         a[k1] = xr;
1360         a[k1 + 1] = xi;
1361         j1 -= nm;
1362         k1 += nm;
1363         xr = a[j1];
1364         xi = -a[j1 + 1];
1365         yr = a[k1];
1366         yi = -a[k1 + 1];
1367         a[j1] = yr;
1368         a[j1 + 1] = yi;
1369         a[k1] = xr;
1370         a[k1 + 1] = xi;
1371         j1 -= nm;
1372         k1 -= 2 * nm;
1373         xr = a[j1];
1374         xi = -a[j1 + 1];
1375         yr = a[k1];
1376         yi = -a[k1 + 1];
1377         a[j1] = yr;
1378         a[j1 + 1] = yi;
1379         a[k1] = xr;
1380         a[k1 + 1] = xi;
1381         j1 += 2;
1382         k1 += nh;
1383         xr = a[j1];
1384         xi = -a[j1 + 1];
1385         yr = a[k1];
1386         yi = -a[k1 + 1];
1387         a[j1] = yr;
1388         a[j1 + 1] = yi;
1389         a[k1] = xr;
1390         a[k1 + 1] = xi;
1391         j1 += nm;
1392         k1 += 2 * nm;
1393         xr = a[j1];
1394         xi = -a[j1 + 1];
1395         yr = a[k1];
1396         yi = -a[k1 + 1];
1397         a[j1] = yr;
1398         a[j1 + 1] = yi;
1399         a[k1] = xr;
1400         a[k1 + 1] = xi;
1401         j1 += nm;
1402         k1 -= nm;
1403         xr = a[j1];
1404         xi = -a[j1 + 1];
1405         yr = a[k1];
1406         yi = -a[k1 + 1];
1407         a[j1] = yr;
1408         a[j1 + 1] = yi;
1409         a[k1] = xr;
1410         a[k1 + 1] = xi;
1411         j1 += nm;
1412         k1 += 2 * nm;
1413         xr = a[j1];
1414         xi = -a[j1 + 1];
1415         yr = a[k1];
1416         yi = -a[k1 + 1];
1417         a[j1] = yr;
1418         a[j1 + 1] = yi;
1419         a[k1] = xr;
1420         a[k1 + 1] = xi;
1421         j1 -= nh;
1422         k1 -= 2;
1423         xr = a[j1];
1424         xi = -a[j1 + 1];
1425         yr = a[k1];
1426         yi = -a[k1 + 1];
1427         a[j1] = yr;
1428         a[j1 + 1] = yi;
1429         a[k1] = xr;
1430         a[k1 + 1] = xi;
1431         j1 -= nm;
1432         k1 -= 2 * nm;
1433         xr = a[j1];
1434         xi = -a[j1 + 1];
1435         yr = a[k1];
1436         yi = -a[k1 + 1];
1437         a[j1] = yr;
1438         a[j1 + 1] = yi;
1439         a[k1] = xr;
1440         a[k1 + 1] = xi;
1441         j1 -= nm;
1442         k1 += nm;
1443         xr = a[j1];
1444         xi = -a[j1 + 1];
1445         yr = a[k1];
1446         yi = -a[k1 + 1];
1447         a[j1] = yr;
1448         a[j1 + 1] = yi;
1449         a[k1] = xr;
1450         a[k1 + 1] = xi;
1451         j1 -= nm;
1452         k1 -= 2 * nm;
1453         xr = a[j1];
1454         xi = -a[j1 + 1];
1455         yr = a[k1];
1456         yi = -a[k1 + 1];
1457         a[j1] = yr;
1458         a[j1 + 1] = yi;
1459         a[k1] = xr;
1460         a[k1 + 1] = xi;
1461       }
1462       k1 = 4 * k + 2 * ip[m + k];
1463       j1 = k1 + 2;
1464       k1 += nh;
1465       a[j1 - 1] = -a[j1 - 1];
1466       xr = a[j1];
1467       xi = -a[j1 + 1];
1468       yr = a[k1];
1469       yi = -a[k1 + 1];
1470       a[j1] = yr;
1471       a[j1 + 1] = yi;
1472       a[k1] = xr;
1473       a[k1 + 1] = xi;
1474       a[k1 + 3] = -a[k1 + 3];
1475       j1 += nm;
1476       k1 += 2 * nm;
1477       xr = a[j1];
1478       xi = -a[j1 + 1];
1479       yr = a[k1];
1480       yi = -a[k1 + 1];
1481       a[j1] = yr;
1482       a[j1 + 1] = yi;
1483       a[k1] = xr;
1484       a[k1 + 1] = xi;
1485       j1 += nm;
1486       k1 -= nm;
1487       xr = a[j1];
1488       xi = -a[j1 + 1];
1489       yr = a[k1];
1490       yi = -a[k1 + 1];
1491       a[j1] = yr;
1492       a[j1 + 1] = yi;
1493       a[k1] = xr;
1494       a[k1 + 1] = xi;
1495       j1 -= 2;
1496       k1 -= nh;
1497       xr = a[j1];
1498       xi = -a[j1 + 1];
1499       yr = a[k1];
1500       yi = -a[k1 + 1];
1501       a[j1] = yr;
1502       a[j1 + 1] = yi;
1503       a[k1] = xr;
1504       a[k1 + 1] = xi;
1505       j1 += nh + 2;
1506       k1 += nh + 2;
1507       xr = a[j1];
1508       xi = -a[j1 + 1];
1509       yr = a[k1];
1510       yi = -a[k1 + 1];
1511       a[j1] = yr;
1512       a[j1 + 1] = yi;
1513       a[k1] = xr;
1514       a[k1 + 1] = xi;
1515       j1 -= nh - nm;
1516       k1 += 2 * nm - 2;
1517       a[j1 - 1] = -a[j1 - 1];
1518       xr = a[j1];
1519       xi = -a[j1 + 1];
1520       yr = a[k1];
1521       yi = -a[k1 + 1];
1522       a[j1] = yr;
1523       a[j1 + 1] = yi;
1524       a[k1] = xr;
1525       a[k1 + 1] = xi;
1526       a[k1 + 3] = -a[k1 + 3];
1527     }
1528   }
1529   else {
1530     for(k = 0; k < m; k++) {
1531       for(j = 0; j < k; j++) {
1532         j1 = 4 * j + ip[m + k];
1533         k1 = 4 * k + ip[m + j];
1534         xr = a[j1];
1535         xi = -a[j1 + 1];
1536         yr = a[k1];
1537         yi = -a[k1 + 1];
1538         a[j1] = yr;
1539         a[j1 + 1] = yi;
1540         a[k1] = xr;
1541         a[k1 + 1] = xi;
1542         j1 += nm;
1543         k1 += nm;
1544         xr = a[j1];
1545         xi = -a[j1 + 1];
1546         yr = a[k1];
1547         yi = -a[k1 + 1];
1548         a[j1] = yr;
1549         a[j1 + 1] = yi;
1550         a[k1] = xr;
1551         a[k1 + 1] = xi;
1552         j1 += nh;
1553         k1 += 2;
1554         xr = a[j1];
1555         xi = -a[j1 + 1];
1556         yr = a[k1];
1557         yi = -a[k1 + 1];
1558         a[j1] = yr;
1559         a[j1 + 1] = yi;
1560         a[k1] = xr;
1561         a[k1 + 1] = xi;
1562         j1 -= nm;
1563         k1 -= nm;
1564         xr = a[j1];
1565         xi = -a[j1 + 1];
1566         yr = a[k1];
1567         yi = -a[k1 + 1];
1568         a[j1] = yr;
1569         a[j1 + 1] = yi;
1570         a[k1] = xr;
1571         a[k1 + 1] = xi;
1572         j1 += 2;
1573         k1 += nh;
1574         xr = a[j1];
1575         xi = -a[j1 + 1];
1576         yr = a[k1];
1577         yi = -a[k1 + 1];
1578         a[j1] = yr;
1579         a[j1 + 1] = yi;
1580         a[k1] = xr;
1581         a[k1 + 1] = xi;
1582         j1 += nm;
1583         k1 += nm;
1584         xr = a[j1];
1585         xi = -a[j1 + 1];
1586         yr = a[k1];
1587         yi = -a[k1 + 1];
1588         a[j1] = yr;
1589         a[j1 + 1] = yi;
1590         a[k1] = xr;
1591         a[k1 + 1] = xi;
1592         j1 -= nh;
1593         k1 -= 2;
1594         xr = a[j1];
1595         xi = -a[j1 + 1];
1596         yr = a[k1];
1597         yi = -a[k1 + 1];
1598         a[j1] = yr;
1599         a[j1 + 1] = yi;
1600         a[k1] = xr;
1601         a[k1 + 1] = xi;
1602         j1 -= nm;
1603         k1 -= nm;
1604         xr = a[j1];
1605         xi = -a[j1 + 1];
1606         yr = a[k1];
1607         yi = -a[k1 + 1];
1608         a[j1] = yr;
1609         a[j1 + 1] = yi;
1610         a[k1] = xr;
1611         a[k1 + 1] = xi;
1612       }
1613       k1 = 4 * k + ip[m + k];
1614       j1 = k1 + 2;
1615       k1 += nh;
1616       a[j1 - 1] = -a[j1 - 1];
1617       xr = a[j1];
1618       xi = -a[j1 + 1];
1619       yr = a[k1];
1620       yi = -a[k1 + 1];
1621       a[j1] = yr;
1622       a[j1 + 1] = yi;
1623       a[k1] = xr;
1624       a[k1 + 1] = xi;
1625       a[k1 + 3] = -a[k1 + 3];
1626       j1 += nm;
1627       k1 += nm;
1628       a[j1 - 1] = -a[j1 - 1];
1629       xr = a[j1];
1630       xi = -a[j1 + 1];
1631       yr = a[k1];
1632       yi = -a[k1 + 1];
1633       a[j1] = yr;
1634       a[j1 + 1] = yi;
1635       a[k1] = xr;
1636       a[k1 + 1] = xi;
1637       a[k1 + 3] = -a[k1 + 3];
1638     }
1639   }
1640 }
1641 
bitrv216(float * a)1642 void bitrv216(float *a) {
1643   float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i,
1644       x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1645 
1646   x1r = a[2];
1647   x1i = a[3];
1648   x2r = a[4];
1649   x2i = a[5];
1650   x3r = a[6];
1651   x3i = a[7];
1652   x4r = a[8];
1653   x4i = a[9];
1654   x5r = a[10];
1655   x5i = a[11];
1656   x7r = a[14];
1657   x7i = a[15];
1658   x8r = a[16];
1659   x8i = a[17];
1660   x10r = a[20];
1661   x10i = a[21];
1662   x11r = a[22];
1663   x11i = a[23];
1664   x12r = a[24];
1665   x12i = a[25];
1666   x13r = a[26];
1667   x13i = a[27];
1668   x14r = a[28];
1669   x14i = a[29];
1670   a[2] = x8r;
1671   a[3] = x8i;
1672   a[4] = x4r;
1673   a[5] = x4i;
1674   a[6] = x12r;
1675   a[7] = x12i;
1676   a[8] = x2r;
1677   a[9] = x2i;
1678   a[10] = x10r;
1679   a[11] = x10i;
1680   a[14] = x14r;
1681   a[15] = x14i;
1682   a[16] = x1r;
1683   a[17] = x1i;
1684   a[20] = x5r;
1685   a[21] = x5i;
1686   a[22] = x13r;
1687   a[23] = x13i;
1688   a[24] = x3r;
1689   a[25] = x3i;
1690   a[26] = x11r;
1691   a[27] = x11i;
1692   a[28] = x7r;
1693   a[29] = x7i;
1694 }
1695 
bitrv216neg(float * a)1696 void bitrv216neg(float *a) {
1697   float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i,
1698       x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r,
1699       x14i, x15r, x15i;
1700 
1701   x1r = a[2];
1702   x1i = a[3];
1703   x2r = a[4];
1704   x2i = a[5];
1705   x3r = a[6];
1706   x3i = a[7];
1707   x4r = a[8];
1708   x4i = a[9];
1709   x5r = a[10];
1710   x5i = a[11];
1711   x6r = a[12];
1712   x6i = a[13];
1713   x7r = a[14];
1714   x7i = a[15];
1715   x8r = a[16];
1716   x8i = a[17];
1717   x9r = a[18];
1718   x9i = a[19];
1719   x10r = a[20];
1720   x10i = a[21];
1721   x11r = a[22];
1722   x11i = a[23];
1723   x12r = a[24];
1724   x12i = a[25];
1725   x13r = a[26];
1726   x13i = a[27];
1727   x14r = a[28];
1728   x14i = a[29];
1729   x15r = a[30];
1730   x15i = a[31];
1731   a[2] = x15r;
1732   a[3] = x15i;
1733   a[4] = x7r;
1734   a[5] = x7i;
1735   a[6] = x11r;
1736   a[7] = x11i;
1737   a[8] = x3r;
1738   a[9] = x3i;
1739   a[10] = x13r;
1740   a[11] = x13i;
1741   a[12] = x5r;
1742   a[13] = x5i;
1743   a[14] = x9r;
1744   a[15] = x9i;
1745   a[16] = x1r;
1746   a[17] = x1i;
1747   a[18] = x14r;
1748   a[19] = x14i;
1749   a[20] = x6r;
1750   a[21] = x6i;
1751   a[22] = x10r;
1752   a[23] = x10i;
1753   a[24] = x2r;
1754   a[25] = x2i;
1755   a[26] = x12r;
1756   a[27] = x12i;
1757   a[28] = x4r;
1758   a[29] = x4i;
1759   a[30] = x8r;
1760   a[31] = x8i;
1761 }
1762 
bitrv208(float * a)1763 void bitrv208(float *a) {
1764   float x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1765 
1766   x1r = a[2];
1767   x1i = a[3];
1768   x3r = a[6];
1769   x3i = a[7];
1770   x4r = a[8];
1771   x4i = a[9];
1772   x6r = a[12];
1773   x6i = a[13];
1774   a[2] = x4r;
1775   a[3] = x4i;
1776   a[6] = x6r;
1777   a[7] = x6i;
1778   a[8] = x1r;
1779   a[9] = x1i;
1780   a[12] = x3r;
1781   a[13] = x3i;
1782 }
1783 
bitrv208neg(float * a)1784 void bitrv208neg(float *a) {
1785   float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i;
1786 
1787   x1r = a[2];
1788   x1i = a[3];
1789   x2r = a[4];
1790   x2i = a[5];
1791   x3r = a[6];
1792   x3i = a[7];
1793   x4r = a[8];
1794   x4i = a[9];
1795   x5r = a[10];
1796   x5i = a[11];
1797   x6r = a[12];
1798   x6i = a[13];
1799   x7r = a[14];
1800   x7i = a[15];
1801   a[2] = x7r;
1802   a[3] = x7i;
1803   a[4] = x3r;
1804   a[5] = x3i;
1805   a[6] = x5r;
1806   a[7] = x5i;
1807   a[8] = x1r;
1808   a[9] = x1i;
1809   a[10] = x6r;
1810   a[11] = x6i;
1811   a[12] = x2r;
1812   a[13] = x2i;
1813   a[14] = x4r;
1814   a[15] = x4i;
1815 }
1816 
cftf1st(int n,float * a,float * w)1817 void cftf1st(int n, float *a, float *w) {
1818   int j, j0, j1, j2, j3, k, m, mh;
1819   float wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
1820   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
1821       y3r, y3i;
1822 
1823   mh = n >> 3;
1824   m = 2 * mh;
1825   j1 = m;
1826   j2 = j1 + m;
1827   j3 = j2 + m;
1828   x0r = a[0] + a[j2];
1829   x0i = a[1] + a[j2 + 1];
1830   x1r = a[0] - a[j2];
1831   x1i = a[1] - a[j2 + 1];
1832   x2r = a[j1] + a[j3];
1833   x2i = a[j1 + 1] + a[j3 + 1];
1834   x3r = a[j1] - a[j3];
1835   x3i = a[j1 + 1] - a[j3 + 1];
1836   a[0] = x0r + x2r;
1837   a[1] = x0i + x2i;
1838   a[j1] = x0r - x2r;
1839   a[j1 + 1] = x0i - x2i;
1840   a[j2] = x1r - x3i;
1841   a[j2 + 1] = x1i + x3r;
1842   a[j3] = x1r + x3i;
1843   a[j3 + 1] = x1i - x3r;
1844   wn4r = w[1];
1845   csc1 = w[2];
1846   csc3 = w[3];
1847   wd1r = 1;
1848   wd1i = 0;
1849   wd3r = 1;
1850   wd3i = 0;
1851   k = 0;
1852   for(j = 2; j < mh - 2; j += 4) {
1853     k += 4;
1854     wk1r = csc1 * (wd1r + w[k]);
1855     wk1i = csc1 * (wd1i + w[k + 1]);
1856     wk3r = csc3 * (wd3r + w[k + 2]);
1857     wk3i = csc3 * (wd3i + w[k + 3]);
1858     wd1r = w[k];
1859     wd1i = w[k + 1];
1860     wd3r = w[k + 2];
1861     wd3i = w[k + 3];
1862     j1 = j + m;
1863     j2 = j1 + m;
1864     j3 = j2 + m;
1865     x0r = a[j] + a[j2];
1866     x0i = a[j + 1] + a[j2 + 1];
1867     x1r = a[j] - a[j2];
1868     x1i = a[j + 1] - a[j2 + 1];
1869     y0r = a[j + 2] + a[j2 + 2];
1870     y0i = a[j + 3] + a[j2 + 3];
1871     y1r = a[j + 2] - a[j2 + 2];
1872     y1i = a[j + 3] - a[j2 + 3];
1873     x2r = a[j1] + a[j3];
1874     x2i = a[j1 + 1] + a[j3 + 1];
1875     x3r = a[j1] - a[j3];
1876     x3i = a[j1 + 1] - a[j3 + 1];
1877     y2r = a[j1 + 2] + a[j3 + 2];
1878     y2i = a[j1 + 3] + a[j3 + 3];
1879     y3r = a[j1 + 2] - a[j3 + 2];
1880     y3i = a[j1 + 3] - a[j3 + 3];
1881     a[j] = x0r + x2r;
1882     a[j + 1] = x0i + x2i;
1883     a[j + 2] = y0r + y2r;
1884     a[j + 3] = y0i + y2i;
1885     a[j1] = x0r - x2r;
1886     a[j1 + 1] = x0i - x2i;
1887     a[j1 + 2] = y0r - y2r;
1888     a[j1 + 3] = y0i - y2i;
1889     x0r = x1r - x3i;
1890     x0i = x1i + x3r;
1891     a[j2] = wk1r * x0r - wk1i * x0i;
1892     a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1893     x0r = y1r - y3i;
1894     x0i = y1i + y3r;
1895     a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1896     a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1897     x0r = x1r + x3i;
1898     x0i = x1i - x3r;
1899     a[j3] = wk3r * x0r + wk3i * x0i;
1900     a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1901     x0r = y1r + y3i;
1902     x0i = y1i - y3r;
1903     a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1904     a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1905     j0 = m - j;
1906     j1 = j0 + m;
1907     j2 = j1 + m;
1908     j3 = j2 + m;
1909     x0r = a[j0] + a[j2];
1910     x0i = a[j0 + 1] + a[j2 + 1];
1911     x1r = a[j0] - a[j2];
1912     x1i = a[j0 + 1] - a[j2 + 1];
1913     y0r = a[j0 - 2] + a[j2 - 2];
1914     y0i = a[j0 - 1] + a[j2 - 1];
1915     y1r = a[j0 - 2] - a[j2 - 2];
1916     y1i = a[j0 - 1] - a[j2 - 1];
1917     x2r = a[j1] + a[j3];
1918     x2i = a[j1 + 1] + a[j3 + 1];
1919     x3r = a[j1] - a[j3];
1920     x3i = a[j1 + 1] - a[j3 + 1];
1921     y2r = a[j1 - 2] + a[j3 - 2];
1922     y2i = a[j1 - 1] + a[j3 - 1];
1923     y3r = a[j1 - 2] - a[j3 - 2];
1924     y3i = a[j1 - 1] - a[j3 - 1];
1925     a[j0] = x0r + x2r;
1926     a[j0 + 1] = x0i + x2i;
1927     a[j0 - 2] = y0r + y2r;
1928     a[j0 - 1] = y0i + y2i;
1929     a[j1] = x0r - x2r;
1930     a[j1 + 1] = x0i - x2i;
1931     a[j1 - 2] = y0r - y2r;
1932     a[j1 - 1] = y0i - y2i;
1933     x0r = x1r - x3i;
1934     x0i = x1i + x3r;
1935     a[j2] = wk1i * x0r - wk1r * x0i;
1936     a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1937     x0r = y1r - y3i;
1938     x0i = y1i + y3r;
1939     a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1940     a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1941     x0r = x1r + x3i;
1942     x0i = x1i - x3r;
1943     a[j3] = wk3i * x0r + wk3r * x0i;
1944     a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1945     x0r = y1r + y3i;
1946     x0i = y1i - y3r;
1947     a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1948     a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1949   }
1950   wk1r = csc1 * (wd1r + wn4r);
1951   wk1i = csc1 * (wd1i + wn4r);
1952   wk3r = csc3 * (wd3r - wn4r);
1953   wk3i = csc3 * (wd3i - wn4r);
1954   j0 = mh;
1955   j1 = j0 + m;
1956   j2 = j1 + m;
1957   j3 = j2 + m;
1958   x0r = a[j0 - 2] + a[j2 - 2];
1959   x0i = a[j0 - 1] + a[j2 - 1];
1960   x1r = a[j0 - 2] - a[j2 - 2];
1961   x1i = a[j0 - 1] - a[j2 - 1];
1962   x2r = a[j1 - 2] + a[j3 - 2];
1963   x2i = a[j1 - 1] + a[j3 - 1];
1964   x3r = a[j1 - 2] - a[j3 - 2];
1965   x3i = a[j1 - 1] - a[j3 - 1];
1966   a[j0 - 2] = x0r + x2r;
1967   a[j0 - 1] = x0i + x2i;
1968   a[j1 - 2] = x0r - x2r;
1969   a[j1 - 1] = x0i - x2i;
1970   x0r = x1r - x3i;
1971   x0i = x1i + x3r;
1972   a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1973   a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1974   x0r = x1r + x3i;
1975   x0i = x1i - x3r;
1976   a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1977   a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1978   x0r = a[j0] + a[j2];
1979   x0i = a[j0 + 1] + a[j2 + 1];
1980   x1r = a[j0] - a[j2];
1981   x1i = a[j0 + 1] - a[j2 + 1];
1982   x2r = a[j1] + a[j3];
1983   x2i = a[j1 + 1] + a[j3 + 1];
1984   x3r = a[j1] - a[j3];
1985   x3i = a[j1 + 1] - a[j3 + 1];
1986   a[j0] = x0r + x2r;
1987   a[j0 + 1] = x0i + x2i;
1988   a[j1] = x0r - x2r;
1989   a[j1 + 1] = x0i - x2i;
1990   x0r = x1r - x3i;
1991   x0i = x1i + x3r;
1992   a[j2] = wn4r * (x0r - x0i);
1993   a[j2 + 1] = wn4r * (x0i + x0r);
1994   x0r = x1r + x3i;
1995   x0i = x1i - x3r;
1996   a[j3] = -wn4r * (x0r + x0i);
1997   a[j3 + 1] = -wn4r * (x0i - x0r);
1998   x0r = a[j0 + 2] + a[j2 + 2];
1999   x0i = a[j0 + 3] + a[j2 + 3];
2000   x1r = a[j0 + 2] - a[j2 + 2];
2001   x1i = a[j0 + 3] - a[j2 + 3];
2002   x2r = a[j1 + 2] + a[j3 + 2];
2003   x2i = a[j1 + 3] + a[j3 + 3];
2004   x3r = a[j1 + 2] - a[j3 + 2];
2005   x3i = a[j1 + 3] - a[j3 + 3];
2006   a[j0 + 2] = x0r + x2r;
2007   a[j0 + 3] = x0i + x2i;
2008   a[j1 + 2] = x0r - x2r;
2009   a[j1 + 3] = x0i - x2i;
2010   x0r = x1r - x3i;
2011   x0i = x1i + x3r;
2012   a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2013   a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2014   x0r = x1r + x3i;
2015   x0i = x1i - x3r;
2016   a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2017   a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2018 }
2019 
cftb1st(int n,float * a,float * w)2020 void cftb1st(int n, float *a, float *w) {
2021   int j, j0, j1, j2, j3, k, m, mh;
2022   float wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2023   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
2024       y3r, y3i;
2025 
2026   mh = n >> 3;
2027   m = 2 * mh;
2028   j1 = m;
2029   j2 = j1 + m;
2030   j3 = j2 + m;
2031   x0r = a[0] + a[j2];
2032   x0i = -a[1] - a[j2 + 1];
2033   x1r = a[0] - a[j2];
2034   x1i = -a[1] + a[j2 + 1];
2035   x2r = a[j1] + a[j3];
2036   x2i = a[j1 + 1] + a[j3 + 1];
2037   x3r = a[j1] - a[j3];
2038   x3i = a[j1 + 1] - a[j3 + 1];
2039   a[0] = x0r + x2r;
2040   a[1] = x0i - x2i;
2041   a[j1] = x0r - x2r;
2042   a[j1 + 1] = x0i + x2i;
2043   a[j2] = x1r + x3i;
2044   a[j2 + 1] = x1i + x3r;
2045   a[j3] = x1r - x3i;
2046   a[j3 + 1] = x1i - x3r;
2047   wn4r = w[1];
2048   csc1 = w[2];
2049   csc3 = w[3];
2050   wd1r = 1;
2051   wd1i = 0;
2052   wd3r = 1;
2053   wd3i = 0;
2054   k = 0;
2055   for(j = 2; j < mh - 2; j += 4) {
2056     k += 4;
2057     wk1r = csc1 * (wd1r + w[k]);
2058     wk1i = csc1 * (wd1i + w[k + 1]);
2059     wk3r = csc3 * (wd3r + w[k + 2]);
2060     wk3i = csc3 * (wd3i + w[k + 3]);
2061     wd1r = w[k];
2062     wd1i = w[k + 1];
2063     wd3r = w[k + 2];
2064     wd3i = w[k + 3];
2065     j1 = j + m;
2066     j2 = j1 + m;
2067     j3 = j2 + m;
2068     x0r = a[j] + a[j2];
2069     x0i = -a[j + 1] - a[j2 + 1];
2070     x1r = a[j] - a[j2];
2071     x1i = -a[j + 1] + a[j2 + 1];
2072     y0r = a[j + 2] + a[j2 + 2];
2073     y0i = -a[j + 3] - a[j2 + 3];
2074     y1r = a[j + 2] - a[j2 + 2];
2075     y1i = -a[j + 3] + a[j2 + 3];
2076     x2r = a[j1] + a[j3];
2077     x2i = a[j1 + 1] + a[j3 + 1];
2078     x3r = a[j1] - a[j3];
2079     x3i = a[j1 + 1] - a[j3 + 1];
2080     y2r = a[j1 + 2] + a[j3 + 2];
2081     y2i = a[j1 + 3] + a[j3 + 3];
2082     y3r = a[j1 + 2] - a[j3 + 2];
2083     y3i = a[j1 + 3] - a[j3 + 3];
2084     a[j] = x0r + x2r;
2085     a[j + 1] = x0i - x2i;
2086     a[j + 2] = y0r + y2r;
2087     a[j + 3] = y0i - y2i;
2088     a[j1] = x0r - x2r;
2089     a[j1 + 1] = x0i + x2i;
2090     a[j1 + 2] = y0r - y2r;
2091     a[j1 + 3] = y0i + y2i;
2092     x0r = x1r + x3i;
2093     x0i = x1i + x3r;
2094     a[j2] = wk1r * x0r - wk1i * x0i;
2095     a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2096     x0r = y1r + y3i;
2097     x0i = y1i + y3r;
2098     a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2099     a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2100     x0r = x1r - x3i;
2101     x0i = x1i - x3r;
2102     a[j3] = wk3r * x0r + wk3i * x0i;
2103     a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2104     x0r = y1r - y3i;
2105     x0i = y1i - y3r;
2106     a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2107     a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2108     j0 = m - j;
2109     j1 = j0 + m;
2110     j2 = j1 + m;
2111     j3 = j2 + m;
2112     x0r = a[j0] + a[j2];
2113     x0i = -a[j0 + 1] - a[j2 + 1];
2114     x1r = a[j0] - a[j2];
2115     x1i = -a[j0 + 1] + a[j2 + 1];
2116     y0r = a[j0 - 2] + a[j2 - 2];
2117     y0i = -a[j0 - 1] - a[j2 - 1];
2118     y1r = a[j0 - 2] - a[j2 - 2];
2119     y1i = -a[j0 - 1] + a[j2 - 1];
2120     x2r = a[j1] + a[j3];
2121     x2i = a[j1 + 1] + a[j3 + 1];
2122     x3r = a[j1] - a[j3];
2123     x3i = a[j1 + 1] - a[j3 + 1];
2124     y2r = a[j1 - 2] + a[j3 - 2];
2125     y2i = a[j1 - 1] + a[j3 - 1];
2126     y3r = a[j1 - 2] - a[j3 - 2];
2127     y3i = a[j1 - 1] - a[j3 - 1];
2128     a[j0] = x0r + x2r;
2129     a[j0 + 1] = x0i - x2i;
2130     a[j0 - 2] = y0r + y2r;
2131     a[j0 - 1] = y0i - y2i;
2132     a[j1] = x0r - x2r;
2133     a[j1 + 1] = x0i + x2i;
2134     a[j1 - 2] = y0r - y2r;
2135     a[j1 - 1] = y0i + y2i;
2136     x0r = x1r + x3i;
2137     x0i = x1i + x3r;
2138     a[j2] = wk1i * x0r - wk1r * x0i;
2139     a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2140     x0r = y1r + y3i;
2141     x0i = y1i + y3r;
2142     a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2143     a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2144     x0r = x1r - x3i;
2145     x0i = x1i - x3r;
2146     a[j3] = wk3i * x0r + wk3r * x0i;
2147     a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2148     x0r = y1r - y3i;
2149     x0i = y1i - y3r;
2150     a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2151     a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2152   }
2153   wk1r = csc1 * (wd1r + wn4r);
2154   wk1i = csc1 * (wd1i + wn4r);
2155   wk3r = csc3 * (wd3r - wn4r);
2156   wk3i = csc3 * (wd3i - wn4r);
2157   j0 = mh;
2158   j1 = j0 + m;
2159   j2 = j1 + m;
2160   j3 = j2 + m;
2161   x0r = a[j0 - 2] + a[j2 - 2];
2162   x0i = -a[j0 - 1] - a[j2 - 1];
2163   x1r = a[j0 - 2] - a[j2 - 2];
2164   x1i = -a[j0 - 1] + a[j2 - 1];
2165   x2r = a[j1 - 2] + a[j3 - 2];
2166   x2i = a[j1 - 1] + a[j3 - 1];
2167   x3r = a[j1 - 2] - a[j3 - 2];
2168   x3i = a[j1 - 1] - a[j3 - 1];
2169   a[j0 - 2] = x0r + x2r;
2170   a[j0 - 1] = x0i - x2i;
2171   a[j1 - 2] = x0r - x2r;
2172   a[j1 - 1] = x0i + x2i;
2173   x0r = x1r + x3i;
2174   x0i = x1i + x3r;
2175   a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2176   a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2177   x0r = x1r - x3i;
2178   x0i = x1i - x3r;
2179   a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2180   a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2181   x0r = a[j0] + a[j2];
2182   x0i = -a[j0 + 1] - a[j2 + 1];
2183   x1r = a[j0] - a[j2];
2184   x1i = -a[j0 + 1] + a[j2 + 1];
2185   x2r = a[j1] + a[j3];
2186   x2i = a[j1 + 1] + a[j3 + 1];
2187   x3r = a[j1] - a[j3];
2188   x3i = a[j1 + 1] - a[j3 + 1];
2189   a[j0] = x0r + x2r;
2190   a[j0 + 1] = x0i - x2i;
2191   a[j1] = x0r - x2r;
2192   a[j1 + 1] = x0i + x2i;
2193   x0r = x1r + x3i;
2194   x0i = x1i + x3r;
2195   a[j2] = wn4r * (x0r - x0i);
2196   a[j2 + 1] = wn4r * (x0i + x0r);
2197   x0r = x1r - x3i;
2198   x0i = x1i - x3r;
2199   a[j3] = -wn4r * (x0r + x0i);
2200   a[j3 + 1] = -wn4r * (x0i - x0r);
2201   x0r = a[j0 + 2] + a[j2 + 2];
2202   x0i = -a[j0 + 3] - a[j2 + 3];
2203   x1r = a[j0 + 2] - a[j2 + 2];
2204   x1i = -a[j0 + 3] + a[j2 + 3];
2205   x2r = a[j1 + 2] + a[j3 + 2];
2206   x2i = a[j1 + 3] + a[j3 + 3];
2207   x3r = a[j1 + 2] - a[j3 + 2];
2208   x3i = a[j1 + 3] - a[j3 + 3];
2209   a[j0 + 2] = x0r + x2r;
2210   a[j0 + 3] = x0i - x2i;
2211   a[j1 + 2] = x0r - x2r;
2212   a[j1 + 3] = x0i + x2i;
2213   x0r = x1r + x3i;
2214   x0i = x1i + x3r;
2215   a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2216   a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2217   x0r = x1r - x3i;
2218   x0i = x1i - x3r;
2219   a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2220   a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2221 }
2222 
2223 #ifdef USE_CDFT_THREADS
2224 struct cdft_arg_st {
2225   int n0;
2226   int n;
2227   float *a;
2228   int nw;
2229   float *w;
2230 };
2231 typedef struct cdft_arg_st cdft_arg_t;
2232 
cftrec4_th(int n,float * a,int nw,float * w)2233 void cftrec4_th(int n, float *a, int nw, float *w) {
2234   void *cftrec1_th(void *p);
2235   void *cftrec2_th(void *p);
2236   int i, idiv4, m, nthread;
2237   cdft_thread_t th[4];
2238   cdft_arg_t ag[4];
2239 
2240   nthread = 2;
2241   idiv4 = 0;
2242   m = n >> 1;
2243   if(n > CDFT_4THREADS_BEGIN_N) {
2244     nthread = 4;
2245     idiv4 = 1;
2246     m >>= 1;
2247   }
2248   for(i = 0; i < nthread; i++) {
2249     ag[i].n0 = n;
2250     ag[i].n = m;
2251     ag[i].a = &a[i * m];
2252     ag[i].nw = nw;
2253     ag[i].w = w;
2254     if(i != idiv4) {
2255       cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2256     }
2257     else {
2258       cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2259     }
2260   }
2261   for(i = 0; i < nthread; i++) {
2262     cdft_thread_wait(th[i]);
2263   }
2264 }
2265 
cftrec1_th(void * p)2266 void *cftrec1_th(void *p) {
2267   int cfttree(int n, int j, int k, float *a, int nw, float *w);
2268   void cftleaf(int n, int isplt, float *a, int nw, float *w);
2269   void cftmdl1(int n, float *a, float *w);
2270   int isplt, j, k, m, n, n0, nw;
2271   float *a, *w;
2272 
2273   n0 = ((cdft_arg_t *)p)->n0;
2274   n = ((cdft_arg_t *)p)->n;
2275   a = ((cdft_arg_t *)p)->a;
2276   nw = ((cdft_arg_t *)p)->nw;
2277   w = ((cdft_arg_t *)p)->w;
2278   m = n0;
2279   while(m > 512) {
2280     m >>= 2;
2281     cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2282   }
2283   cftleaf(m, 1, &a[n - m], nw, w);
2284   k = 0;
2285   for(j = n - m; j > 0; j -= m) {
2286     k++;
2287     isplt = cfttree(m, j, k, a, nw, w);
2288     cftleaf(m, isplt, &a[j - m], nw, w);
2289   }
2290   return (void *)0;
2291 }
2292 
cftrec2_th(void * p)2293 void *cftrec2_th(void *p) {
2294   int cfttree(int n, int j, int k, float *a, int nw, float *w);
2295   void cftleaf(int n, int isplt, float *a, int nw, float *w);
2296   void cftmdl2(int n, float *a, float *w);
2297   int isplt, j, k, m, n, n0, nw;
2298   float *a, *w;
2299 
2300   n0 = ((cdft_arg_t *)p)->n0;
2301   n = ((cdft_arg_t *)p)->n;
2302   a = ((cdft_arg_t *)p)->a;
2303   nw = ((cdft_arg_t *)p)->nw;
2304   w = ((cdft_arg_t *)p)->w;
2305   k = 1;
2306   m = n0;
2307   while(m > 512) {
2308     m >>= 2;
2309     k <<= 2;
2310     cftmdl2(m, &a[n - m], &w[nw - m]);
2311   }
2312   cftleaf(m, 0, &a[n - m], nw, w);
2313   k >>= 1;
2314   for(j = n - m; j > 0; j -= m) {
2315     k++;
2316     isplt = cfttree(m, j, k, a, nw, w);
2317     cftleaf(m, isplt, &a[j - m], nw, w);
2318   }
2319   return (void *)0;
2320 }
2321 #endif /* USE_CDFT_THREADS */
2322 
cftrec4(int n,float * a,int nw,float * w)2323 void cftrec4(int n, float *a, int nw, float *w) {
2324   int cfttree(int n, int j, int k, float *a, int nw, float *w);
2325   void cftleaf(int n, int isplt, float *a, int nw, float *w);
2326   void cftmdl1(int n, float *a, float *w);
2327   int isplt, j, k, m;
2328 
2329   m = n;
2330   while(m > 512) {
2331     m >>= 2;
2332     cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2333   }
2334   cftleaf(m, 1, &a[n - m], nw, w);
2335   k = 0;
2336   for(j = n - m; j > 0; j -= m) {
2337     k++;
2338     isplt = cfttree(m, j, k, a, nw, w);
2339     cftleaf(m, isplt, &a[j - m], nw, w);
2340   }
2341 }
2342 
cfttree(int n,int j,int k,float * a,int nw,float * w)2343 int cfttree(int n, int j, int k, float *a, int nw, float *w) {
2344   void cftmdl1(int n, float *a, float *w);
2345   void cftmdl2(int n, float *a, float *w);
2346   int i, isplt, m;
2347 
2348   if((k & 3) != 0) {
2349     isplt = k & 1;
2350     if(isplt != 0) {
2351       cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2352     }
2353     else {
2354       cftmdl2(n, &a[j - n], &w[nw - n]);
2355     }
2356   }
2357   else {
2358     m = n;
2359     for(i = k; (i & 3) == 0; i >>= 2) {
2360       m <<= 2;
2361     }
2362     isplt = i & 1;
2363     if(isplt != 0) {
2364       while(m > 128) {
2365         cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2366         m >>= 2;
2367       }
2368     }
2369     else {
2370       while(m > 128) {
2371         cftmdl2(m, &a[j - m], &w[nw - m]);
2372         m >>= 2;
2373       }
2374     }
2375   }
2376   return isplt;
2377 }
2378 
cftleaf(int n,int isplt,float * a,int nw,float * w)2379 void cftleaf(int n, int isplt, float *a, int nw, float *w) {
2380   void cftmdl1(int n, float *a, float *w);
2381   void cftmdl2(int n, float *a, float *w);
2382   void cftf161(float * a, float * w);
2383   void cftf162(float * a, float * w);
2384   void cftf081(float * a, float * w);
2385   void cftf082(float * a, float * w);
2386 
2387   if(n == 512) {
2388     cftmdl1(128, a, &w[nw - 64]);
2389     cftf161(a, &w[nw - 8]);
2390     cftf162(&a[32], &w[nw - 32]);
2391     cftf161(&a[64], &w[nw - 8]);
2392     cftf161(&a[96], &w[nw - 8]);
2393     cftmdl2(128, &a[128], &w[nw - 128]);
2394     cftf161(&a[128], &w[nw - 8]);
2395     cftf162(&a[160], &w[nw - 32]);
2396     cftf161(&a[192], &w[nw - 8]);
2397     cftf162(&a[224], &w[nw - 32]);
2398     cftmdl1(128, &a[256], &w[nw - 64]);
2399     cftf161(&a[256], &w[nw - 8]);
2400     cftf162(&a[288], &w[nw - 32]);
2401     cftf161(&a[320], &w[nw - 8]);
2402     cftf161(&a[352], &w[nw - 8]);
2403     if(isplt != 0) {
2404       cftmdl1(128, &a[384], &w[nw - 64]);
2405       cftf161(&a[480], &w[nw - 8]);
2406     }
2407     else {
2408       cftmdl2(128, &a[384], &w[nw - 128]);
2409       cftf162(&a[480], &w[nw - 32]);
2410     }
2411     cftf161(&a[384], &w[nw - 8]);
2412     cftf162(&a[416], &w[nw - 32]);
2413     cftf161(&a[448], &w[nw - 8]);
2414   }
2415   else {
2416     cftmdl1(64, a, &w[nw - 32]);
2417     cftf081(a, &w[nw - 8]);
2418     cftf082(&a[16], &w[nw - 8]);
2419     cftf081(&a[32], &w[nw - 8]);
2420     cftf081(&a[48], &w[nw - 8]);
2421     cftmdl2(64, &a[64], &w[nw - 64]);
2422     cftf081(&a[64], &w[nw - 8]);
2423     cftf082(&a[80], &w[nw - 8]);
2424     cftf081(&a[96], &w[nw - 8]);
2425     cftf082(&a[112], &w[nw - 8]);
2426     cftmdl1(64, &a[128], &w[nw - 32]);
2427     cftf081(&a[128], &w[nw - 8]);
2428     cftf082(&a[144], &w[nw - 8]);
2429     cftf081(&a[160], &w[nw - 8]);
2430     cftf081(&a[176], &w[nw - 8]);
2431     if(isplt != 0) {
2432       cftmdl1(64, &a[192], &w[nw - 32]);
2433       cftf081(&a[240], &w[nw - 8]);
2434     }
2435     else {
2436       cftmdl2(64, &a[192], &w[nw - 64]);
2437       cftf082(&a[240], &w[nw - 8]);
2438     }
2439     cftf081(&a[192], &w[nw - 8]);
2440     cftf082(&a[208], &w[nw - 8]);
2441     cftf081(&a[224], &w[nw - 8]);
2442   }
2443 }
2444 
cftmdl1(int n,float * a,float * w)2445 void cftmdl1(int n, float *a, float *w) {
2446   int j, j0, j1, j2, j3, k, m, mh;
2447   float wn4r, wk1r, wk1i, wk3r, wk3i;
2448   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2449 
2450   mh = n >> 3;
2451   m = 2 * mh;
2452   j1 = m;
2453   j2 = j1 + m;
2454   j3 = j2 + m;
2455   x0r = a[0] + a[j2];
2456   x0i = a[1] + a[j2 + 1];
2457   x1r = a[0] - a[j2];
2458   x1i = a[1] - a[j2 + 1];
2459   x2r = a[j1] + a[j3];
2460   x2i = a[j1 + 1] + a[j3 + 1];
2461   x3r = a[j1] - a[j3];
2462   x3i = a[j1 + 1] - a[j3 + 1];
2463   a[0] = x0r + x2r;
2464   a[1] = x0i + x2i;
2465   a[j1] = x0r - x2r;
2466   a[j1 + 1] = x0i - x2i;
2467   a[j2] = x1r - x3i;
2468   a[j2 + 1] = x1i + x3r;
2469   a[j3] = x1r + x3i;
2470   a[j3 + 1] = x1i - x3r;
2471   wn4r = w[1];
2472   k = 0;
2473   for(j = 2; j < mh; j += 2) {
2474     k += 4;
2475     wk1r = w[k];
2476     wk1i = w[k + 1];
2477     wk3r = w[k + 2];
2478     wk3i = w[k + 3];
2479     j1 = j + m;
2480     j2 = j1 + m;
2481     j3 = j2 + m;
2482     x0r = a[j] + a[j2];
2483     x0i = a[j + 1] + a[j2 + 1];
2484     x1r = a[j] - a[j2];
2485     x1i = a[j + 1] - a[j2 + 1];
2486     x2r = a[j1] + a[j3];
2487     x2i = a[j1 + 1] + a[j3 + 1];
2488     x3r = a[j1] - a[j3];
2489     x3i = a[j1 + 1] - a[j3 + 1];
2490     a[j] = x0r + x2r;
2491     a[j + 1] = x0i + x2i;
2492     a[j1] = x0r - x2r;
2493     a[j1 + 1] = x0i - x2i;
2494     x0r = x1r - x3i;
2495     x0i = x1i + x3r;
2496     a[j2] = wk1r * x0r - wk1i * x0i;
2497     a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2498     x0r = x1r + x3i;
2499     x0i = x1i - x3r;
2500     a[j3] = wk3r * x0r + wk3i * x0i;
2501     a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2502     j0 = m - j;
2503     j1 = j0 + m;
2504     j2 = j1 + m;
2505     j3 = j2 + m;
2506     x0r = a[j0] + a[j2];
2507     x0i = a[j0 + 1] + a[j2 + 1];
2508     x1r = a[j0] - a[j2];
2509     x1i = a[j0 + 1] - a[j2 + 1];
2510     x2r = a[j1] + a[j3];
2511     x2i = a[j1 + 1] + a[j3 + 1];
2512     x3r = a[j1] - a[j3];
2513     x3i = a[j1 + 1] - a[j3 + 1];
2514     a[j0] = x0r + x2r;
2515     a[j0 + 1] = x0i + x2i;
2516     a[j1] = x0r - x2r;
2517     a[j1 + 1] = x0i - x2i;
2518     x0r = x1r - x3i;
2519     x0i = x1i + x3r;
2520     a[j2] = wk1i * x0r - wk1r * x0i;
2521     a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2522     x0r = x1r + x3i;
2523     x0i = x1i - x3r;
2524     a[j3] = wk3i * x0r + wk3r * x0i;
2525     a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2526   }
2527   j0 = mh;
2528   j1 = j0 + m;
2529   j2 = j1 + m;
2530   j3 = j2 + m;
2531   x0r = a[j0] + a[j2];
2532   x0i = a[j0 + 1] + a[j2 + 1];
2533   x1r = a[j0] - a[j2];
2534   x1i = a[j0 + 1] - a[j2 + 1];
2535   x2r = a[j1] + a[j3];
2536   x2i = a[j1 + 1] + a[j3 + 1];
2537   x3r = a[j1] - a[j3];
2538   x3i = a[j1 + 1] - a[j3 + 1];
2539   a[j0] = x0r + x2r;
2540   a[j0 + 1] = x0i + x2i;
2541   a[j1] = x0r - x2r;
2542   a[j1 + 1] = x0i - x2i;
2543   x0r = x1r - x3i;
2544   x0i = x1i + x3r;
2545   a[j2] = wn4r * (x0r - x0i);
2546   a[j2 + 1] = wn4r * (x0i + x0r);
2547   x0r = x1r + x3i;
2548   x0i = x1i - x3r;
2549   a[j3] = -wn4r * (x0r + x0i);
2550   a[j3 + 1] = -wn4r * (x0i - x0r);
2551 }
2552 
cftmdl2(int n,float * a,float * w)2553 void cftmdl2(int n, float *a, float *w) {
2554   int j, j0, j1, j2, j3, k, kr, m, mh;
2555   float wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2556   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2557 
2558   mh = n >> 3;
2559   m = 2 * mh;
2560   wn4r = w[1];
2561   j1 = m;
2562   j2 = j1 + m;
2563   j3 = j2 + m;
2564   x0r = a[0] - a[j2 + 1];
2565   x0i = a[1] + a[j2];
2566   x1r = a[0] + a[j2 + 1];
2567   x1i = a[1] - a[j2];
2568   x2r = a[j1] - a[j3 + 1];
2569   x2i = a[j1 + 1] + a[j3];
2570   x3r = a[j1] + a[j3 + 1];
2571   x3i = a[j1 + 1] - a[j3];
2572   y0r = wn4r * (x2r - x2i);
2573   y0i = wn4r * (x2i + x2r);
2574   a[0] = x0r + y0r;
2575   a[1] = x0i + y0i;
2576   a[j1] = x0r - y0r;
2577   a[j1 + 1] = x0i - y0i;
2578   y0r = wn4r * (x3r - x3i);
2579   y0i = wn4r * (x3i + x3r);
2580   a[j2] = x1r - y0i;
2581   a[j2 + 1] = x1i + y0r;
2582   a[j3] = x1r + y0i;
2583   a[j3 + 1] = x1i - y0r;
2584   k = 0;
2585   kr = 2 * m;
2586   for(j = 2; j < mh; j += 2) {
2587     k += 4;
2588     wk1r = w[k];
2589     wk1i = w[k + 1];
2590     wk3r = w[k + 2];
2591     wk3i = w[k + 3];
2592     kr -= 4;
2593     wd1i = w[kr];
2594     wd1r = w[kr + 1];
2595     wd3i = w[kr + 2];
2596     wd3r = w[kr + 3];
2597     j1 = j + m;
2598     j2 = j1 + m;
2599     j3 = j2 + m;
2600     x0r = a[j] - a[j2 + 1];
2601     x0i = a[j + 1] + a[j2];
2602     x1r = a[j] + a[j2 + 1];
2603     x1i = a[j + 1] - a[j2];
2604     x2r = a[j1] - a[j3 + 1];
2605     x2i = a[j1 + 1] + a[j3];
2606     x3r = a[j1] + a[j3 + 1];
2607     x3i = a[j1 + 1] - a[j3];
2608     y0r = wk1r * x0r - wk1i * x0i;
2609     y0i = wk1r * x0i + wk1i * x0r;
2610     y2r = wd1r * x2r - wd1i * x2i;
2611     y2i = wd1r * x2i + wd1i * x2r;
2612     a[j] = y0r + y2r;
2613     a[j + 1] = y0i + y2i;
2614     a[j1] = y0r - y2r;
2615     a[j1 + 1] = y0i - y2i;
2616     y0r = wk3r * x1r + wk3i * x1i;
2617     y0i = wk3r * x1i - wk3i * x1r;
2618     y2r = wd3r * x3r + wd3i * x3i;
2619     y2i = wd3r * x3i - wd3i * x3r;
2620     a[j2] = y0r + y2r;
2621     a[j2 + 1] = y0i + y2i;
2622     a[j3] = y0r - y2r;
2623     a[j3 + 1] = y0i - y2i;
2624     j0 = m - j;
2625     j1 = j0 + m;
2626     j2 = j1 + m;
2627     j3 = j2 + m;
2628     x0r = a[j0] - a[j2 + 1];
2629     x0i = a[j0 + 1] + a[j2];
2630     x1r = a[j0] + a[j2 + 1];
2631     x1i = a[j0 + 1] - a[j2];
2632     x2r = a[j1] - a[j3 + 1];
2633     x2i = a[j1 + 1] + a[j3];
2634     x3r = a[j1] + a[j3 + 1];
2635     x3i = a[j1 + 1] - a[j3];
2636     y0r = wd1i * x0r - wd1r * x0i;
2637     y0i = wd1i * x0i + wd1r * x0r;
2638     y2r = wk1i * x2r - wk1r * x2i;
2639     y2i = wk1i * x2i + wk1r * x2r;
2640     a[j0] = y0r + y2r;
2641     a[j0 + 1] = y0i + y2i;
2642     a[j1] = y0r - y2r;
2643     a[j1 + 1] = y0i - y2i;
2644     y0r = wd3i * x1r + wd3r * x1i;
2645     y0i = wd3i * x1i - wd3r * x1r;
2646     y2r = wk3i * x3r + wk3r * x3i;
2647     y2i = wk3i * x3i - wk3r * x3r;
2648     a[j2] = y0r + y2r;
2649     a[j2 + 1] = y0i + y2i;
2650     a[j3] = y0r - y2r;
2651     a[j3 + 1] = y0i - y2i;
2652   }
2653   wk1r = w[m];
2654   wk1i = w[m + 1];
2655   j0 = mh;
2656   j1 = j0 + m;
2657   j2 = j1 + m;
2658   j3 = j2 + m;
2659   x0r = a[j0] - a[j2 + 1];
2660   x0i = a[j0 + 1] + a[j2];
2661   x1r = a[j0] + a[j2 + 1];
2662   x1i = a[j0 + 1] - a[j2];
2663   x2r = a[j1] - a[j3 + 1];
2664   x2i = a[j1 + 1] + a[j3];
2665   x3r = a[j1] + a[j3 + 1];
2666   x3i = a[j1 + 1] - a[j3];
2667   y0r = wk1r * x0r - wk1i * x0i;
2668   y0i = wk1r * x0i + wk1i * x0r;
2669   y2r = wk1i * x2r - wk1r * x2i;
2670   y2i = wk1i * x2i + wk1r * x2r;
2671   a[j0] = y0r + y2r;
2672   a[j0 + 1] = y0i + y2i;
2673   a[j1] = y0r - y2r;
2674   a[j1 + 1] = y0i - y2i;
2675   y0r = wk1i * x1r - wk1r * x1i;
2676   y0i = wk1i * x1i + wk1r * x1r;
2677   y2r = wk1r * x3r - wk1i * x3i;
2678   y2i = wk1r * x3i + wk1i * x3r;
2679   a[j2] = y0r - y2r;
2680   a[j2 + 1] = y0i - y2i;
2681   a[j3] = y0r + y2r;
2682   a[j3 + 1] = y0i + y2i;
2683 }
2684 
cftfx41(int n,float * a,int nw,float * w)2685 void cftfx41(int n, float *a, int nw, float *w) {
2686   void cftf161(float * a, float * w);
2687   void cftf162(float * a, float * w);
2688   void cftf081(float * a, float * w);
2689   void cftf082(float * a, float * w);
2690 
2691   if(n == 128) {
2692     cftf161(a, &w[nw - 8]);
2693     cftf162(&a[32], &w[nw - 32]);
2694     cftf161(&a[64], &w[nw - 8]);
2695     cftf161(&a[96], &w[nw - 8]);
2696   }
2697   else {
2698     cftf081(a, &w[nw - 8]);
2699     cftf082(&a[16], &w[nw - 8]);
2700     cftf081(&a[32], &w[nw - 8]);
2701     cftf081(&a[48], &w[nw - 8]);
2702   }
2703 }
2704 
cftf161(float * a,float * w)2705 void cftf161(float *a, float *w) {
2706   float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r,
2707       y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i,
2708       y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i,
2709       y15r, y15i;
2710 
2711   wn4r = w[1];
2712   wk1r = w[2];
2713   wk1i = w[3];
2714   x0r = a[0] + a[16];
2715   x0i = a[1] + a[17];
2716   x1r = a[0] - a[16];
2717   x1i = a[1] - a[17];
2718   x2r = a[8] + a[24];
2719   x2i = a[9] + a[25];
2720   x3r = a[8] - a[24];
2721   x3i = a[9] - a[25];
2722   y0r = x0r + x2r;
2723   y0i = x0i + x2i;
2724   y4r = x0r - x2r;
2725   y4i = x0i - x2i;
2726   y8r = x1r - x3i;
2727   y8i = x1i + x3r;
2728   y12r = x1r + x3i;
2729   y12i = x1i - x3r;
2730   x0r = a[2] + a[18];
2731   x0i = a[3] + a[19];
2732   x1r = a[2] - a[18];
2733   x1i = a[3] - a[19];
2734   x2r = a[10] + a[26];
2735   x2i = a[11] + a[27];
2736   x3r = a[10] - a[26];
2737   x3i = a[11] - a[27];
2738   y1r = x0r + x2r;
2739   y1i = x0i + x2i;
2740   y5r = x0r - x2r;
2741   y5i = x0i - x2i;
2742   x0r = x1r - x3i;
2743   x0i = x1i + x3r;
2744   y9r = wk1r * x0r - wk1i * x0i;
2745   y9i = wk1r * x0i + wk1i * x0r;
2746   x0r = x1r + x3i;
2747   x0i = x1i - x3r;
2748   y13r = wk1i * x0r - wk1r * x0i;
2749   y13i = wk1i * x0i + wk1r * x0r;
2750   x0r = a[4] + a[20];
2751   x0i = a[5] + a[21];
2752   x1r = a[4] - a[20];
2753   x1i = a[5] - a[21];
2754   x2r = a[12] + a[28];
2755   x2i = a[13] + a[29];
2756   x3r = a[12] - a[28];
2757   x3i = a[13] - a[29];
2758   y2r = x0r + x2r;
2759   y2i = x0i + x2i;
2760   y6r = x0r - x2r;
2761   y6i = x0i - x2i;
2762   x0r = x1r - x3i;
2763   x0i = x1i + x3r;
2764   y10r = wn4r * (x0r - x0i);
2765   y10i = wn4r * (x0i + x0r);
2766   x0r = x1r + x3i;
2767   x0i = x1i - x3r;
2768   y14r = wn4r * (x0r + x0i);
2769   y14i = wn4r * (x0i - x0r);
2770   x0r = a[6] + a[22];
2771   x0i = a[7] + a[23];
2772   x1r = a[6] - a[22];
2773   x1i = a[7] - a[23];
2774   x2r = a[14] + a[30];
2775   x2i = a[15] + a[31];
2776   x3r = a[14] - a[30];
2777   x3i = a[15] - a[31];
2778   y3r = x0r + x2r;
2779   y3i = x0i + x2i;
2780   y7r = x0r - x2r;
2781   y7i = x0i - x2i;
2782   x0r = x1r - x3i;
2783   x0i = x1i + x3r;
2784   y11r = wk1i * x0r - wk1r * x0i;
2785   y11i = wk1i * x0i + wk1r * x0r;
2786   x0r = x1r + x3i;
2787   x0i = x1i - x3r;
2788   y15r = wk1r * x0r - wk1i * x0i;
2789   y15i = wk1r * x0i + wk1i * x0r;
2790   x0r = y12r - y14r;
2791   x0i = y12i - y14i;
2792   x1r = y12r + y14r;
2793   x1i = y12i + y14i;
2794   x2r = y13r - y15r;
2795   x2i = y13i - y15i;
2796   x3r = y13r + y15r;
2797   x3i = y13i + y15i;
2798   a[24] = x0r + x2r;
2799   a[25] = x0i + x2i;
2800   a[26] = x0r - x2r;
2801   a[27] = x0i - x2i;
2802   a[28] = x1r - x3i;
2803   a[29] = x1i + x3r;
2804   a[30] = x1r + x3i;
2805   a[31] = x1i - x3r;
2806   x0r = y8r + y10r;
2807   x0i = y8i + y10i;
2808   x1r = y8r - y10r;
2809   x1i = y8i - y10i;
2810   x2r = y9r + y11r;
2811   x2i = y9i + y11i;
2812   x3r = y9r - y11r;
2813   x3i = y9i - y11i;
2814   a[16] = x0r + x2r;
2815   a[17] = x0i + x2i;
2816   a[18] = x0r - x2r;
2817   a[19] = x0i - x2i;
2818   a[20] = x1r - x3i;
2819   a[21] = x1i + x3r;
2820   a[22] = x1r + x3i;
2821   a[23] = x1i - x3r;
2822   x0r = y5r - y7i;
2823   x0i = y5i + y7r;
2824   x2r = wn4r * (x0r - x0i);
2825   x2i = wn4r * (x0i + x0r);
2826   x0r = y5r + y7i;
2827   x0i = y5i - y7r;
2828   x3r = wn4r * (x0r - x0i);
2829   x3i = wn4r * (x0i + x0r);
2830   x0r = y4r - y6i;
2831   x0i = y4i + y6r;
2832   x1r = y4r + y6i;
2833   x1i = y4i - y6r;
2834   a[8] = x0r + x2r;
2835   a[9] = x0i + x2i;
2836   a[10] = x0r - x2r;
2837   a[11] = x0i - x2i;
2838   a[12] = x1r - x3i;
2839   a[13] = x1i + x3r;
2840   a[14] = x1r + x3i;
2841   a[15] = x1i - x3r;
2842   x0r = y0r + y2r;
2843   x0i = y0i + y2i;
2844   x1r = y0r - y2r;
2845   x1i = y0i - y2i;
2846   x2r = y1r + y3r;
2847   x2i = y1i + y3i;
2848   x3r = y1r - y3r;
2849   x3i = y1i - y3i;
2850   a[0] = x0r + x2r;
2851   a[1] = x0i + x2i;
2852   a[2] = x0r - x2r;
2853   a[3] = x0i - x2i;
2854   a[4] = x1r - x3i;
2855   a[5] = x1i + x3r;
2856   a[6] = x1r + x3i;
2857   a[7] = x1i - x3r;
2858 }
2859 
cftf162(float * a,float * w)2860 void cftf162(float *a, float *w) {
2861   float wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i,
2862       y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r,
2863       y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
2864       y14r, y14i, y15r, y15i;
2865 
2866   wn4r = w[1];
2867   wk1r = w[4];
2868   wk1i = w[5];
2869   wk3r = w[6];
2870   wk3i = -w[7];
2871   wk2r = w[8];
2872   wk2i = w[9];
2873   x1r = a[0] - a[17];
2874   x1i = a[1] + a[16];
2875   x0r = a[8] - a[25];
2876   x0i = a[9] + a[24];
2877   x2r = wn4r * (x0r - x0i);
2878   x2i = wn4r * (x0i + x0r);
2879   y0r = x1r + x2r;
2880   y0i = x1i + x2i;
2881   y4r = x1r - x2r;
2882   y4i = x1i - x2i;
2883   x1r = a[0] + a[17];
2884   x1i = a[1] - a[16];
2885   x0r = a[8] + a[25];
2886   x0i = a[9] - a[24];
2887   x2r = wn4r * (x0r - x0i);
2888   x2i = wn4r * (x0i + x0r);
2889   y8r = x1r - x2i;
2890   y8i = x1i + x2r;
2891   y12r = x1r + x2i;
2892   y12i = x1i - x2r;
2893   x0r = a[2] - a[19];
2894   x0i = a[3] + a[18];
2895   x1r = wk1r * x0r - wk1i * x0i;
2896   x1i = wk1r * x0i + wk1i * x0r;
2897   x0r = a[10] - a[27];
2898   x0i = a[11] + a[26];
2899   x2r = wk3i * x0r - wk3r * x0i;
2900   x2i = wk3i * x0i + wk3r * x0r;
2901   y1r = x1r + x2r;
2902   y1i = x1i + x2i;
2903   y5r = x1r - x2r;
2904   y5i = x1i - x2i;
2905   x0r = a[2] + a[19];
2906   x0i = a[3] - a[18];
2907   x1r = wk3r * x0r - wk3i * x0i;
2908   x1i = wk3r * x0i + wk3i * x0r;
2909   x0r = a[10] + a[27];
2910   x0i = a[11] - a[26];
2911   x2r = wk1r * x0r + wk1i * x0i;
2912   x2i = wk1r * x0i - wk1i * x0r;
2913   y9r = x1r - x2r;
2914   y9i = x1i - x2i;
2915   y13r = x1r + x2r;
2916   y13i = x1i + x2i;
2917   x0r = a[4] - a[21];
2918   x0i = a[5] + a[20];
2919   x1r = wk2r * x0r - wk2i * x0i;
2920   x1i = wk2r * x0i + wk2i * x0r;
2921   x0r = a[12] - a[29];
2922   x0i = a[13] + a[28];
2923   x2r = wk2i * x0r - wk2r * x0i;
2924   x2i = wk2i * x0i + wk2r * x0r;
2925   y2r = x1r + x2r;
2926   y2i = x1i + x2i;
2927   y6r = x1r - x2r;
2928   y6i = x1i - x2i;
2929   x0r = a[4] + a[21];
2930   x0i = a[5] - a[20];
2931   x1r = wk2i * x0r - wk2r * x0i;
2932   x1i = wk2i * x0i + wk2r * x0r;
2933   x0r = a[12] + a[29];
2934   x0i = a[13] - a[28];
2935   x2r = wk2r * x0r - wk2i * x0i;
2936   x2i = wk2r * x0i + wk2i * x0r;
2937   y10r = x1r - x2r;
2938   y10i = x1i - x2i;
2939   y14r = x1r + x2r;
2940   y14i = x1i + x2i;
2941   x0r = a[6] - a[23];
2942   x0i = a[7] + a[22];
2943   x1r = wk3r * x0r - wk3i * x0i;
2944   x1i = wk3r * x0i + wk3i * x0r;
2945   x0r = a[14] - a[31];
2946   x0i = a[15] + a[30];
2947   x2r = wk1i * x0r - wk1r * x0i;
2948   x2i = wk1i * x0i + wk1r * x0r;
2949   y3r = x1r + x2r;
2950   y3i = x1i + x2i;
2951   y7r = x1r - x2r;
2952   y7i = x1i - x2i;
2953   x0r = a[6] + a[23];
2954   x0i = a[7] - a[22];
2955   x1r = wk1i * x0r + wk1r * x0i;
2956   x1i = wk1i * x0i - wk1r * x0r;
2957   x0r = a[14] + a[31];
2958   x0i = a[15] - a[30];
2959   x2r = wk3i * x0r - wk3r * x0i;
2960   x2i = wk3i * x0i + wk3r * x0r;
2961   y11r = x1r + x2r;
2962   y11i = x1i + x2i;
2963   y15r = x1r - x2r;
2964   y15i = x1i - x2i;
2965   x1r = y0r + y2r;
2966   x1i = y0i + y2i;
2967   x2r = y1r + y3r;
2968   x2i = y1i + y3i;
2969   a[0] = x1r + x2r;
2970   a[1] = x1i + x2i;
2971   a[2] = x1r - x2r;
2972   a[3] = x1i - x2i;
2973   x1r = y0r - y2r;
2974   x1i = y0i - y2i;
2975   x2r = y1r - y3r;
2976   x2i = y1i - y3i;
2977   a[4] = x1r - x2i;
2978   a[5] = x1i + x2r;
2979   a[6] = x1r + x2i;
2980   a[7] = x1i - x2r;
2981   x1r = y4r - y6i;
2982   x1i = y4i + y6r;
2983   x0r = y5r - y7i;
2984   x0i = y5i + y7r;
2985   x2r = wn4r * (x0r - x0i);
2986   x2i = wn4r * (x0i + x0r);
2987   a[8] = x1r + x2r;
2988   a[9] = x1i + x2i;
2989   a[10] = x1r - x2r;
2990   a[11] = x1i - x2i;
2991   x1r = y4r + y6i;
2992   x1i = y4i - y6r;
2993   x0r = y5r + y7i;
2994   x0i = y5i - y7r;
2995   x2r = wn4r * (x0r - x0i);
2996   x2i = wn4r * (x0i + x0r);
2997   a[12] = x1r - x2i;
2998   a[13] = x1i + x2r;
2999   a[14] = x1r + x2i;
3000   a[15] = x1i - x2r;
3001   x1r = y8r + y10r;
3002   x1i = y8i + y10i;
3003   x2r = y9r - y11r;
3004   x2i = y9i - y11i;
3005   a[16] = x1r + x2r;
3006   a[17] = x1i + x2i;
3007   a[18] = x1r - x2r;
3008   a[19] = x1i - x2i;
3009   x1r = y8r - y10r;
3010   x1i = y8i - y10i;
3011   x2r = y9r + y11r;
3012   x2i = y9i + y11i;
3013   a[20] = x1r - x2i;
3014   a[21] = x1i + x2r;
3015   a[22] = x1r + x2i;
3016   a[23] = x1i - x2r;
3017   x1r = y12r - y14i;
3018   x1i = y12i + y14r;
3019   x0r = y13r + y15i;
3020   x0i = y13i - y15r;
3021   x2r = wn4r * (x0r - x0i);
3022   x2i = wn4r * (x0i + x0r);
3023   a[24] = x1r + x2r;
3024   a[25] = x1i + x2i;
3025   a[26] = x1r - x2r;
3026   a[27] = x1i - x2i;
3027   x1r = y12r + y14i;
3028   x1i = y12i - y14r;
3029   x0r = y13r - y15i;
3030   x0i = y13i + y15r;
3031   x2r = wn4r * (x0r - x0i);
3032   x2i = wn4r * (x0i + x0r);
3033   a[28] = x1r - x2i;
3034   a[29] = x1i + x2r;
3035   a[30] = x1r + x2i;
3036   a[31] = x1i - x2r;
3037 }
3038 
cftf081(float * a,float * w)3039 void cftf081(float *a, float *w) {
3040   float wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r,
3041       y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3042 
3043   wn4r = w[1];
3044   x0r = a[0] + a[8];
3045   x0i = a[1] + a[9];
3046   x1r = a[0] - a[8];
3047   x1i = a[1] - a[9];
3048   x2r = a[4] + a[12];
3049   x2i = a[5] + a[13];
3050   x3r = a[4] - a[12];
3051   x3i = a[5] - a[13];
3052   y0r = x0r + x2r;
3053   y0i = x0i + x2i;
3054   y2r = x0r - x2r;
3055   y2i = x0i - x2i;
3056   y1r = x1r - x3i;
3057   y1i = x1i + x3r;
3058   y3r = x1r + x3i;
3059   y3i = x1i - x3r;
3060   x0r = a[2] + a[10];
3061   x0i = a[3] + a[11];
3062   x1r = a[2] - a[10];
3063   x1i = a[3] - a[11];
3064   x2r = a[6] + a[14];
3065   x2i = a[7] + a[15];
3066   x3r = a[6] - a[14];
3067   x3i = a[7] - a[15];
3068   y4r = x0r + x2r;
3069   y4i = x0i + x2i;
3070   y6r = x0r - x2r;
3071   y6i = x0i - x2i;
3072   x0r = x1r - x3i;
3073   x0i = x1i + x3r;
3074   x2r = x1r + x3i;
3075   x2i = x1i - x3r;
3076   y5r = wn4r * (x0r - x0i);
3077   y5i = wn4r * (x0r + x0i);
3078   y7r = wn4r * (x2r - x2i);
3079   y7i = wn4r * (x2r + x2i);
3080   a[8] = y1r + y5r;
3081   a[9] = y1i + y5i;
3082   a[10] = y1r - y5r;
3083   a[11] = y1i - y5i;
3084   a[12] = y3r - y7i;
3085   a[13] = y3i + y7r;
3086   a[14] = y3r + y7i;
3087   a[15] = y3i - y7r;
3088   a[0] = y0r + y4r;
3089   a[1] = y0i + y4i;
3090   a[2] = y0r - y4r;
3091   a[3] = y0i - y4i;
3092   a[4] = y2r - y6i;
3093   a[5] = y2i + y6r;
3094   a[6] = y2r + y6i;
3095   a[7] = y2i - y6r;
3096 }
3097 
cftf082(float * a,float * w)3098 void cftf082(float *a, float *w) {
3099   float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r,
3100       y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3101 
3102   wn4r = w[1];
3103   wk1r = w[2];
3104   wk1i = w[3];
3105   y0r = a[0] - a[9];
3106   y0i = a[1] + a[8];
3107   y1r = a[0] + a[9];
3108   y1i = a[1] - a[8];
3109   x0r = a[4] - a[13];
3110   x0i = a[5] + a[12];
3111   y2r = wn4r * (x0r - x0i);
3112   y2i = wn4r * (x0i + x0r);
3113   x0r = a[4] + a[13];
3114   x0i = a[5] - a[12];
3115   y3r = wn4r * (x0r - x0i);
3116   y3i = wn4r * (x0i + x0r);
3117   x0r = a[2] - a[11];
3118   x0i = a[3] + a[10];
3119   y4r = wk1r * x0r - wk1i * x0i;
3120   y4i = wk1r * x0i + wk1i * x0r;
3121   x0r = a[2] + a[11];
3122   x0i = a[3] - a[10];
3123   y5r = wk1i * x0r - wk1r * x0i;
3124   y5i = wk1i * x0i + wk1r * x0r;
3125   x0r = a[6] - a[15];
3126   x0i = a[7] + a[14];
3127   y6r = wk1i * x0r - wk1r * x0i;
3128   y6i = wk1i * x0i + wk1r * x0r;
3129   x0r = a[6] + a[15];
3130   x0i = a[7] - a[14];
3131   y7r = wk1r * x0r - wk1i * x0i;
3132   y7i = wk1r * x0i + wk1i * x0r;
3133   x0r = y0r + y2r;
3134   x0i = y0i + y2i;
3135   x1r = y4r + y6r;
3136   x1i = y4i + y6i;
3137   a[0] = x0r + x1r;
3138   a[1] = x0i + x1i;
3139   a[2] = x0r - x1r;
3140   a[3] = x0i - x1i;
3141   x0r = y0r - y2r;
3142   x0i = y0i - y2i;
3143   x1r = y4r - y6r;
3144   x1i = y4i - y6i;
3145   a[4] = x0r - x1i;
3146   a[5] = x0i + x1r;
3147   a[6] = x0r + x1i;
3148   a[7] = x0i - x1r;
3149   x0r = y1r - y3i;
3150   x0i = y1i + y3r;
3151   x1r = y5r - y7r;
3152   x1i = y5i - y7i;
3153   a[8] = x0r + x1r;
3154   a[9] = x0i + x1i;
3155   a[10] = x0r - x1r;
3156   a[11] = x0i - x1i;
3157   x0r = y1r + y3i;
3158   x0i = y1i - y3r;
3159   x1r = y5r + y7r;
3160   x1i = y5i + y7i;
3161   a[12] = x0r - x1i;
3162   a[13] = x0i + x1r;
3163   a[14] = x0r + x1i;
3164   a[15] = x0i - x1r;
3165 }
3166 
cftf040(float * a)3167 void cftf040(float *a) {
3168   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3169 
3170   x0r = a[0] + a[4];
3171   x0i = a[1] + a[5];
3172   x1r = a[0] - a[4];
3173   x1i = a[1] - a[5];
3174   x2r = a[2] + a[6];
3175   x2i = a[3] + a[7];
3176   x3r = a[2] - a[6];
3177   x3i = a[3] - a[7];
3178   a[0] = x0r + x2r;
3179   a[1] = x0i + x2i;
3180   a[2] = x1r - x3i;
3181   a[3] = x1i + x3r;
3182   a[4] = x0r - x2r;
3183   a[5] = x0i - x2i;
3184   a[6] = x1r + x3i;
3185   a[7] = x1i - x3r;
3186 }
3187 
cftb040(float * a)3188 void cftb040(float *a) {
3189   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3190 
3191   x0r = a[0] + a[4];
3192   x0i = a[1] + a[5];
3193   x1r = a[0] - a[4];
3194   x1i = a[1] - a[5];
3195   x2r = a[2] + a[6];
3196   x2i = a[3] + a[7];
3197   x3r = a[2] - a[6];
3198   x3i = a[3] - a[7];
3199   a[0] = x0r + x2r;
3200   a[1] = x0i + x2i;
3201   a[2] = x1r + x3i;
3202   a[3] = x1i - x3r;
3203   a[4] = x0r - x2r;
3204   a[5] = x0i - x2i;
3205   a[6] = x1r - x3i;
3206   a[7] = x1i + x3r;
3207 }
3208 
cftx020(float * a)3209 void cftx020(float *a) {
3210   float x0r, x0i;
3211 
3212   x0r = a[0] - a[2];
3213   x0i = a[1] - a[3];
3214   a[0] += a[2];
3215   a[1] += a[3];
3216   a[2] = x0r;
3217   a[3] = x0i;
3218 }
3219 
rftfsub(int n,float * a,int nc,float * c)3220 void rftfsub(int n, float *a, int nc, float *c) {
3221   int j, k, kk, ks, m;
3222   float wkr, wki, xr, xi, yr, yi;
3223 
3224   m = n >> 1;
3225   ks = 2 * nc / m;
3226   kk = 0;
3227   for(j = 2; j < m; j += 2) {
3228     k = n - j;
3229     kk += ks;
3230     wkr = 0.5 - c[nc - kk];
3231     wki = c[kk];
3232     xr = a[j] - a[k];
3233     xi = a[j + 1] + a[k + 1];
3234     yr = wkr * xr - wki * xi;
3235     yi = wkr * xi + wki * xr;
3236     a[j] -= yr;
3237     a[j + 1] -= yi;
3238     a[k] += yr;
3239     a[k + 1] -= yi;
3240   }
3241 }
3242 
rftbsub(int n,float * a,int nc,float * c)3243 void rftbsub(int n, float *a, int nc, float *c) {
3244   int j, k, kk, ks, m;
3245   float wkr, wki, xr, xi, yr, yi;
3246 
3247   m = n >> 1;
3248   ks = 2 * nc / m;
3249   kk = 0;
3250   for(j = 2; j < m; j += 2) {
3251     k = n - j;
3252     kk += ks;
3253     wkr = 0.5 - c[nc - kk];
3254     wki = c[kk];
3255     xr = a[j] - a[k];
3256     xi = a[j + 1] + a[k + 1];
3257     yr = wkr * xr + wki * xi;
3258     yi = wkr * xi - wki * xr;
3259     a[j] -= yr;
3260     a[j + 1] -= yi;
3261     a[k] += yr;
3262     a[k + 1] -= yi;
3263   }
3264 }
3265 
dctsub(int n,float * a,int nc,float * c)3266 void dctsub(int n, float *a, int nc, float *c) {
3267   int j, k, kk, ks, m;
3268   float wkr, wki, xr;
3269 
3270   m = n >> 1;
3271   ks = nc / n;
3272   kk = 0;
3273   for(j = 1; j < m; j++) {
3274     k = n - j;
3275     kk += ks;
3276     wkr = c[kk] - c[nc - kk];
3277     wki = c[kk] + c[nc - kk];
3278     xr = wki * a[j] - wkr * a[k];
3279     a[j] = wkr * a[j] + wki * a[k];
3280     a[k] = xr;
3281   }
3282   a[m] *= c[0];
3283 }
3284 
dstsub(int n,float * a,int nc,float * c)3285 void dstsub(int n, float *a, int nc, float *c) {
3286   int j, k, kk, ks, m;
3287   float wkr, wki, xr;
3288 
3289   m = n >> 1;
3290   ks = nc / n;
3291   kk = 0;
3292   for(j = 1; j < m; j++) {
3293     k = n - j;
3294     kk += ks;
3295     wkr = c[kk] - c[nc - kk];
3296     wki = c[kk] + c[nc - kk];
3297     xr = wki * a[k] - wkr * a[j];
3298     a[k] = wkr * a[k] + wki * a[j];
3299     a[j] = xr;
3300   }
3301   a[m] *= c[0];
3302 }
3303 
3304 }
3305