1 ///////////////////////////////////////////////////////////////////////////////
2 // BSD 3-Clause License
3 //
4 // Copyright (c) 2018-2020, The Regents of the University of California
5 // All rights reserved.
6 //
7 // Redistribution and use in source and binary forms, with or without
8 // modification, are permitted provided that the following conditions are met:
9 //
10 // * Redistributions of source code must retain the above copyright notice, this
11 // list of conditions and the following disclaimer.
12 //
13 // * Redistributions in binary form must reproduce the above copyright notice,
14 // this list of conditions and the following disclaimer in the documentation
15 // and/or other materials provided with the distribution.
16 //
17 // * Neither the name of the copyright holder nor the names of its
18 // contributors may be used to endorse or promote products derived from
19 // this software without specific prior written permission.
20 //
21 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 // ARE
25 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
26 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
28 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
29 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 ///////////////////////////////////////////////////////////////////////////////
33
34 /*
35 Fast Fourier/Cosine/Sine Transform
36 dimension :one
37 data length :power of 2
38 decimation :frequency
39 radix :split-radix
40 data :inplace
41 table :use
42 functions
43 cdft: Complex Discrete Fourier Transform
44 rdft: Real Discrete Fourier Transform
45 ddct: Discrete Cosine Transform
46 ddst: Discrete Sine Transform
47 dfct: Cosine Transform of RDFT (Real Symmetric DFT)
48 dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
49 function prototypes
50 void cdft(int, int, float *, int *, float *);
51 void rdft(int, int, float *, int *, float *);
52 void ddct(int, int, float *, int *, float *);
53 void ddst(int, int, float *, int *, float *);
54 void dfct(int, float *, float *, int *, float *);
55 void dfst(int, float *, float *, int *, float *);
56 macro definitions
57 USE_CDFT_PTHREADS : default=not defined
58 CDFT_THREADS_BEGIN_N : must be >= 512, default=8192
59 CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
60 USE_CDFT_WINTHREADS : default=not defined
61 CDFT_THREADS_BEGIN_N : must be >= 512, default=32768
62 CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
63
64
65 -------- Complex DFT (Discrete Fourier Transform) --------
66 [definition]
67 <case1>
68 X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
69 <case2>
70 X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
71 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
72 [usage]
73 <case1>
74 ip[0] = 0; // first time only
75 cdft(2*n, 1, a, ip, w);
76 <case2>
77 ip[0] = 0; // first time only
78 cdft(2*n, -1, a, ip, w);
79 [parameters]
80 2*n :data length (int)
81 n >= 1, n = power of 2
82 a[0...2*n-1] :input/output data (float *)
83 input data
84 a[2*j] = Re(x[j]),
85 a[2*j+1] = Im(x[j]), 0<=j<n
86 output data
87 a[2*k] = Re(X[k]),
88 a[2*k+1] = Im(X[k]), 0<=k<n
89 ip[0...*] :work area for bit reversal (int *)
90 length of ip >= 2+sqrt(n)
91 strictly,
92 length of ip >=
93 2+(1<<(int)(log(n+0.5)/log(2))/2).
94 ip[0],ip[1] are pointers of the cos/sin table.
95 w[0...n/2-1] :cos/sin table (float *)
96 w[],ip[] are initialized if ip[0] == 0.
97 [remark]
98 Inverse of
99 cdft(2*n, -1, a, ip, w);
100 is
101 cdft(2*n, 1, a, ip, w);
102 for (j = 0; j <= 2 * n - 1; j++) {
103 a[j] *= 1.0 / n;
104 }
105 .
106
107
108 -------- Real DFT / Inverse of Real DFT --------
109 [definition]
110 <case1> RDFT
111 R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
112 I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
113 <case2> IRDFT (excluding scale)
114 a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
115 sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
116 sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
117 [usage]
118 <case1>
119 ip[0] = 0; // first time only
120 rdft(n, 1, a, ip, w);
121 <case2>
122 ip[0] = 0; // first time only
123 rdft(n, -1, a, ip, w);
124 [parameters]
125 n :data length (int)
126 n >= 2, n = power of 2
127 a[0...n-1] :input/output data (float *)
128 <case1>
129 output data
130 a[2*k] = R[k], 0<=k<n/2
131 a[2*k+1] = I[k], 0<k<n/2
132 a[1] = R[n/2]
133 <case2>
134 input data
135 a[2*j] = R[j], 0<=j<n/2
136 a[2*j+1] = I[j], 0<j<n/2
137 a[1] = R[n/2]
138 ip[0...*] :work area for bit reversal (int *)
139 length of ip >= 2+sqrt(n/2)
140 strictly,
141 length of ip >=
142 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
143 ip[0],ip[1] are pointers of the cos/sin table.
144 w[0...n/2-1] :cos/sin table (float *)
145 w[],ip[] are initialized if ip[0] == 0.
146 [remark]
147 Inverse of
148 rdft(n, 1, a, ip, w);
149 is
150 rdft(n, -1, a, ip, w);
151 for (j = 0; j <= n - 1; j++) {
152 a[j] *= 2.0 / n;
153 }
154 .
155
156
157 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
158 [definition]
159 <case1> IDCT (excluding scale)
160 C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
161 <case2> DCT
162 C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
163 [usage]
164 <case1>
165 ip[0] = 0; // first time only
166 ddct(n, 1, a, ip, w);
167 <case2>
168 ip[0] = 0; // first time only
169 ddct(n, -1, a, ip, w);
170 [parameters]
171 n :data length (int)
172 n >= 2, n = power of 2
173 a[0...n-1] :input/output data (float *)
174 output data
175 a[k] = C[k], 0<=k<n
176 ip[0...*] :work area for bit reversal (int *)
177 length of ip >= 2+sqrt(n/2)
178 strictly,
179 length of ip >=
180 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
181 ip[0],ip[1] are pointers of the cos/sin table.
182 w[0...n*5/4-1] :cos/sin table (float *)
183 w[],ip[] are initialized if ip[0] == 0.
184 [remark]
185 Inverse of
186 ddct(n, -1, a, ip, w);
187 is
188 a[0] *= 0.5;
189 ddct(n, 1, a, ip, w);
190 for (j = 0; j <= n - 1; j++) {
191 a[j] *= 2.0 / n;
192 }
193 .
194
195
196 -------- DST (Discrete Sine Transform) / Inverse of DST --------
197 [definition]
198 <case1> IDST (excluding scale)
199 S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
200 <case2> DST
201 S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
202 [usage]
203 <case1>
204 ip[0] = 0; // first time only
205 ddst(n, 1, a, ip, w);
206 <case2>
207 ip[0] = 0; // first time only
208 ddst(n, -1, a, ip, w);
209 [parameters]
210 n :data length (int)
211 n >= 2, n = power of 2
212 a[0...n-1] :input/output data (float *)
213 <case1>
214 input data
215 a[j] = A[j], 0<j<n
216 a[0] = A[n]
217 output data
218 a[k] = S[k], 0<=k<n
219 <case2>
220 output data
221 a[k] = S[k], 0<k<n
222 a[0] = S[n]
223 ip[0...*] :work area for bit reversal (int *)
224 length of ip >= 2+sqrt(n/2)
225 strictly,
226 length of ip >=
227 2+(1<<(int)(log(n/2+0.5)/log(2))/2).
228 ip[0],ip[1] are pointers of the cos/sin table.
229 w[0...n*5/4-1] :cos/sin table (float *)
230 w[],ip[] are initialized if ip[0] == 0.
231 [remark]
232 Inverse of
233 ddst(n, -1, a, ip, w);
234 is
235 a[0] *= 0.5;
236 ddst(n, 1, a, ip, w);
237 for (j = 0; j <= n - 1; j++) {
238 a[j] *= 2.0 / n;
239 }
240 .
241
242
243 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
244 [definition]
245 C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
246 [usage]
247 ip[0] = 0; // first time only
248 dfct(n, a, t, ip, w);
249 [parameters]
250 n :data length - 1 (int)
251 n >= 2, n = power of 2
252 a[0...n] :input/output data (float *)
253 output data
254 a[k] = C[k], 0<=k<=n
255 t[0...n/2] :work area (float *)
256 ip[0...*] :work area for bit reversal (int *)
257 length of ip >= 2+sqrt(n/4)
258 strictly,
259 length of ip >=
260 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
261 ip[0],ip[1] are pointers of the cos/sin table.
262 w[0...n*5/8-1] :cos/sin table (float *)
263 w[],ip[] are initialized if ip[0] == 0.
264 [remark]
265 Inverse of
266 a[0] *= 0.5;
267 a[n] *= 0.5;
268 dfct(n, a, t, ip, w);
269 is
270 a[0] *= 0.5;
271 a[n] *= 0.5;
272 dfct(n, a, t, ip, w);
273 for (j = 0; j <= n; j++) {
274 a[j] *= 2.0 / n;
275 }
276 .
277
278
279 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
280 [definition]
281 S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
282 [usage]
283 ip[0] = 0; // first time only
284 dfst(n, a, t, ip, w);
285 [parameters]
286 n :data length + 1 (int)
287 n >= 2, n = power of 2
288 a[0...n-1] :input/output data (float *)
289 output data
290 a[k] = S[k], 0<k<n
291 (a[0] is used for work area)
292 t[0...n/2-1] :work area (float *)
293 ip[0...*] :work area for bit reversal (int *)
294 length of ip >= 2+sqrt(n/4)
295 strictly,
296 length of ip >=
297 2+(1<<(int)(log(n/4+0.5)/log(2))/2).
298 ip[0],ip[1] are pointers of the cos/sin table.
299 w[0...n*5/8-1] :cos/sin table (float *)
300 w[],ip[] are initialized if ip[0] == 0.
301 [remark]
302 Inverse of
303 dfst(n, a, t, ip, w);
304 is
305 dfst(n, a, t, ip, w);
306 for (j = 1; j <= n - 1; j++) {
307 a[j] *= 2.0 / n;
308 }
309 .
310
311
312 Appendix :
313 The cos/sin table is recalculated when the larger table required.
314 w[] and ip[] are compatible with all routines.
315 */
316
317 #include <cmath>
318 #include <iostream>
319
320 namespace gpl {
321
322
cdft(int n,int isgn,float * a,int * ip,float * w)323 void cdft(int n, int isgn, float *a, int *ip, float *w) {
324 void makewt(int nw, int *ip, float *w);
325 void cftfsub(int n, float *a, int *ip, int nw, float *w);
326 void cftbsub(int n, float *a, int *ip, int nw, float *w);
327 int nw;
328
329 nw = ip[0];
330 if(n > (nw << 2)) {
331 nw = n >> 2;
332 makewt(nw, ip, w);
333 }
334 if(isgn >= 0) {
335 cftfsub(n, a, ip, nw, w);
336 }
337 else {
338 cftbsub(n, a, ip, nw, w);
339 }
340 }
341
rdft(int n,int isgn,float * a,int * ip,float * w)342 void rdft(int n, int isgn, float *a, int *ip, float *w) {
343 void makewt(int nw, int *ip, float *w);
344 void makect(int nc, int *ip, float *c);
345 void cftfsub(int n, float *a, int *ip, int nw, float *w);
346 void cftbsub(int n, float *a, int *ip, int nw, float *w);
347 void rftfsub(int n, float *a, int nc, float *c);
348 void rftbsub(int n, float *a, int nc, float *c);
349 int nw, nc;
350 float xi;
351
352 nw = ip[0];
353 if(n > (nw << 2)) {
354 nw = n >> 2;
355 makewt(nw, ip, w);
356 }
357 nc = ip[1];
358 if(n > (nc << 2)) {
359 nc = n >> 2;
360 makect(nc, ip, w + nw);
361 }
362 if(isgn >= 0) {
363 if(n > 4) {
364 cftfsub(n, a, ip, nw, w);
365 rftfsub(n, a, nc, w + nw);
366 }
367 else if(n == 4) {
368 cftfsub(n, a, ip, nw, w);
369 }
370 xi = a[0] - a[1];
371 a[0] += a[1];
372 a[1] = xi;
373 }
374 else {
375 a[1] = 0.5 * (a[0] - a[1]);
376 a[0] -= a[1];
377 if(n > 4) {
378 rftbsub(n, a, nc, w + nw);
379 cftbsub(n, a, ip, nw, w);
380 }
381 else if(n == 4) {
382 cftbsub(n, a, ip, nw, w);
383 }
384 }
385 }
386
ddct(int n,int isgn,float * a,int * ip,float * w)387 void ddct(int n, int isgn, float *a, int *ip, float *w) {
388 void makewt(int nw, int *ip, float *w);
389 void makect(int nc, int *ip, float *c);
390 void cftfsub(int n, float *a, int *ip, int nw, float *w);
391 void cftbsub(int n, float *a, int *ip, int nw, float *w);
392 void rftfsub(int n, float *a, int nc, float *c);
393 void rftbsub(int n, float *a, int nc, float *c);
394 void dctsub(int n, float *a, int nc, float *c);
395 int j, nw, nc;
396 float xr;
397
398 nw = ip[0];
399 if(n > (nw << 2)) {
400 nw = n >> 2;
401 makewt(nw, ip, w);
402 }
403 nc = ip[1];
404 if(n > nc) {
405 nc = n;
406 makect(nc, ip, w + nw);
407 }
408 if(isgn < 0) {
409 xr = a[n - 1];
410
411 for(j = n - 2; j >= 2; j -= 2) {
412 a[j + 1] = a[j] - a[j - 1];
413 a[j] += a[j - 1];
414 }
415 a[1] = a[0] - xr;
416 a[0] += xr;
417 if(n > 4) {
418 rftbsub(n, a, nc, w + nw);
419 cftbsub(n, a, ip, nw, w);
420 }
421 else if(n == 4) {
422 cftbsub(n, a, ip, nw, w);
423 }
424 }
425 dctsub(n, a, nc, w + nw);
426 if(isgn >= 0) {
427 if(n > 4) {
428 cftfsub(n, a, ip, nw, w);
429 rftfsub(n, a, nc, w + nw);
430 }
431 else if(n == 4) {
432 cftfsub(n, a, ip, nw, w);
433 }
434 xr = a[0] - a[1];
435 a[0] += a[1];
436 for(j = 2; j < n; j += 2) {
437 a[j - 1] = a[j] - a[j + 1];
438 a[j] += a[j + 1];
439 }
440 a[n - 1] = xr;
441 }
442 }
443
ddst(int n,int isgn,float * a,int * ip,float * w)444 void ddst(int n, int isgn, float *a, int *ip, float *w) {
445 void makewt(int nw, int *ip, float *w);
446 void makect(int nc, int *ip, float *c);
447 void cftfsub(int n, float *a, int *ip, int nw, float *w);
448 void cftbsub(int n, float *a, int *ip, int nw, float *w);
449 void rftfsub(int n, float *a, int nc, float *c);
450 void rftbsub(int n, float *a, int nc, float *c);
451 void dstsub(int n, float *a, int nc, float *c);
452 int j, nw, nc;
453 float xr;
454
455 nw = ip[0];
456 if(n > (nw << 2)) {
457 nw = n >> 2;
458 makewt(nw, ip, w);
459 }
460 nc = ip[1];
461 if(n > nc) {
462 nc = n;
463 makect(nc, ip, w + nw);
464 }
465 if(isgn < 0) {
466 xr = a[n - 1];
467 for(j = n - 2; j >= 2; j -= 2) {
468 a[j + 1] = -a[j] - a[j - 1];
469 a[j] -= a[j - 1];
470 }
471 a[1] = a[0] + xr;
472 a[0] -= xr;
473 if(n > 4) {
474 rftbsub(n, a, nc, w + nw);
475 cftbsub(n, a, ip, nw, w);
476 }
477 else if(n == 4) {
478 cftbsub(n, a, ip, nw, w);
479 }
480 }
481 dstsub(n, a, nc, w + nw);
482 if(isgn >= 0) {
483 if(n > 4) {
484 cftfsub(n, a, ip, nw, w);
485 rftfsub(n, a, nc, w + nw);
486 }
487 else if(n == 4) {
488 cftfsub(n, a, ip, nw, w);
489 }
490 xr = a[0] - a[1];
491 a[0] += a[1];
492 for(j = 2; j < n; j += 2) {
493 a[j - 1] = -a[j] - a[j + 1];
494 a[j] -= a[j + 1];
495 }
496 a[n - 1] = -xr;
497 }
498 }
499
dfct(int n,float * a,float * t,int * ip,float * w)500 void dfct(int n, float *a, float *t, int *ip, float *w) {
501 void makewt(int nw, int *ip, float *w);
502 void makect(int nc, int *ip, float *c);
503 void cftfsub(int n, float *a, int *ip, int nw, float *w);
504 void rftfsub(int n, float *a, int nc, float *c);
505 void dctsub(int n, float *a, int nc, float *c);
506 int j, k, l, m, mh, nw, nc;
507 float xr, xi, yr, yi;
508
509 nw = ip[0];
510 if(n > (nw << 3)) {
511 nw = n >> 3;
512 makewt(nw, ip, w);
513 }
514 nc = ip[1];
515 if(n > (nc << 1)) {
516 nc = n >> 1;
517 makect(nc, ip, w + nw);
518 }
519 m = n >> 1;
520 yi = a[m];
521 xi = a[0] + a[n];
522 a[0] -= a[n];
523 t[0] = xi - yi;
524 t[m] = xi + yi;
525 if(n > 2) {
526 mh = m >> 1;
527 for(j = 1; j < mh; j++) {
528 k = m - j;
529 xr = a[j] - a[n - j];
530 xi = a[j] + a[n - j];
531 yr = a[k] - a[n - k];
532 yi = a[k] + a[n - k];
533 a[j] = xr;
534 a[k] = yr;
535 t[j] = xi - yi;
536 t[k] = xi + yi;
537 }
538 t[mh] = a[mh] + a[n - mh];
539 a[mh] -= a[n - mh];
540 dctsub(m, a, nc, w + nw);
541 if(m > 4) {
542 cftfsub(m, a, ip, nw, w);
543 rftfsub(m, a, nc, w + nw);
544 }
545 else if(m == 4) {
546 cftfsub(m, a, ip, nw, w);
547 }
548 a[n - 1] = a[0] - a[1];
549 a[1] = a[0] + a[1];
550 for(j = m - 2; j >= 2; j -= 2) {
551 a[2 * j + 1] = a[j] + a[j + 1];
552 a[2 * j - 1] = a[j] - a[j + 1];
553 }
554 l = 2;
555 m = mh;
556 while(m >= 2) {
557 dctsub(m, t, nc, w + nw);
558 if(m > 4) {
559 cftfsub(m, t, ip, nw, w);
560 rftfsub(m, t, nc, w + nw);
561 }
562 else if(m == 4) {
563 cftfsub(m, t, ip, nw, w);
564 }
565 a[n - l] = t[0] - t[1];
566 a[l] = t[0] + t[1];
567 k = 0;
568 for(j = 2; j < m; j += 2) {
569 k += l << 2;
570 a[k - l] = t[j] - t[j + 1];
571 a[k + l] = t[j] + t[j + 1];
572 }
573 l <<= 1;
574 mh = m >> 1;
575 for(j = 0; j < mh; j++) {
576 k = m - j;
577 t[j] = t[m + k] - t[m + j];
578 t[k] = t[m + k] + t[m + j];
579 }
580 t[mh] = t[m + mh];
581 m = mh;
582 }
583 a[l] = t[0];
584 a[n] = t[2] - t[1];
585 a[0] = t[2] + t[1];
586 }
587 else {
588 a[1] = a[0];
589 a[2] = t[0];
590 a[0] = t[1];
591 }
592 }
593
dfst(int n,float * a,float * t,int * ip,float * w)594 void dfst(int n, float *a, float *t, int *ip, float *w) {
595 void makewt(int nw, int *ip, float *w);
596 void makect(int nc, int *ip, float *c);
597 void cftfsub(int n, float *a, int *ip, int nw, float *w);
598 void rftfsub(int n, float *a, int nc, float *c);
599 void dstsub(int n, float *a, int nc, float *c);
600 int j, k, l, m, mh, nw, nc;
601 float xr, xi, yr, yi;
602
603 nw = ip[0];
604 if(n > (nw << 3)) {
605 nw = n >> 3;
606 makewt(nw, ip, w);
607 }
608 nc = ip[1];
609 if(n > (nc << 1)) {
610 nc = n >> 1;
611 makect(nc, ip, w + nw);
612 }
613 if(n > 2) {
614 m = n >> 1;
615 mh = m >> 1;
616 for(j = 1; j < mh; j++) {
617 k = m - j;
618 xr = a[j] + a[n - j];
619 xi = a[j] - a[n - j];
620 yr = a[k] + a[n - k];
621 yi = a[k] - a[n - k];
622 a[j] = xr;
623 a[k] = yr;
624 t[j] = xi + yi;
625 t[k] = xi - yi;
626 }
627 t[0] = a[mh] - a[n - mh];
628 a[mh] += a[n - mh];
629 a[0] = a[m];
630 dstsub(m, a, nc, w + nw);
631 if(m > 4) {
632 cftfsub(m, a, ip, nw, w);
633 rftfsub(m, a, nc, w + nw);
634 }
635 else if(m == 4) {
636 cftfsub(m, a, ip, nw, w);
637 }
638 a[n - 1] = a[1] - a[0];
639 a[1] = a[0] + a[1];
640 for(j = m - 2; j >= 2; j -= 2) {
641 a[2 * j + 1] = a[j] - a[j + 1];
642 a[2 * j - 1] = -a[j] - a[j + 1];
643 }
644 l = 2;
645 m = mh;
646 while(m >= 2) {
647 dstsub(m, t, nc, w + nw);
648 if(m > 4) {
649 cftfsub(m, t, ip, nw, w);
650 rftfsub(m, t, nc, w + nw);
651 }
652 else if(m == 4) {
653 cftfsub(m, t, ip, nw, w);
654 }
655 a[n - l] = t[1] - t[0];
656 a[l] = t[0] + t[1];
657 k = 0;
658 for(j = 2; j < m; j += 2) {
659 k += l << 2;
660 a[k - l] = -t[j] - t[j + 1];
661 a[k + l] = t[j] - t[j + 1];
662 }
663 l <<= 1;
664 mh = m >> 1;
665 for(j = 1; j < mh; j++) {
666 k = m - j;
667 t[j] = t[m + k] + t[m + j];
668 t[k] = t[m + k] - t[m + j];
669 }
670 t[0] = t[m + mh];
671 m = mh;
672 }
673 a[l] = t[0];
674 }
675 a[0] = 0;
676 }
677
678 /* -------- initializing routines -------- */
679
680
makewt(int nw,int * ip,float * w)681 void makewt(int nw, int *ip, float *w) {
682 void makeipt(int nw, int *ip);
683 int j, nwh, nw0, nw1;
684 float delta, wn4r, wk1r, wk1i, wk3r, wk3i;
685
686 ip[0] = nw;
687 ip[1] = 1;
688 if(nw > 2) {
689 nwh = nw >> 1;
690 delta = atan(1.0) / nwh;
691 wn4r = cos(delta * nwh);
692 w[0] = 1;
693 w[1] = wn4r;
694 if(nwh == 4) {
695 w[2] = cos(delta * 2);
696 w[3] = sin(delta * 2);
697 }
698 else if(nwh > 4) {
699 makeipt(nw, ip);
700 w[2] = 0.5 / cos(delta * 2);
701 w[3] = 0.5 / cos(delta * 6);
702 for(j = 4; j < nwh; j += 4) {
703 w[j] = cos(delta * j);
704 w[j + 1] = sin(delta * j);
705 w[j + 2] = cos(3 * delta * j);
706 w[j + 3] = -sin(3 * delta * j);
707 }
708 }
709 nw0 = 0;
710 while(nwh > 2) {
711 nw1 = nw0 + nwh;
712 nwh >>= 1;
713 w[nw1] = 1;
714 w[nw1 + 1] = wn4r;
715 if(nwh == 4) {
716 wk1r = w[nw0 + 4];
717 wk1i = w[nw0 + 5];
718 w[nw1 + 2] = wk1r;
719 w[nw1 + 3] = wk1i;
720 }
721 else if(nwh > 4) {
722 wk1r = w[nw0 + 4];
723 wk3r = w[nw0 + 6];
724 w[nw1 + 2] = 0.5 / wk1r;
725 w[nw1 + 3] = 0.5 / wk3r;
726 for(j = 4; j < nwh; j += 4) {
727 wk1r = w[nw0 + 2 * j];
728 wk1i = w[nw0 + 2 * j + 1];
729 wk3r = w[nw0 + 2 * j + 2];
730 wk3i = w[nw0 + 2 * j + 3];
731 w[nw1 + j] = wk1r;
732 w[nw1 + j + 1] = wk1i;
733 w[nw1 + j + 2] = wk3r;
734 w[nw1 + j + 3] = wk3i;
735 }
736 }
737 nw0 = nw1;
738 }
739 }
740 }
741
makeipt(int nw,int * ip)742 void makeipt(int nw, int *ip) {
743 int j, l, m, m2, p, q;
744
745 ip[2] = 0;
746 ip[3] = 16;
747 m = 2;
748 for(l = nw; l > 32; l >>= 2) {
749 m2 = m << 1;
750 q = m2 << 3;
751 for(j = m; j < m2; j++) {
752 p = ip[j] << 2;
753 ip[m + j] = p;
754 ip[m2 + j] = p + q;
755 }
756 m = m2;
757 }
758 }
759
makect(int nc,int * ip,float * c)760 void makect(int nc, int *ip, float *c) {
761 int j, nch;
762 float delta;
763
764 ip[1] = nc;
765 if(nc > 1) {
766 nch = nc >> 1;
767 delta = atan(1.0) / nch;
768 c[0] = cos(delta * nch);
769 c[nch] = 0.5 * c[0];
770 for(j = 1; j < nch; j++) {
771 c[j] = 0.5 * cos(delta * j);
772 c[nc - j] = 0.5 * sin(delta * j);
773 }
774 }
775 }
776
777 /* -------- child routines -------- */
778
779 #ifdef USE_CDFT_PTHREADS
780 #define USE_CDFT_THREADS
781 #ifndef CDFT_THREADS_BEGIN_N
782 #define CDFT_THREADS_BEGIN_N 8192
783 #endif
784 #ifndef CDFT_4THREADS_BEGIN_N
785 #define CDFT_4THREADS_BEGIN_N 65536
786 #endif
787 #include <pthread.h>
788 #include <stdio.h>
789 #include <stdlib.h>
790 #define cdft_thread_t pthread_t
791 #define cdft_thread_create(thp, func, argp) \
792 { \
793 if(pthread_create(thp, NULL, func, (void *)argp) != 0) { \
794 fprintf(stderr, "cdft thread error\n"); \
795 exit(1); \
796 } \
797 }
798 #define cdft_thread_wait(th) \
799 { \
800 if(pthread_join(th, NULL) != 0) { \
801 fprintf(stderr, "cdft thread error\n"); \
802 exit(1); \
803 } \
804 }
805 #endif /* USE_CDFT_PTHREADS */
806
807 #ifdef USE_CDFT_WINTHREADS
808 #define USE_CDFT_THREADS
809 #ifndef CDFT_THREADS_BEGIN_N
810 #define CDFT_THREADS_BEGIN_N 32768
811 #endif
812 #ifndef CDFT_4THREADS_BEGIN_N
813 #define CDFT_4THREADS_BEGIN_N 524288
814 #endif
815 #include <windows.h>
816 #include <stdio.h>
817 #include <stdlib.h>
818 #define cdft_thread_t HANDLE
819 #define cdft_thread_create(thp, func, argp) \
820 { \
821 DWORD thid; \
822 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)func, (LPVOID)argp, \
823 0, &thid); \
824 if(*(thp) == 0) { \
825 fprintf(stderr, "cdft thread error\n"); \
826 exit(1); \
827 } \
828 }
829 #define cdft_thread_wait(th) \
830 { \
831 WaitForSingleObject(th, INFINITE); \
832 CloseHandle(th); \
833 }
834 #endif /* USE_CDFT_WINTHREADS */
835
cftfsub(int n,float * a,int * ip,int nw,float * w)836 void cftfsub(int n, float *a, int *ip, int nw, float *w) {
837 void bitrv2(int n, int *ip, float *a);
838 void bitrv216(float * a);
839 void bitrv208(float * a);
840 void cftf1st(int n, float *a, float *w);
841 void cftrec4(int n, float *a, int nw, float *w);
842 void cftleaf(int n, int isplt, float *a, int nw, float *w);
843 void cftfx41(int n, float *a, int nw, float *w);
844 void cftf161(float * a, float * w);
845 void cftf081(float * a, float * w);
846 void cftf040(float * a);
847 void cftx020(float * a);
848 #ifdef USE_CDFT_THREADS
849 void cftrec4_th(int n, float *a, int nw, float *w);
850 #endif /* USE_CDFT_THREADS */
851
852 if(n > 8) {
853 if(n > 32) {
854 cftf1st(n, a, &w[nw - (n >> 2)]);
855 #ifdef USE_CDFT_THREADS
856 if(n > CDFT_THREADS_BEGIN_N) {
857 cftrec4_th(n, a, nw, w);
858 }
859 else
860 #endif /* USE_CDFT_THREADS */
861 if(n > 512) {
862 cftrec4(n, a, nw, w);
863 }
864 else if(n > 128) {
865 cftleaf(n, 1, a, nw, w);
866 }
867 else {
868 cftfx41(n, a, nw, w);
869 }
870 bitrv2(n, ip, a);
871 }
872 else if(n == 32) {
873 cftf161(a, &w[nw - 8]);
874 bitrv216(a);
875 }
876 else {
877 cftf081(a, w);
878 bitrv208(a);
879 }
880 }
881 else if(n == 8) {
882 cftf040(a);
883 }
884 else if(n == 4) {
885 cftx020(a);
886 }
887 }
888
cftbsub(int n,float * a,int * ip,int nw,float * w)889 void cftbsub(int n, float *a, int *ip, int nw, float *w) {
890 void bitrv2conj(int n, int *ip, float *a);
891 void bitrv216neg(float * a);
892 void bitrv208neg(float * a);
893 void cftb1st(int n, float *a, float *w);
894 void cftrec4(int n, float *a, int nw, float *w);
895 void cftleaf(int n, int isplt, float *a, int nw, float *w);
896 void cftfx41(int n, float *a, int nw, float *w);
897 void cftf161(float * a, float * w);
898 void cftf081(float * a, float * w);
899 void cftb040(float * a);
900 void cftx020(float * a);
901 #ifdef USE_CDFT_THREADS
902 void cftrec4_th(int n, float *a, int nw, float *w);
903 #endif /* USE_CDFT_THREADS */
904
905 if(n > 8) {
906 if(n > 32) {
907 cftb1st(n, a, &w[nw - (n >> 2)]);
908 #ifdef USE_CDFT_THREADS
909 if(n > CDFT_THREADS_BEGIN_N) {
910 cftrec4_th(n, a, nw, w);
911 }
912 else
913 #endif /* USE_CDFT_THREADS */
914 if(n > 512) {
915 cftrec4(n, a, nw, w);
916 }
917 else if(n > 128) {
918 cftleaf(n, 1, a, nw, w);
919 }
920 else {
921 cftfx41(n, a, nw, w);
922 }
923 bitrv2conj(n, ip, a);
924 }
925 else if(n == 32) {
926 cftf161(a, &w[nw - 8]);
927 bitrv216neg(a);
928 }
929 else {
930 cftf081(a, w);
931 bitrv208neg(a);
932 }
933 }
934 else if(n == 8) {
935 cftb040(a);
936 }
937 else if(n == 4) {
938 cftx020(a);
939 }
940 }
941
bitrv2(int n,int * ip,float * a)942 void bitrv2(int n, int *ip, float *a) {
943 int j, j1, k, k1, l, m, nh, nm;
944 float xr, xi, yr, yi;
945
946 m = 1;
947 for(l = n >> 2; l > 8; l >>= 2) {
948 m <<= 1;
949 }
950 nh = n >> 1;
951 nm = 4 * m;
952 if(l == 8) {
953 for(k = 0; k < m; k++) {
954 for(j = 0; j < k; j++) {
955 j1 = 4 * j + 2 * ip[m + k];
956 k1 = 4 * k + 2 * ip[m + j];
957 xr = a[j1];
958 xi = a[j1 + 1];
959 yr = a[k1];
960 yi = a[k1 + 1];
961 a[j1] = yr;
962 a[j1 + 1] = yi;
963 a[k1] = xr;
964 a[k1 + 1] = xi;
965 j1 += nm;
966 k1 += 2 * nm;
967 xr = a[j1];
968 xi = a[j1 + 1];
969 yr = a[k1];
970 yi = a[k1 + 1];
971 a[j1] = yr;
972 a[j1 + 1] = yi;
973 a[k1] = xr;
974 a[k1 + 1] = xi;
975 j1 += nm;
976 k1 -= nm;
977 xr = a[j1];
978 xi = a[j1 + 1];
979 yr = a[k1];
980 yi = a[k1 + 1];
981 a[j1] = yr;
982 a[j1 + 1] = yi;
983 a[k1] = xr;
984 a[k1 + 1] = xi;
985 j1 += nm;
986 k1 += 2 * nm;
987 xr = a[j1];
988 xi = a[j1 + 1];
989 yr = a[k1];
990 yi = a[k1 + 1];
991 a[j1] = yr;
992 a[j1 + 1] = yi;
993 a[k1] = xr;
994 a[k1 + 1] = xi;
995 j1 += nh;
996 k1 += 2;
997 xr = a[j1];
998 xi = a[j1 + 1];
999 yr = a[k1];
1000 yi = a[k1 + 1];
1001 a[j1] = yr;
1002 a[j1 + 1] = yi;
1003 a[k1] = xr;
1004 a[k1 + 1] = xi;
1005 j1 -= nm;
1006 k1 -= 2 * nm;
1007 xr = a[j1];
1008 xi = a[j1 + 1];
1009 yr = a[k1];
1010 yi = a[k1 + 1];
1011 a[j1] = yr;
1012 a[j1 + 1] = yi;
1013 a[k1] = xr;
1014 a[k1 + 1] = xi;
1015 j1 -= nm;
1016 k1 += nm;
1017 xr = a[j1];
1018 xi = a[j1 + 1];
1019 yr = a[k1];
1020 yi = a[k1 + 1];
1021 a[j1] = yr;
1022 a[j1 + 1] = yi;
1023 a[k1] = xr;
1024 a[k1 + 1] = xi;
1025 j1 -= nm;
1026 k1 -= 2 * nm;
1027 xr = a[j1];
1028 xi = a[j1 + 1];
1029 yr = a[k1];
1030 yi = a[k1 + 1];
1031 a[j1] = yr;
1032 a[j1 + 1] = yi;
1033 a[k1] = xr;
1034 a[k1 + 1] = xi;
1035 j1 += 2;
1036 k1 += nh;
1037 xr = a[j1];
1038 xi = a[j1 + 1];
1039 yr = a[k1];
1040 yi = a[k1 + 1];
1041 a[j1] = yr;
1042 a[j1 + 1] = yi;
1043 a[k1] = xr;
1044 a[k1 + 1] = xi;
1045 j1 += nm;
1046 k1 += 2 * nm;
1047 xr = a[j1];
1048 xi = a[j1 + 1];
1049 yr = a[k1];
1050 yi = a[k1 + 1];
1051 a[j1] = yr;
1052 a[j1 + 1] = yi;
1053 a[k1] = xr;
1054 a[k1 + 1] = xi;
1055 j1 += nm;
1056 k1 -= nm;
1057 xr = a[j1];
1058 xi = a[j1 + 1];
1059 yr = a[k1];
1060 yi = a[k1 + 1];
1061 a[j1] = yr;
1062 a[j1 + 1] = yi;
1063 a[k1] = xr;
1064 a[k1 + 1] = xi;
1065 j1 += nm;
1066 k1 += 2 * nm;
1067 xr = a[j1];
1068 xi = a[j1 + 1];
1069 yr = a[k1];
1070 yi = a[k1 + 1];
1071 a[j1] = yr;
1072 a[j1 + 1] = yi;
1073 a[k1] = xr;
1074 a[k1 + 1] = xi;
1075 j1 -= nh;
1076 k1 -= 2;
1077 xr = a[j1];
1078 xi = a[j1 + 1];
1079 yr = a[k1];
1080 yi = a[k1 + 1];
1081 a[j1] = yr;
1082 a[j1 + 1] = yi;
1083 a[k1] = xr;
1084 a[k1 + 1] = xi;
1085 j1 -= nm;
1086 k1 -= 2 * nm;
1087 xr = a[j1];
1088 xi = a[j1 + 1];
1089 yr = a[k1];
1090 yi = a[k1 + 1];
1091 a[j1] = yr;
1092 a[j1 + 1] = yi;
1093 a[k1] = xr;
1094 a[k1 + 1] = xi;
1095 j1 -= nm;
1096 k1 += nm;
1097 xr = a[j1];
1098 xi = a[j1 + 1];
1099 yr = a[k1];
1100 yi = a[k1 + 1];
1101 a[j1] = yr;
1102 a[j1 + 1] = yi;
1103 a[k1] = xr;
1104 a[k1 + 1] = xi;
1105 j1 -= nm;
1106 k1 -= 2 * nm;
1107 xr = a[j1];
1108 xi = a[j1 + 1];
1109 yr = a[k1];
1110 yi = a[k1 + 1];
1111 a[j1] = yr;
1112 a[j1 + 1] = yi;
1113 a[k1] = xr;
1114 a[k1 + 1] = xi;
1115 }
1116 k1 = 4 * k + 2 * ip[m + k];
1117 j1 = k1 + 2;
1118 k1 += nh;
1119 xr = a[j1];
1120 xi = a[j1 + 1];
1121 yr = a[k1];
1122 yi = a[k1 + 1];
1123 a[j1] = yr;
1124 a[j1 + 1] = yi;
1125 a[k1] = xr;
1126 a[k1 + 1] = xi;
1127 j1 += nm;
1128 k1 += 2 * nm;
1129 xr = a[j1];
1130 xi = a[j1 + 1];
1131 yr = a[k1];
1132 yi = a[k1 + 1];
1133 a[j1] = yr;
1134 a[j1 + 1] = yi;
1135 a[k1] = xr;
1136 a[k1 + 1] = xi;
1137 j1 += nm;
1138 k1 -= nm;
1139 xr = a[j1];
1140 xi = a[j1 + 1];
1141 yr = a[k1];
1142 yi = a[k1 + 1];
1143 a[j1] = yr;
1144 a[j1 + 1] = yi;
1145 a[k1] = xr;
1146 a[k1 + 1] = xi;
1147 j1 -= 2;
1148 k1 -= nh;
1149 xr = a[j1];
1150 xi = a[j1 + 1];
1151 yr = a[k1];
1152 yi = a[k1 + 1];
1153 a[j1] = yr;
1154 a[j1 + 1] = yi;
1155 a[k1] = xr;
1156 a[k1 + 1] = xi;
1157 j1 += nh + 2;
1158 k1 += nh + 2;
1159 xr = a[j1];
1160 xi = a[j1 + 1];
1161 yr = a[k1];
1162 yi = a[k1 + 1];
1163 a[j1] = yr;
1164 a[j1 + 1] = yi;
1165 a[k1] = xr;
1166 a[k1 + 1] = xi;
1167 j1 -= nh - nm;
1168 k1 += 2 * nm - 2;
1169 xr = a[j1];
1170 xi = a[j1 + 1];
1171 yr = a[k1];
1172 yi = a[k1 + 1];
1173 a[j1] = yr;
1174 a[j1 + 1] = yi;
1175 a[k1] = xr;
1176 a[k1 + 1] = xi;
1177 }
1178 }
1179 else {
1180 for(k = 0; k < m; k++) {
1181 for(j = 0; j < k; j++) {
1182 j1 = 4 * j + ip[m + k];
1183 k1 = 4 * k + ip[m + j];
1184 xr = a[j1];
1185 xi = a[j1 + 1];
1186 yr = a[k1];
1187 yi = a[k1 + 1];
1188 a[j1] = yr;
1189 a[j1 + 1] = yi;
1190 a[k1] = xr;
1191 a[k1 + 1] = xi;
1192 j1 += nm;
1193 k1 += nm;
1194 xr = a[j1];
1195 xi = a[j1 + 1];
1196 yr = a[k1];
1197 yi = a[k1 + 1];
1198 a[j1] = yr;
1199 a[j1 + 1] = yi;
1200 a[k1] = xr;
1201 a[k1 + 1] = xi;
1202 j1 += nh;
1203 k1 += 2;
1204 xr = a[j1];
1205 xi = a[j1 + 1];
1206 yr = a[k1];
1207 yi = a[k1 + 1];
1208 a[j1] = yr;
1209 a[j1 + 1] = yi;
1210 a[k1] = xr;
1211 a[k1 + 1] = xi;
1212 j1 -= nm;
1213 k1 -= nm;
1214 xr = a[j1];
1215 xi = a[j1 + 1];
1216 yr = a[k1];
1217 yi = a[k1 + 1];
1218 a[j1] = yr;
1219 a[j1 + 1] = yi;
1220 a[k1] = xr;
1221 a[k1 + 1] = xi;
1222 j1 += 2;
1223 k1 += nh;
1224 xr = a[j1];
1225 xi = a[j1 + 1];
1226 yr = a[k1];
1227 yi = a[k1 + 1];
1228 a[j1] = yr;
1229 a[j1 + 1] = yi;
1230 a[k1] = xr;
1231 a[k1 + 1] = xi;
1232 j1 += nm;
1233 k1 += nm;
1234 xr = a[j1];
1235 xi = a[j1 + 1];
1236 yr = a[k1];
1237 yi = a[k1 + 1];
1238 a[j1] = yr;
1239 a[j1 + 1] = yi;
1240 a[k1] = xr;
1241 a[k1 + 1] = xi;
1242 j1 -= nh;
1243 k1 -= 2;
1244 xr = a[j1];
1245 xi = a[j1 + 1];
1246 yr = a[k1];
1247 yi = a[k1 + 1];
1248 a[j1] = yr;
1249 a[j1 + 1] = yi;
1250 a[k1] = xr;
1251 a[k1 + 1] = xi;
1252 j1 -= nm;
1253 k1 -= nm;
1254 xr = a[j1];
1255 xi = a[j1 + 1];
1256 yr = a[k1];
1257 yi = a[k1 + 1];
1258 a[j1] = yr;
1259 a[j1 + 1] = yi;
1260 a[k1] = xr;
1261 a[k1 + 1] = xi;
1262 }
1263 k1 = 4 * k + ip[m + k];
1264 j1 = k1 + 2;
1265 k1 += nh;
1266 xr = a[j1];
1267 xi = a[j1 + 1];
1268 yr = a[k1];
1269 yi = a[k1 + 1];
1270 a[j1] = yr;
1271 a[j1 + 1] = yi;
1272 a[k1] = xr;
1273 a[k1 + 1] = xi;
1274 j1 += nm;
1275 k1 += nm;
1276 xr = a[j1];
1277 xi = a[j1 + 1];
1278 yr = a[k1];
1279 yi = a[k1 + 1];
1280 a[j1] = yr;
1281 a[j1 + 1] = yi;
1282 a[k1] = xr;
1283 a[k1 + 1] = xi;
1284 }
1285 }
1286 }
1287
bitrv2conj(int n,int * ip,float * a)1288 void bitrv2conj(int n, int *ip, float *a) {
1289 int j, j1, k, k1, l, m, nh, nm;
1290 float xr, xi, yr, yi;
1291
1292 m = 1;
1293 for(l = n >> 2; l > 8; l >>= 2) {
1294 m <<= 1;
1295 }
1296 nh = n >> 1;
1297 nm = 4 * m;
1298 if(l == 8) {
1299 for(k = 0; k < m; k++) {
1300 for(j = 0; j < k; j++) {
1301 j1 = 4 * j + 2 * ip[m + k];
1302 k1 = 4 * k + 2 * ip[m + j];
1303 xr = a[j1];
1304 xi = -a[j1 + 1];
1305 yr = a[k1];
1306 yi = -a[k1 + 1];
1307 a[j1] = yr;
1308 a[j1 + 1] = yi;
1309 a[k1] = xr;
1310 a[k1 + 1] = xi;
1311 j1 += nm;
1312 k1 += 2 * nm;
1313 xr = a[j1];
1314 xi = -a[j1 + 1];
1315 yr = a[k1];
1316 yi = -a[k1 + 1];
1317 a[j1] = yr;
1318 a[j1 + 1] = yi;
1319 a[k1] = xr;
1320 a[k1 + 1] = xi;
1321 j1 += nm;
1322 k1 -= nm;
1323 xr = a[j1];
1324 xi = -a[j1 + 1];
1325 yr = a[k1];
1326 yi = -a[k1 + 1];
1327 a[j1] = yr;
1328 a[j1 + 1] = yi;
1329 a[k1] = xr;
1330 a[k1 + 1] = xi;
1331 j1 += nm;
1332 k1 += 2 * nm;
1333 xr = a[j1];
1334 xi = -a[j1 + 1];
1335 yr = a[k1];
1336 yi = -a[k1 + 1];
1337 a[j1] = yr;
1338 a[j1 + 1] = yi;
1339 a[k1] = xr;
1340 a[k1 + 1] = xi;
1341 j1 += nh;
1342 k1 += 2;
1343 xr = a[j1];
1344 xi = -a[j1 + 1];
1345 yr = a[k1];
1346 yi = -a[k1 + 1];
1347 a[j1] = yr;
1348 a[j1 + 1] = yi;
1349 a[k1] = xr;
1350 a[k1 + 1] = xi;
1351 j1 -= nm;
1352 k1 -= 2 * nm;
1353 xr = a[j1];
1354 xi = -a[j1 + 1];
1355 yr = a[k1];
1356 yi = -a[k1 + 1];
1357 a[j1] = yr;
1358 a[j1 + 1] = yi;
1359 a[k1] = xr;
1360 a[k1 + 1] = xi;
1361 j1 -= nm;
1362 k1 += nm;
1363 xr = a[j1];
1364 xi = -a[j1 + 1];
1365 yr = a[k1];
1366 yi = -a[k1 + 1];
1367 a[j1] = yr;
1368 a[j1 + 1] = yi;
1369 a[k1] = xr;
1370 a[k1 + 1] = xi;
1371 j1 -= nm;
1372 k1 -= 2 * nm;
1373 xr = a[j1];
1374 xi = -a[j1 + 1];
1375 yr = a[k1];
1376 yi = -a[k1 + 1];
1377 a[j1] = yr;
1378 a[j1 + 1] = yi;
1379 a[k1] = xr;
1380 a[k1 + 1] = xi;
1381 j1 += 2;
1382 k1 += nh;
1383 xr = a[j1];
1384 xi = -a[j1 + 1];
1385 yr = a[k1];
1386 yi = -a[k1 + 1];
1387 a[j1] = yr;
1388 a[j1 + 1] = yi;
1389 a[k1] = xr;
1390 a[k1 + 1] = xi;
1391 j1 += nm;
1392 k1 += 2 * nm;
1393 xr = a[j1];
1394 xi = -a[j1 + 1];
1395 yr = a[k1];
1396 yi = -a[k1 + 1];
1397 a[j1] = yr;
1398 a[j1 + 1] = yi;
1399 a[k1] = xr;
1400 a[k1 + 1] = xi;
1401 j1 += nm;
1402 k1 -= nm;
1403 xr = a[j1];
1404 xi = -a[j1 + 1];
1405 yr = a[k1];
1406 yi = -a[k1 + 1];
1407 a[j1] = yr;
1408 a[j1 + 1] = yi;
1409 a[k1] = xr;
1410 a[k1 + 1] = xi;
1411 j1 += nm;
1412 k1 += 2 * nm;
1413 xr = a[j1];
1414 xi = -a[j1 + 1];
1415 yr = a[k1];
1416 yi = -a[k1 + 1];
1417 a[j1] = yr;
1418 a[j1 + 1] = yi;
1419 a[k1] = xr;
1420 a[k1 + 1] = xi;
1421 j1 -= nh;
1422 k1 -= 2;
1423 xr = a[j1];
1424 xi = -a[j1 + 1];
1425 yr = a[k1];
1426 yi = -a[k1 + 1];
1427 a[j1] = yr;
1428 a[j1 + 1] = yi;
1429 a[k1] = xr;
1430 a[k1 + 1] = xi;
1431 j1 -= nm;
1432 k1 -= 2 * nm;
1433 xr = a[j1];
1434 xi = -a[j1 + 1];
1435 yr = a[k1];
1436 yi = -a[k1 + 1];
1437 a[j1] = yr;
1438 a[j1 + 1] = yi;
1439 a[k1] = xr;
1440 a[k1 + 1] = xi;
1441 j1 -= nm;
1442 k1 += nm;
1443 xr = a[j1];
1444 xi = -a[j1 + 1];
1445 yr = a[k1];
1446 yi = -a[k1 + 1];
1447 a[j1] = yr;
1448 a[j1 + 1] = yi;
1449 a[k1] = xr;
1450 a[k1 + 1] = xi;
1451 j1 -= nm;
1452 k1 -= 2 * nm;
1453 xr = a[j1];
1454 xi = -a[j1 + 1];
1455 yr = a[k1];
1456 yi = -a[k1 + 1];
1457 a[j1] = yr;
1458 a[j1 + 1] = yi;
1459 a[k1] = xr;
1460 a[k1 + 1] = xi;
1461 }
1462 k1 = 4 * k + 2 * ip[m + k];
1463 j1 = k1 + 2;
1464 k1 += nh;
1465 a[j1 - 1] = -a[j1 - 1];
1466 xr = a[j1];
1467 xi = -a[j1 + 1];
1468 yr = a[k1];
1469 yi = -a[k1 + 1];
1470 a[j1] = yr;
1471 a[j1 + 1] = yi;
1472 a[k1] = xr;
1473 a[k1 + 1] = xi;
1474 a[k1 + 3] = -a[k1 + 3];
1475 j1 += nm;
1476 k1 += 2 * nm;
1477 xr = a[j1];
1478 xi = -a[j1 + 1];
1479 yr = a[k1];
1480 yi = -a[k1 + 1];
1481 a[j1] = yr;
1482 a[j1 + 1] = yi;
1483 a[k1] = xr;
1484 a[k1 + 1] = xi;
1485 j1 += nm;
1486 k1 -= nm;
1487 xr = a[j1];
1488 xi = -a[j1 + 1];
1489 yr = a[k1];
1490 yi = -a[k1 + 1];
1491 a[j1] = yr;
1492 a[j1 + 1] = yi;
1493 a[k1] = xr;
1494 a[k1 + 1] = xi;
1495 j1 -= 2;
1496 k1 -= nh;
1497 xr = a[j1];
1498 xi = -a[j1 + 1];
1499 yr = a[k1];
1500 yi = -a[k1 + 1];
1501 a[j1] = yr;
1502 a[j1 + 1] = yi;
1503 a[k1] = xr;
1504 a[k1 + 1] = xi;
1505 j1 += nh + 2;
1506 k1 += nh + 2;
1507 xr = a[j1];
1508 xi = -a[j1 + 1];
1509 yr = a[k1];
1510 yi = -a[k1 + 1];
1511 a[j1] = yr;
1512 a[j1 + 1] = yi;
1513 a[k1] = xr;
1514 a[k1 + 1] = xi;
1515 j1 -= nh - nm;
1516 k1 += 2 * nm - 2;
1517 a[j1 - 1] = -a[j1 - 1];
1518 xr = a[j1];
1519 xi = -a[j1 + 1];
1520 yr = a[k1];
1521 yi = -a[k1 + 1];
1522 a[j1] = yr;
1523 a[j1 + 1] = yi;
1524 a[k1] = xr;
1525 a[k1 + 1] = xi;
1526 a[k1 + 3] = -a[k1 + 3];
1527 }
1528 }
1529 else {
1530 for(k = 0; k < m; k++) {
1531 for(j = 0; j < k; j++) {
1532 j1 = 4 * j + ip[m + k];
1533 k1 = 4 * k + ip[m + j];
1534 xr = a[j1];
1535 xi = -a[j1 + 1];
1536 yr = a[k1];
1537 yi = -a[k1 + 1];
1538 a[j1] = yr;
1539 a[j1 + 1] = yi;
1540 a[k1] = xr;
1541 a[k1 + 1] = xi;
1542 j1 += nm;
1543 k1 += nm;
1544 xr = a[j1];
1545 xi = -a[j1 + 1];
1546 yr = a[k1];
1547 yi = -a[k1 + 1];
1548 a[j1] = yr;
1549 a[j1 + 1] = yi;
1550 a[k1] = xr;
1551 a[k1 + 1] = xi;
1552 j1 += nh;
1553 k1 += 2;
1554 xr = a[j1];
1555 xi = -a[j1 + 1];
1556 yr = a[k1];
1557 yi = -a[k1 + 1];
1558 a[j1] = yr;
1559 a[j1 + 1] = yi;
1560 a[k1] = xr;
1561 a[k1 + 1] = xi;
1562 j1 -= nm;
1563 k1 -= nm;
1564 xr = a[j1];
1565 xi = -a[j1 + 1];
1566 yr = a[k1];
1567 yi = -a[k1 + 1];
1568 a[j1] = yr;
1569 a[j1 + 1] = yi;
1570 a[k1] = xr;
1571 a[k1 + 1] = xi;
1572 j1 += 2;
1573 k1 += nh;
1574 xr = a[j1];
1575 xi = -a[j1 + 1];
1576 yr = a[k1];
1577 yi = -a[k1 + 1];
1578 a[j1] = yr;
1579 a[j1 + 1] = yi;
1580 a[k1] = xr;
1581 a[k1 + 1] = xi;
1582 j1 += nm;
1583 k1 += nm;
1584 xr = a[j1];
1585 xi = -a[j1 + 1];
1586 yr = a[k1];
1587 yi = -a[k1 + 1];
1588 a[j1] = yr;
1589 a[j1 + 1] = yi;
1590 a[k1] = xr;
1591 a[k1 + 1] = xi;
1592 j1 -= nh;
1593 k1 -= 2;
1594 xr = a[j1];
1595 xi = -a[j1 + 1];
1596 yr = a[k1];
1597 yi = -a[k1 + 1];
1598 a[j1] = yr;
1599 a[j1 + 1] = yi;
1600 a[k1] = xr;
1601 a[k1 + 1] = xi;
1602 j1 -= nm;
1603 k1 -= nm;
1604 xr = a[j1];
1605 xi = -a[j1 + 1];
1606 yr = a[k1];
1607 yi = -a[k1 + 1];
1608 a[j1] = yr;
1609 a[j1 + 1] = yi;
1610 a[k1] = xr;
1611 a[k1 + 1] = xi;
1612 }
1613 k1 = 4 * k + ip[m + k];
1614 j1 = k1 + 2;
1615 k1 += nh;
1616 a[j1 - 1] = -a[j1 - 1];
1617 xr = a[j1];
1618 xi = -a[j1 + 1];
1619 yr = a[k1];
1620 yi = -a[k1 + 1];
1621 a[j1] = yr;
1622 a[j1 + 1] = yi;
1623 a[k1] = xr;
1624 a[k1 + 1] = xi;
1625 a[k1 + 3] = -a[k1 + 3];
1626 j1 += nm;
1627 k1 += nm;
1628 a[j1 - 1] = -a[j1 - 1];
1629 xr = a[j1];
1630 xi = -a[j1 + 1];
1631 yr = a[k1];
1632 yi = -a[k1 + 1];
1633 a[j1] = yr;
1634 a[j1 + 1] = yi;
1635 a[k1] = xr;
1636 a[k1 + 1] = xi;
1637 a[k1 + 3] = -a[k1 + 3];
1638 }
1639 }
1640 }
1641
bitrv216(float * a)1642 void bitrv216(float *a) {
1643 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x7r, x7i, x8r, x8i,
1644 x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1645
1646 x1r = a[2];
1647 x1i = a[3];
1648 x2r = a[4];
1649 x2i = a[5];
1650 x3r = a[6];
1651 x3i = a[7];
1652 x4r = a[8];
1653 x4i = a[9];
1654 x5r = a[10];
1655 x5i = a[11];
1656 x7r = a[14];
1657 x7i = a[15];
1658 x8r = a[16];
1659 x8i = a[17];
1660 x10r = a[20];
1661 x10i = a[21];
1662 x11r = a[22];
1663 x11i = a[23];
1664 x12r = a[24];
1665 x12i = a[25];
1666 x13r = a[26];
1667 x13i = a[27];
1668 x14r = a[28];
1669 x14i = a[29];
1670 a[2] = x8r;
1671 a[3] = x8i;
1672 a[4] = x4r;
1673 a[5] = x4i;
1674 a[6] = x12r;
1675 a[7] = x12i;
1676 a[8] = x2r;
1677 a[9] = x2i;
1678 a[10] = x10r;
1679 a[11] = x10i;
1680 a[14] = x14r;
1681 a[15] = x14i;
1682 a[16] = x1r;
1683 a[17] = x1i;
1684 a[20] = x5r;
1685 a[21] = x5i;
1686 a[22] = x13r;
1687 a[23] = x13i;
1688 a[24] = x3r;
1689 a[25] = x3i;
1690 a[26] = x11r;
1691 a[27] = x11i;
1692 a[28] = x7r;
1693 a[29] = x7i;
1694 }
1695
bitrv216neg(float * a)1696 void bitrv216neg(float *a) {
1697 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i,
1698 x8r, x8i, x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i, x13r, x13i, x14r,
1699 x14i, x15r, x15i;
1700
1701 x1r = a[2];
1702 x1i = a[3];
1703 x2r = a[4];
1704 x2i = a[5];
1705 x3r = a[6];
1706 x3i = a[7];
1707 x4r = a[8];
1708 x4i = a[9];
1709 x5r = a[10];
1710 x5i = a[11];
1711 x6r = a[12];
1712 x6i = a[13];
1713 x7r = a[14];
1714 x7i = a[15];
1715 x8r = a[16];
1716 x8i = a[17];
1717 x9r = a[18];
1718 x9i = a[19];
1719 x10r = a[20];
1720 x10i = a[21];
1721 x11r = a[22];
1722 x11i = a[23];
1723 x12r = a[24];
1724 x12i = a[25];
1725 x13r = a[26];
1726 x13i = a[27];
1727 x14r = a[28];
1728 x14i = a[29];
1729 x15r = a[30];
1730 x15i = a[31];
1731 a[2] = x15r;
1732 a[3] = x15i;
1733 a[4] = x7r;
1734 a[5] = x7i;
1735 a[6] = x11r;
1736 a[7] = x11i;
1737 a[8] = x3r;
1738 a[9] = x3i;
1739 a[10] = x13r;
1740 a[11] = x13i;
1741 a[12] = x5r;
1742 a[13] = x5i;
1743 a[14] = x9r;
1744 a[15] = x9i;
1745 a[16] = x1r;
1746 a[17] = x1i;
1747 a[18] = x14r;
1748 a[19] = x14i;
1749 a[20] = x6r;
1750 a[21] = x6i;
1751 a[22] = x10r;
1752 a[23] = x10i;
1753 a[24] = x2r;
1754 a[25] = x2i;
1755 a[26] = x12r;
1756 a[27] = x12i;
1757 a[28] = x4r;
1758 a[29] = x4i;
1759 a[30] = x8r;
1760 a[31] = x8i;
1761 }
1762
bitrv208(float * a)1763 void bitrv208(float *a) {
1764 float x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1765
1766 x1r = a[2];
1767 x1i = a[3];
1768 x3r = a[6];
1769 x3i = a[7];
1770 x4r = a[8];
1771 x4i = a[9];
1772 x6r = a[12];
1773 x6i = a[13];
1774 a[2] = x4r;
1775 a[3] = x4i;
1776 a[6] = x6r;
1777 a[7] = x6i;
1778 a[8] = x1r;
1779 a[9] = x1i;
1780 a[12] = x3r;
1781 a[13] = x3i;
1782 }
1783
bitrv208neg(float * a)1784 void bitrv208neg(float *a) {
1785 float x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i, x7r, x7i;
1786
1787 x1r = a[2];
1788 x1i = a[3];
1789 x2r = a[4];
1790 x2i = a[5];
1791 x3r = a[6];
1792 x3i = a[7];
1793 x4r = a[8];
1794 x4i = a[9];
1795 x5r = a[10];
1796 x5i = a[11];
1797 x6r = a[12];
1798 x6i = a[13];
1799 x7r = a[14];
1800 x7i = a[15];
1801 a[2] = x7r;
1802 a[3] = x7i;
1803 a[4] = x3r;
1804 a[5] = x3i;
1805 a[6] = x5r;
1806 a[7] = x5i;
1807 a[8] = x1r;
1808 a[9] = x1i;
1809 a[10] = x6r;
1810 a[11] = x6i;
1811 a[12] = x2r;
1812 a[13] = x2i;
1813 a[14] = x4r;
1814 a[15] = x4i;
1815 }
1816
cftf1st(int n,float * a,float * w)1817 void cftf1st(int n, float *a, float *w) {
1818 int j, j0, j1, j2, j3, k, m, mh;
1819 float wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
1820 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
1821 y3r, y3i;
1822
1823 mh = n >> 3;
1824 m = 2 * mh;
1825 j1 = m;
1826 j2 = j1 + m;
1827 j3 = j2 + m;
1828 x0r = a[0] + a[j2];
1829 x0i = a[1] + a[j2 + 1];
1830 x1r = a[0] - a[j2];
1831 x1i = a[1] - a[j2 + 1];
1832 x2r = a[j1] + a[j3];
1833 x2i = a[j1 + 1] + a[j3 + 1];
1834 x3r = a[j1] - a[j3];
1835 x3i = a[j1 + 1] - a[j3 + 1];
1836 a[0] = x0r + x2r;
1837 a[1] = x0i + x2i;
1838 a[j1] = x0r - x2r;
1839 a[j1 + 1] = x0i - x2i;
1840 a[j2] = x1r - x3i;
1841 a[j2 + 1] = x1i + x3r;
1842 a[j3] = x1r + x3i;
1843 a[j3 + 1] = x1i - x3r;
1844 wn4r = w[1];
1845 csc1 = w[2];
1846 csc3 = w[3];
1847 wd1r = 1;
1848 wd1i = 0;
1849 wd3r = 1;
1850 wd3i = 0;
1851 k = 0;
1852 for(j = 2; j < mh - 2; j += 4) {
1853 k += 4;
1854 wk1r = csc1 * (wd1r + w[k]);
1855 wk1i = csc1 * (wd1i + w[k + 1]);
1856 wk3r = csc3 * (wd3r + w[k + 2]);
1857 wk3i = csc3 * (wd3i + w[k + 3]);
1858 wd1r = w[k];
1859 wd1i = w[k + 1];
1860 wd3r = w[k + 2];
1861 wd3i = w[k + 3];
1862 j1 = j + m;
1863 j2 = j1 + m;
1864 j3 = j2 + m;
1865 x0r = a[j] + a[j2];
1866 x0i = a[j + 1] + a[j2 + 1];
1867 x1r = a[j] - a[j2];
1868 x1i = a[j + 1] - a[j2 + 1];
1869 y0r = a[j + 2] + a[j2 + 2];
1870 y0i = a[j + 3] + a[j2 + 3];
1871 y1r = a[j + 2] - a[j2 + 2];
1872 y1i = a[j + 3] - a[j2 + 3];
1873 x2r = a[j1] + a[j3];
1874 x2i = a[j1 + 1] + a[j3 + 1];
1875 x3r = a[j1] - a[j3];
1876 x3i = a[j1 + 1] - a[j3 + 1];
1877 y2r = a[j1 + 2] + a[j3 + 2];
1878 y2i = a[j1 + 3] + a[j3 + 3];
1879 y3r = a[j1 + 2] - a[j3 + 2];
1880 y3i = a[j1 + 3] - a[j3 + 3];
1881 a[j] = x0r + x2r;
1882 a[j + 1] = x0i + x2i;
1883 a[j + 2] = y0r + y2r;
1884 a[j + 3] = y0i + y2i;
1885 a[j1] = x0r - x2r;
1886 a[j1 + 1] = x0i - x2i;
1887 a[j1 + 2] = y0r - y2r;
1888 a[j1 + 3] = y0i - y2i;
1889 x0r = x1r - x3i;
1890 x0i = x1i + x3r;
1891 a[j2] = wk1r * x0r - wk1i * x0i;
1892 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1893 x0r = y1r - y3i;
1894 x0i = y1i + y3r;
1895 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1896 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1897 x0r = x1r + x3i;
1898 x0i = x1i - x3r;
1899 a[j3] = wk3r * x0r + wk3i * x0i;
1900 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1901 x0r = y1r + y3i;
1902 x0i = y1i - y3r;
1903 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1904 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1905 j0 = m - j;
1906 j1 = j0 + m;
1907 j2 = j1 + m;
1908 j3 = j2 + m;
1909 x0r = a[j0] + a[j2];
1910 x0i = a[j0 + 1] + a[j2 + 1];
1911 x1r = a[j0] - a[j2];
1912 x1i = a[j0 + 1] - a[j2 + 1];
1913 y0r = a[j0 - 2] + a[j2 - 2];
1914 y0i = a[j0 - 1] + a[j2 - 1];
1915 y1r = a[j0 - 2] - a[j2 - 2];
1916 y1i = a[j0 - 1] - a[j2 - 1];
1917 x2r = a[j1] + a[j3];
1918 x2i = a[j1 + 1] + a[j3 + 1];
1919 x3r = a[j1] - a[j3];
1920 x3i = a[j1 + 1] - a[j3 + 1];
1921 y2r = a[j1 - 2] + a[j3 - 2];
1922 y2i = a[j1 - 1] + a[j3 - 1];
1923 y3r = a[j1 - 2] - a[j3 - 2];
1924 y3i = a[j1 - 1] - a[j3 - 1];
1925 a[j0] = x0r + x2r;
1926 a[j0 + 1] = x0i + x2i;
1927 a[j0 - 2] = y0r + y2r;
1928 a[j0 - 1] = y0i + y2i;
1929 a[j1] = x0r - x2r;
1930 a[j1 + 1] = x0i - x2i;
1931 a[j1 - 2] = y0r - y2r;
1932 a[j1 - 1] = y0i - y2i;
1933 x0r = x1r - x3i;
1934 x0i = x1i + x3r;
1935 a[j2] = wk1i * x0r - wk1r * x0i;
1936 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1937 x0r = y1r - y3i;
1938 x0i = y1i + y3r;
1939 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1940 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1941 x0r = x1r + x3i;
1942 x0i = x1i - x3r;
1943 a[j3] = wk3i * x0r + wk3r * x0i;
1944 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1945 x0r = y1r + y3i;
1946 x0i = y1i - y3r;
1947 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1948 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1949 }
1950 wk1r = csc1 * (wd1r + wn4r);
1951 wk1i = csc1 * (wd1i + wn4r);
1952 wk3r = csc3 * (wd3r - wn4r);
1953 wk3i = csc3 * (wd3i - wn4r);
1954 j0 = mh;
1955 j1 = j0 + m;
1956 j2 = j1 + m;
1957 j3 = j2 + m;
1958 x0r = a[j0 - 2] + a[j2 - 2];
1959 x0i = a[j0 - 1] + a[j2 - 1];
1960 x1r = a[j0 - 2] - a[j2 - 2];
1961 x1i = a[j0 - 1] - a[j2 - 1];
1962 x2r = a[j1 - 2] + a[j3 - 2];
1963 x2i = a[j1 - 1] + a[j3 - 1];
1964 x3r = a[j1 - 2] - a[j3 - 2];
1965 x3i = a[j1 - 1] - a[j3 - 1];
1966 a[j0 - 2] = x0r + x2r;
1967 a[j0 - 1] = x0i + x2i;
1968 a[j1 - 2] = x0r - x2r;
1969 a[j1 - 1] = x0i - x2i;
1970 x0r = x1r - x3i;
1971 x0i = x1i + x3r;
1972 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1973 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1974 x0r = x1r + x3i;
1975 x0i = x1i - x3r;
1976 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1977 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1978 x0r = a[j0] + a[j2];
1979 x0i = a[j0 + 1] + a[j2 + 1];
1980 x1r = a[j0] - a[j2];
1981 x1i = a[j0 + 1] - a[j2 + 1];
1982 x2r = a[j1] + a[j3];
1983 x2i = a[j1 + 1] + a[j3 + 1];
1984 x3r = a[j1] - a[j3];
1985 x3i = a[j1 + 1] - a[j3 + 1];
1986 a[j0] = x0r + x2r;
1987 a[j0 + 1] = x0i + x2i;
1988 a[j1] = x0r - x2r;
1989 a[j1 + 1] = x0i - x2i;
1990 x0r = x1r - x3i;
1991 x0i = x1i + x3r;
1992 a[j2] = wn4r * (x0r - x0i);
1993 a[j2 + 1] = wn4r * (x0i + x0r);
1994 x0r = x1r + x3i;
1995 x0i = x1i - x3r;
1996 a[j3] = -wn4r * (x0r + x0i);
1997 a[j3 + 1] = -wn4r * (x0i - x0r);
1998 x0r = a[j0 + 2] + a[j2 + 2];
1999 x0i = a[j0 + 3] + a[j2 + 3];
2000 x1r = a[j0 + 2] - a[j2 + 2];
2001 x1i = a[j0 + 3] - a[j2 + 3];
2002 x2r = a[j1 + 2] + a[j3 + 2];
2003 x2i = a[j1 + 3] + a[j3 + 3];
2004 x3r = a[j1 + 2] - a[j3 + 2];
2005 x3i = a[j1 + 3] - a[j3 + 3];
2006 a[j0 + 2] = x0r + x2r;
2007 a[j0 + 3] = x0i + x2i;
2008 a[j1 + 2] = x0r - x2r;
2009 a[j1 + 3] = x0i - x2i;
2010 x0r = x1r - x3i;
2011 x0i = x1i + x3r;
2012 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2013 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2014 x0r = x1r + x3i;
2015 x0i = x1i - x3r;
2016 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2017 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2018 }
2019
cftb1st(int n,float * a,float * w)2020 void cftb1st(int n, float *a, float *w) {
2021 int j, j0, j1, j2, j3, k, m, mh;
2022 float wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2023 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r, y2i,
2024 y3r, y3i;
2025
2026 mh = n >> 3;
2027 m = 2 * mh;
2028 j1 = m;
2029 j2 = j1 + m;
2030 j3 = j2 + m;
2031 x0r = a[0] + a[j2];
2032 x0i = -a[1] - a[j2 + 1];
2033 x1r = a[0] - a[j2];
2034 x1i = -a[1] + a[j2 + 1];
2035 x2r = a[j1] + a[j3];
2036 x2i = a[j1 + 1] + a[j3 + 1];
2037 x3r = a[j1] - a[j3];
2038 x3i = a[j1 + 1] - a[j3 + 1];
2039 a[0] = x0r + x2r;
2040 a[1] = x0i - x2i;
2041 a[j1] = x0r - x2r;
2042 a[j1 + 1] = x0i + x2i;
2043 a[j2] = x1r + x3i;
2044 a[j2 + 1] = x1i + x3r;
2045 a[j3] = x1r - x3i;
2046 a[j3 + 1] = x1i - x3r;
2047 wn4r = w[1];
2048 csc1 = w[2];
2049 csc3 = w[3];
2050 wd1r = 1;
2051 wd1i = 0;
2052 wd3r = 1;
2053 wd3i = 0;
2054 k = 0;
2055 for(j = 2; j < mh - 2; j += 4) {
2056 k += 4;
2057 wk1r = csc1 * (wd1r + w[k]);
2058 wk1i = csc1 * (wd1i + w[k + 1]);
2059 wk3r = csc3 * (wd3r + w[k + 2]);
2060 wk3i = csc3 * (wd3i + w[k + 3]);
2061 wd1r = w[k];
2062 wd1i = w[k + 1];
2063 wd3r = w[k + 2];
2064 wd3i = w[k + 3];
2065 j1 = j + m;
2066 j2 = j1 + m;
2067 j3 = j2 + m;
2068 x0r = a[j] + a[j2];
2069 x0i = -a[j + 1] - a[j2 + 1];
2070 x1r = a[j] - a[j2];
2071 x1i = -a[j + 1] + a[j2 + 1];
2072 y0r = a[j + 2] + a[j2 + 2];
2073 y0i = -a[j + 3] - a[j2 + 3];
2074 y1r = a[j + 2] - a[j2 + 2];
2075 y1i = -a[j + 3] + a[j2 + 3];
2076 x2r = a[j1] + a[j3];
2077 x2i = a[j1 + 1] + a[j3 + 1];
2078 x3r = a[j1] - a[j3];
2079 x3i = a[j1 + 1] - a[j3 + 1];
2080 y2r = a[j1 + 2] + a[j3 + 2];
2081 y2i = a[j1 + 3] + a[j3 + 3];
2082 y3r = a[j1 + 2] - a[j3 + 2];
2083 y3i = a[j1 + 3] - a[j3 + 3];
2084 a[j] = x0r + x2r;
2085 a[j + 1] = x0i - x2i;
2086 a[j + 2] = y0r + y2r;
2087 a[j + 3] = y0i - y2i;
2088 a[j1] = x0r - x2r;
2089 a[j1 + 1] = x0i + x2i;
2090 a[j1 + 2] = y0r - y2r;
2091 a[j1 + 3] = y0i + y2i;
2092 x0r = x1r + x3i;
2093 x0i = x1i + x3r;
2094 a[j2] = wk1r * x0r - wk1i * x0i;
2095 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2096 x0r = y1r + y3i;
2097 x0i = y1i + y3r;
2098 a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2099 a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2100 x0r = x1r - x3i;
2101 x0i = x1i - x3r;
2102 a[j3] = wk3r * x0r + wk3i * x0i;
2103 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2104 x0r = y1r - y3i;
2105 x0i = y1i - y3r;
2106 a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2107 a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2108 j0 = m - j;
2109 j1 = j0 + m;
2110 j2 = j1 + m;
2111 j3 = j2 + m;
2112 x0r = a[j0] + a[j2];
2113 x0i = -a[j0 + 1] - a[j2 + 1];
2114 x1r = a[j0] - a[j2];
2115 x1i = -a[j0 + 1] + a[j2 + 1];
2116 y0r = a[j0 - 2] + a[j2 - 2];
2117 y0i = -a[j0 - 1] - a[j2 - 1];
2118 y1r = a[j0 - 2] - a[j2 - 2];
2119 y1i = -a[j0 - 1] + a[j2 - 1];
2120 x2r = a[j1] + a[j3];
2121 x2i = a[j1 + 1] + a[j3 + 1];
2122 x3r = a[j1] - a[j3];
2123 x3i = a[j1 + 1] - a[j3 + 1];
2124 y2r = a[j1 - 2] + a[j3 - 2];
2125 y2i = a[j1 - 1] + a[j3 - 1];
2126 y3r = a[j1 - 2] - a[j3 - 2];
2127 y3i = a[j1 - 1] - a[j3 - 1];
2128 a[j0] = x0r + x2r;
2129 a[j0 + 1] = x0i - x2i;
2130 a[j0 - 2] = y0r + y2r;
2131 a[j0 - 1] = y0i - y2i;
2132 a[j1] = x0r - x2r;
2133 a[j1 + 1] = x0i + x2i;
2134 a[j1 - 2] = y0r - y2r;
2135 a[j1 - 1] = y0i + y2i;
2136 x0r = x1r + x3i;
2137 x0i = x1i + x3r;
2138 a[j2] = wk1i * x0r - wk1r * x0i;
2139 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2140 x0r = y1r + y3i;
2141 x0i = y1i + y3r;
2142 a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2143 a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2144 x0r = x1r - x3i;
2145 x0i = x1i - x3r;
2146 a[j3] = wk3i * x0r + wk3r * x0i;
2147 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2148 x0r = y1r - y3i;
2149 x0i = y1i - y3r;
2150 a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2151 a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2152 }
2153 wk1r = csc1 * (wd1r + wn4r);
2154 wk1i = csc1 * (wd1i + wn4r);
2155 wk3r = csc3 * (wd3r - wn4r);
2156 wk3i = csc3 * (wd3i - wn4r);
2157 j0 = mh;
2158 j1 = j0 + m;
2159 j2 = j1 + m;
2160 j3 = j2 + m;
2161 x0r = a[j0 - 2] + a[j2 - 2];
2162 x0i = -a[j0 - 1] - a[j2 - 1];
2163 x1r = a[j0 - 2] - a[j2 - 2];
2164 x1i = -a[j0 - 1] + a[j2 - 1];
2165 x2r = a[j1 - 2] + a[j3 - 2];
2166 x2i = a[j1 - 1] + a[j3 - 1];
2167 x3r = a[j1 - 2] - a[j3 - 2];
2168 x3i = a[j1 - 1] - a[j3 - 1];
2169 a[j0 - 2] = x0r + x2r;
2170 a[j0 - 1] = x0i - x2i;
2171 a[j1 - 2] = x0r - x2r;
2172 a[j1 - 1] = x0i + x2i;
2173 x0r = x1r + x3i;
2174 x0i = x1i + x3r;
2175 a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2176 a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2177 x0r = x1r - x3i;
2178 x0i = x1i - x3r;
2179 a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2180 a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2181 x0r = a[j0] + a[j2];
2182 x0i = -a[j0 + 1] - a[j2 + 1];
2183 x1r = a[j0] - a[j2];
2184 x1i = -a[j0 + 1] + a[j2 + 1];
2185 x2r = a[j1] + a[j3];
2186 x2i = a[j1 + 1] + a[j3 + 1];
2187 x3r = a[j1] - a[j3];
2188 x3i = a[j1 + 1] - a[j3 + 1];
2189 a[j0] = x0r + x2r;
2190 a[j0 + 1] = x0i - x2i;
2191 a[j1] = x0r - x2r;
2192 a[j1 + 1] = x0i + x2i;
2193 x0r = x1r + x3i;
2194 x0i = x1i + x3r;
2195 a[j2] = wn4r * (x0r - x0i);
2196 a[j2 + 1] = wn4r * (x0i + x0r);
2197 x0r = x1r - x3i;
2198 x0i = x1i - x3r;
2199 a[j3] = -wn4r * (x0r + x0i);
2200 a[j3 + 1] = -wn4r * (x0i - x0r);
2201 x0r = a[j0 + 2] + a[j2 + 2];
2202 x0i = -a[j0 + 3] - a[j2 + 3];
2203 x1r = a[j0 + 2] - a[j2 + 2];
2204 x1i = -a[j0 + 3] + a[j2 + 3];
2205 x2r = a[j1 + 2] + a[j3 + 2];
2206 x2i = a[j1 + 3] + a[j3 + 3];
2207 x3r = a[j1 + 2] - a[j3 + 2];
2208 x3i = a[j1 + 3] - a[j3 + 3];
2209 a[j0 + 2] = x0r + x2r;
2210 a[j0 + 3] = x0i - x2i;
2211 a[j1 + 2] = x0r - x2r;
2212 a[j1 + 3] = x0i + x2i;
2213 x0r = x1r + x3i;
2214 x0i = x1i + x3r;
2215 a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2216 a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2217 x0r = x1r - x3i;
2218 x0i = x1i - x3r;
2219 a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2220 a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2221 }
2222
2223 #ifdef USE_CDFT_THREADS
2224 struct cdft_arg_st {
2225 int n0;
2226 int n;
2227 float *a;
2228 int nw;
2229 float *w;
2230 };
2231 typedef struct cdft_arg_st cdft_arg_t;
2232
cftrec4_th(int n,float * a,int nw,float * w)2233 void cftrec4_th(int n, float *a, int nw, float *w) {
2234 void *cftrec1_th(void *p);
2235 void *cftrec2_th(void *p);
2236 int i, idiv4, m, nthread;
2237 cdft_thread_t th[4];
2238 cdft_arg_t ag[4];
2239
2240 nthread = 2;
2241 idiv4 = 0;
2242 m = n >> 1;
2243 if(n > CDFT_4THREADS_BEGIN_N) {
2244 nthread = 4;
2245 idiv4 = 1;
2246 m >>= 1;
2247 }
2248 for(i = 0; i < nthread; i++) {
2249 ag[i].n0 = n;
2250 ag[i].n = m;
2251 ag[i].a = &a[i * m];
2252 ag[i].nw = nw;
2253 ag[i].w = w;
2254 if(i != idiv4) {
2255 cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2256 }
2257 else {
2258 cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2259 }
2260 }
2261 for(i = 0; i < nthread; i++) {
2262 cdft_thread_wait(th[i]);
2263 }
2264 }
2265
cftrec1_th(void * p)2266 void *cftrec1_th(void *p) {
2267 int cfttree(int n, int j, int k, float *a, int nw, float *w);
2268 void cftleaf(int n, int isplt, float *a, int nw, float *w);
2269 void cftmdl1(int n, float *a, float *w);
2270 int isplt, j, k, m, n, n0, nw;
2271 float *a, *w;
2272
2273 n0 = ((cdft_arg_t *)p)->n0;
2274 n = ((cdft_arg_t *)p)->n;
2275 a = ((cdft_arg_t *)p)->a;
2276 nw = ((cdft_arg_t *)p)->nw;
2277 w = ((cdft_arg_t *)p)->w;
2278 m = n0;
2279 while(m > 512) {
2280 m >>= 2;
2281 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2282 }
2283 cftleaf(m, 1, &a[n - m], nw, w);
2284 k = 0;
2285 for(j = n - m; j > 0; j -= m) {
2286 k++;
2287 isplt = cfttree(m, j, k, a, nw, w);
2288 cftleaf(m, isplt, &a[j - m], nw, w);
2289 }
2290 return (void *)0;
2291 }
2292
cftrec2_th(void * p)2293 void *cftrec2_th(void *p) {
2294 int cfttree(int n, int j, int k, float *a, int nw, float *w);
2295 void cftleaf(int n, int isplt, float *a, int nw, float *w);
2296 void cftmdl2(int n, float *a, float *w);
2297 int isplt, j, k, m, n, n0, nw;
2298 float *a, *w;
2299
2300 n0 = ((cdft_arg_t *)p)->n0;
2301 n = ((cdft_arg_t *)p)->n;
2302 a = ((cdft_arg_t *)p)->a;
2303 nw = ((cdft_arg_t *)p)->nw;
2304 w = ((cdft_arg_t *)p)->w;
2305 k = 1;
2306 m = n0;
2307 while(m > 512) {
2308 m >>= 2;
2309 k <<= 2;
2310 cftmdl2(m, &a[n - m], &w[nw - m]);
2311 }
2312 cftleaf(m, 0, &a[n - m], nw, w);
2313 k >>= 1;
2314 for(j = n - m; j > 0; j -= m) {
2315 k++;
2316 isplt = cfttree(m, j, k, a, nw, w);
2317 cftleaf(m, isplt, &a[j - m], nw, w);
2318 }
2319 return (void *)0;
2320 }
2321 #endif /* USE_CDFT_THREADS */
2322
cftrec4(int n,float * a,int nw,float * w)2323 void cftrec4(int n, float *a, int nw, float *w) {
2324 int cfttree(int n, int j, int k, float *a, int nw, float *w);
2325 void cftleaf(int n, int isplt, float *a, int nw, float *w);
2326 void cftmdl1(int n, float *a, float *w);
2327 int isplt, j, k, m;
2328
2329 m = n;
2330 while(m > 512) {
2331 m >>= 2;
2332 cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2333 }
2334 cftleaf(m, 1, &a[n - m], nw, w);
2335 k = 0;
2336 for(j = n - m; j > 0; j -= m) {
2337 k++;
2338 isplt = cfttree(m, j, k, a, nw, w);
2339 cftleaf(m, isplt, &a[j - m], nw, w);
2340 }
2341 }
2342
cfttree(int n,int j,int k,float * a,int nw,float * w)2343 int cfttree(int n, int j, int k, float *a, int nw, float *w) {
2344 void cftmdl1(int n, float *a, float *w);
2345 void cftmdl2(int n, float *a, float *w);
2346 int i, isplt, m;
2347
2348 if((k & 3) != 0) {
2349 isplt = k & 1;
2350 if(isplt != 0) {
2351 cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2352 }
2353 else {
2354 cftmdl2(n, &a[j - n], &w[nw - n]);
2355 }
2356 }
2357 else {
2358 m = n;
2359 for(i = k; (i & 3) == 0; i >>= 2) {
2360 m <<= 2;
2361 }
2362 isplt = i & 1;
2363 if(isplt != 0) {
2364 while(m > 128) {
2365 cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2366 m >>= 2;
2367 }
2368 }
2369 else {
2370 while(m > 128) {
2371 cftmdl2(m, &a[j - m], &w[nw - m]);
2372 m >>= 2;
2373 }
2374 }
2375 }
2376 return isplt;
2377 }
2378
cftleaf(int n,int isplt,float * a,int nw,float * w)2379 void cftleaf(int n, int isplt, float *a, int nw, float *w) {
2380 void cftmdl1(int n, float *a, float *w);
2381 void cftmdl2(int n, float *a, float *w);
2382 void cftf161(float * a, float * w);
2383 void cftf162(float * a, float * w);
2384 void cftf081(float * a, float * w);
2385 void cftf082(float * a, float * w);
2386
2387 if(n == 512) {
2388 cftmdl1(128, a, &w[nw - 64]);
2389 cftf161(a, &w[nw - 8]);
2390 cftf162(&a[32], &w[nw - 32]);
2391 cftf161(&a[64], &w[nw - 8]);
2392 cftf161(&a[96], &w[nw - 8]);
2393 cftmdl2(128, &a[128], &w[nw - 128]);
2394 cftf161(&a[128], &w[nw - 8]);
2395 cftf162(&a[160], &w[nw - 32]);
2396 cftf161(&a[192], &w[nw - 8]);
2397 cftf162(&a[224], &w[nw - 32]);
2398 cftmdl1(128, &a[256], &w[nw - 64]);
2399 cftf161(&a[256], &w[nw - 8]);
2400 cftf162(&a[288], &w[nw - 32]);
2401 cftf161(&a[320], &w[nw - 8]);
2402 cftf161(&a[352], &w[nw - 8]);
2403 if(isplt != 0) {
2404 cftmdl1(128, &a[384], &w[nw - 64]);
2405 cftf161(&a[480], &w[nw - 8]);
2406 }
2407 else {
2408 cftmdl2(128, &a[384], &w[nw - 128]);
2409 cftf162(&a[480], &w[nw - 32]);
2410 }
2411 cftf161(&a[384], &w[nw - 8]);
2412 cftf162(&a[416], &w[nw - 32]);
2413 cftf161(&a[448], &w[nw - 8]);
2414 }
2415 else {
2416 cftmdl1(64, a, &w[nw - 32]);
2417 cftf081(a, &w[nw - 8]);
2418 cftf082(&a[16], &w[nw - 8]);
2419 cftf081(&a[32], &w[nw - 8]);
2420 cftf081(&a[48], &w[nw - 8]);
2421 cftmdl2(64, &a[64], &w[nw - 64]);
2422 cftf081(&a[64], &w[nw - 8]);
2423 cftf082(&a[80], &w[nw - 8]);
2424 cftf081(&a[96], &w[nw - 8]);
2425 cftf082(&a[112], &w[nw - 8]);
2426 cftmdl1(64, &a[128], &w[nw - 32]);
2427 cftf081(&a[128], &w[nw - 8]);
2428 cftf082(&a[144], &w[nw - 8]);
2429 cftf081(&a[160], &w[nw - 8]);
2430 cftf081(&a[176], &w[nw - 8]);
2431 if(isplt != 0) {
2432 cftmdl1(64, &a[192], &w[nw - 32]);
2433 cftf081(&a[240], &w[nw - 8]);
2434 }
2435 else {
2436 cftmdl2(64, &a[192], &w[nw - 64]);
2437 cftf082(&a[240], &w[nw - 8]);
2438 }
2439 cftf081(&a[192], &w[nw - 8]);
2440 cftf082(&a[208], &w[nw - 8]);
2441 cftf081(&a[224], &w[nw - 8]);
2442 }
2443 }
2444
cftmdl1(int n,float * a,float * w)2445 void cftmdl1(int n, float *a, float *w) {
2446 int j, j0, j1, j2, j3, k, m, mh;
2447 float wn4r, wk1r, wk1i, wk3r, wk3i;
2448 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2449
2450 mh = n >> 3;
2451 m = 2 * mh;
2452 j1 = m;
2453 j2 = j1 + m;
2454 j3 = j2 + m;
2455 x0r = a[0] + a[j2];
2456 x0i = a[1] + a[j2 + 1];
2457 x1r = a[0] - a[j2];
2458 x1i = a[1] - a[j2 + 1];
2459 x2r = a[j1] + a[j3];
2460 x2i = a[j1 + 1] + a[j3 + 1];
2461 x3r = a[j1] - a[j3];
2462 x3i = a[j1 + 1] - a[j3 + 1];
2463 a[0] = x0r + x2r;
2464 a[1] = x0i + x2i;
2465 a[j1] = x0r - x2r;
2466 a[j1 + 1] = x0i - x2i;
2467 a[j2] = x1r - x3i;
2468 a[j2 + 1] = x1i + x3r;
2469 a[j3] = x1r + x3i;
2470 a[j3 + 1] = x1i - x3r;
2471 wn4r = w[1];
2472 k = 0;
2473 for(j = 2; j < mh; j += 2) {
2474 k += 4;
2475 wk1r = w[k];
2476 wk1i = w[k + 1];
2477 wk3r = w[k + 2];
2478 wk3i = w[k + 3];
2479 j1 = j + m;
2480 j2 = j1 + m;
2481 j3 = j2 + m;
2482 x0r = a[j] + a[j2];
2483 x0i = a[j + 1] + a[j2 + 1];
2484 x1r = a[j] - a[j2];
2485 x1i = a[j + 1] - a[j2 + 1];
2486 x2r = a[j1] + a[j3];
2487 x2i = a[j1 + 1] + a[j3 + 1];
2488 x3r = a[j1] - a[j3];
2489 x3i = a[j1 + 1] - a[j3 + 1];
2490 a[j] = x0r + x2r;
2491 a[j + 1] = x0i + x2i;
2492 a[j1] = x0r - x2r;
2493 a[j1 + 1] = x0i - x2i;
2494 x0r = x1r - x3i;
2495 x0i = x1i + x3r;
2496 a[j2] = wk1r * x0r - wk1i * x0i;
2497 a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2498 x0r = x1r + x3i;
2499 x0i = x1i - x3r;
2500 a[j3] = wk3r * x0r + wk3i * x0i;
2501 a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2502 j0 = m - j;
2503 j1 = j0 + m;
2504 j2 = j1 + m;
2505 j3 = j2 + m;
2506 x0r = a[j0] + a[j2];
2507 x0i = a[j0 + 1] + a[j2 + 1];
2508 x1r = a[j0] - a[j2];
2509 x1i = a[j0 + 1] - a[j2 + 1];
2510 x2r = a[j1] + a[j3];
2511 x2i = a[j1 + 1] + a[j3 + 1];
2512 x3r = a[j1] - a[j3];
2513 x3i = a[j1 + 1] - a[j3 + 1];
2514 a[j0] = x0r + x2r;
2515 a[j0 + 1] = x0i + x2i;
2516 a[j1] = x0r - x2r;
2517 a[j1 + 1] = x0i - x2i;
2518 x0r = x1r - x3i;
2519 x0i = x1i + x3r;
2520 a[j2] = wk1i * x0r - wk1r * x0i;
2521 a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2522 x0r = x1r + x3i;
2523 x0i = x1i - x3r;
2524 a[j3] = wk3i * x0r + wk3r * x0i;
2525 a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2526 }
2527 j0 = mh;
2528 j1 = j0 + m;
2529 j2 = j1 + m;
2530 j3 = j2 + m;
2531 x0r = a[j0] + a[j2];
2532 x0i = a[j0 + 1] + a[j2 + 1];
2533 x1r = a[j0] - a[j2];
2534 x1i = a[j0 + 1] - a[j2 + 1];
2535 x2r = a[j1] + a[j3];
2536 x2i = a[j1 + 1] + a[j3 + 1];
2537 x3r = a[j1] - a[j3];
2538 x3i = a[j1 + 1] - a[j3 + 1];
2539 a[j0] = x0r + x2r;
2540 a[j0 + 1] = x0i + x2i;
2541 a[j1] = x0r - x2r;
2542 a[j1 + 1] = x0i - x2i;
2543 x0r = x1r - x3i;
2544 x0i = x1i + x3r;
2545 a[j2] = wn4r * (x0r - x0i);
2546 a[j2 + 1] = wn4r * (x0i + x0r);
2547 x0r = x1r + x3i;
2548 x0i = x1i - x3r;
2549 a[j3] = -wn4r * (x0r + x0i);
2550 a[j3 + 1] = -wn4r * (x0i - x0r);
2551 }
2552
cftmdl2(int n,float * a,float * w)2553 void cftmdl2(int n, float *a, float *w) {
2554 int j, j0, j1, j2, j3, k, kr, m, mh;
2555 float wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2556 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2557
2558 mh = n >> 3;
2559 m = 2 * mh;
2560 wn4r = w[1];
2561 j1 = m;
2562 j2 = j1 + m;
2563 j3 = j2 + m;
2564 x0r = a[0] - a[j2 + 1];
2565 x0i = a[1] + a[j2];
2566 x1r = a[0] + a[j2 + 1];
2567 x1i = a[1] - a[j2];
2568 x2r = a[j1] - a[j3 + 1];
2569 x2i = a[j1 + 1] + a[j3];
2570 x3r = a[j1] + a[j3 + 1];
2571 x3i = a[j1 + 1] - a[j3];
2572 y0r = wn4r * (x2r - x2i);
2573 y0i = wn4r * (x2i + x2r);
2574 a[0] = x0r + y0r;
2575 a[1] = x0i + y0i;
2576 a[j1] = x0r - y0r;
2577 a[j1 + 1] = x0i - y0i;
2578 y0r = wn4r * (x3r - x3i);
2579 y0i = wn4r * (x3i + x3r);
2580 a[j2] = x1r - y0i;
2581 a[j2 + 1] = x1i + y0r;
2582 a[j3] = x1r + y0i;
2583 a[j3 + 1] = x1i - y0r;
2584 k = 0;
2585 kr = 2 * m;
2586 for(j = 2; j < mh; j += 2) {
2587 k += 4;
2588 wk1r = w[k];
2589 wk1i = w[k + 1];
2590 wk3r = w[k + 2];
2591 wk3i = w[k + 3];
2592 kr -= 4;
2593 wd1i = w[kr];
2594 wd1r = w[kr + 1];
2595 wd3i = w[kr + 2];
2596 wd3r = w[kr + 3];
2597 j1 = j + m;
2598 j2 = j1 + m;
2599 j3 = j2 + m;
2600 x0r = a[j] - a[j2 + 1];
2601 x0i = a[j + 1] + a[j2];
2602 x1r = a[j] + a[j2 + 1];
2603 x1i = a[j + 1] - a[j2];
2604 x2r = a[j1] - a[j3 + 1];
2605 x2i = a[j1 + 1] + a[j3];
2606 x3r = a[j1] + a[j3 + 1];
2607 x3i = a[j1 + 1] - a[j3];
2608 y0r = wk1r * x0r - wk1i * x0i;
2609 y0i = wk1r * x0i + wk1i * x0r;
2610 y2r = wd1r * x2r - wd1i * x2i;
2611 y2i = wd1r * x2i + wd1i * x2r;
2612 a[j] = y0r + y2r;
2613 a[j + 1] = y0i + y2i;
2614 a[j1] = y0r - y2r;
2615 a[j1 + 1] = y0i - y2i;
2616 y0r = wk3r * x1r + wk3i * x1i;
2617 y0i = wk3r * x1i - wk3i * x1r;
2618 y2r = wd3r * x3r + wd3i * x3i;
2619 y2i = wd3r * x3i - wd3i * x3r;
2620 a[j2] = y0r + y2r;
2621 a[j2 + 1] = y0i + y2i;
2622 a[j3] = y0r - y2r;
2623 a[j3 + 1] = y0i - y2i;
2624 j0 = m - j;
2625 j1 = j0 + m;
2626 j2 = j1 + m;
2627 j3 = j2 + m;
2628 x0r = a[j0] - a[j2 + 1];
2629 x0i = a[j0 + 1] + a[j2];
2630 x1r = a[j0] + a[j2 + 1];
2631 x1i = a[j0 + 1] - a[j2];
2632 x2r = a[j1] - a[j3 + 1];
2633 x2i = a[j1 + 1] + a[j3];
2634 x3r = a[j1] + a[j3 + 1];
2635 x3i = a[j1 + 1] - a[j3];
2636 y0r = wd1i * x0r - wd1r * x0i;
2637 y0i = wd1i * x0i + wd1r * x0r;
2638 y2r = wk1i * x2r - wk1r * x2i;
2639 y2i = wk1i * x2i + wk1r * x2r;
2640 a[j0] = y0r + y2r;
2641 a[j0 + 1] = y0i + y2i;
2642 a[j1] = y0r - y2r;
2643 a[j1 + 1] = y0i - y2i;
2644 y0r = wd3i * x1r + wd3r * x1i;
2645 y0i = wd3i * x1i - wd3r * x1r;
2646 y2r = wk3i * x3r + wk3r * x3i;
2647 y2i = wk3i * x3i - wk3r * x3r;
2648 a[j2] = y0r + y2r;
2649 a[j2 + 1] = y0i + y2i;
2650 a[j3] = y0r - y2r;
2651 a[j3 + 1] = y0i - y2i;
2652 }
2653 wk1r = w[m];
2654 wk1i = w[m + 1];
2655 j0 = mh;
2656 j1 = j0 + m;
2657 j2 = j1 + m;
2658 j3 = j2 + m;
2659 x0r = a[j0] - a[j2 + 1];
2660 x0i = a[j0 + 1] + a[j2];
2661 x1r = a[j0] + a[j2 + 1];
2662 x1i = a[j0 + 1] - a[j2];
2663 x2r = a[j1] - a[j3 + 1];
2664 x2i = a[j1 + 1] + a[j3];
2665 x3r = a[j1] + a[j3 + 1];
2666 x3i = a[j1 + 1] - a[j3];
2667 y0r = wk1r * x0r - wk1i * x0i;
2668 y0i = wk1r * x0i + wk1i * x0r;
2669 y2r = wk1i * x2r - wk1r * x2i;
2670 y2i = wk1i * x2i + wk1r * x2r;
2671 a[j0] = y0r + y2r;
2672 a[j0 + 1] = y0i + y2i;
2673 a[j1] = y0r - y2r;
2674 a[j1 + 1] = y0i - y2i;
2675 y0r = wk1i * x1r - wk1r * x1i;
2676 y0i = wk1i * x1i + wk1r * x1r;
2677 y2r = wk1r * x3r - wk1i * x3i;
2678 y2i = wk1r * x3i + wk1i * x3r;
2679 a[j2] = y0r - y2r;
2680 a[j2 + 1] = y0i - y2i;
2681 a[j3] = y0r + y2r;
2682 a[j3 + 1] = y0i + y2i;
2683 }
2684
cftfx41(int n,float * a,int nw,float * w)2685 void cftfx41(int n, float *a, int nw, float *w) {
2686 void cftf161(float * a, float * w);
2687 void cftf162(float * a, float * w);
2688 void cftf081(float * a, float * w);
2689 void cftf082(float * a, float * w);
2690
2691 if(n == 128) {
2692 cftf161(a, &w[nw - 8]);
2693 cftf162(&a[32], &w[nw - 32]);
2694 cftf161(&a[64], &w[nw - 8]);
2695 cftf161(&a[96], &w[nw - 8]);
2696 }
2697 else {
2698 cftf081(a, &w[nw - 8]);
2699 cftf082(&a[16], &w[nw - 8]);
2700 cftf081(&a[32], &w[nw - 8]);
2701 cftf081(&a[48], &w[nw - 8]);
2702 }
2703 }
2704
cftf161(float * a,float * w)2705 void cftf161(float *a, float *w) {
2706 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r,
2707 y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i, y8r, y8i,
2708 y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i, y14r, y14i,
2709 y15r, y15i;
2710
2711 wn4r = w[1];
2712 wk1r = w[2];
2713 wk1i = w[3];
2714 x0r = a[0] + a[16];
2715 x0i = a[1] + a[17];
2716 x1r = a[0] - a[16];
2717 x1i = a[1] - a[17];
2718 x2r = a[8] + a[24];
2719 x2i = a[9] + a[25];
2720 x3r = a[8] - a[24];
2721 x3i = a[9] - a[25];
2722 y0r = x0r + x2r;
2723 y0i = x0i + x2i;
2724 y4r = x0r - x2r;
2725 y4i = x0i - x2i;
2726 y8r = x1r - x3i;
2727 y8i = x1i + x3r;
2728 y12r = x1r + x3i;
2729 y12i = x1i - x3r;
2730 x0r = a[2] + a[18];
2731 x0i = a[3] + a[19];
2732 x1r = a[2] - a[18];
2733 x1i = a[3] - a[19];
2734 x2r = a[10] + a[26];
2735 x2i = a[11] + a[27];
2736 x3r = a[10] - a[26];
2737 x3i = a[11] - a[27];
2738 y1r = x0r + x2r;
2739 y1i = x0i + x2i;
2740 y5r = x0r - x2r;
2741 y5i = x0i - x2i;
2742 x0r = x1r - x3i;
2743 x0i = x1i + x3r;
2744 y9r = wk1r * x0r - wk1i * x0i;
2745 y9i = wk1r * x0i + wk1i * x0r;
2746 x0r = x1r + x3i;
2747 x0i = x1i - x3r;
2748 y13r = wk1i * x0r - wk1r * x0i;
2749 y13i = wk1i * x0i + wk1r * x0r;
2750 x0r = a[4] + a[20];
2751 x0i = a[5] + a[21];
2752 x1r = a[4] - a[20];
2753 x1i = a[5] - a[21];
2754 x2r = a[12] + a[28];
2755 x2i = a[13] + a[29];
2756 x3r = a[12] - a[28];
2757 x3i = a[13] - a[29];
2758 y2r = x0r + x2r;
2759 y2i = x0i + x2i;
2760 y6r = x0r - x2r;
2761 y6i = x0i - x2i;
2762 x0r = x1r - x3i;
2763 x0i = x1i + x3r;
2764 y10r = wn4r * (x0r - x0i);
2765 y10i = wn4r * (x0i + x0r);
2766 x0r = x1r + x3i;
2767 x0i = x1i - x3r;
2768 y14r = wn4r * (x0r + x0i);
2769 y14i = wn4r * (x0i - x0r);
2770 x0r = a[6] + a[22];
2771 x0i = a[7] + a[23];
2772 x1r = a[6] - a[22];
2773 x1i = a[7] - a[23];
2774 x2r = a[14] + a[30];
2775 x2i = a[15] + a[31];
2776 x3r = a[14] - a[30];
2777 x3i = a[15] - a[31];
2778 y3r = x0r + x2r;
2779 y3i = x0i + x2i;
2780 y7r = x0r - x2r;
2781 y7i = x0i - x2i;
2782 x0r = x1r - x3i;
2783 x0i = x1i + x3r;
2784 y11r = wk1i * x0r - wk1r * x0i;
2785 y11i = wk1i * x0i + wk1r * x0r;
2786 x0r = x1r + x3i;
2787 x0i = x1i - x3r;
2788 y15r = wk1r * x0r - wk1i * x0i;
2789 y15i = wk1r * x0i + wk1i * x0r;
2790 x0r = y12r - y14r;
2791 x0i = y12i - y14i;
2792 x1r = y12r + y14r;
2793 x1i = y12i + y14i;
2794 x2r = y13r - y15r;
2795 x2i = y13i - y15i;
2796 x3r = y13r + y15r;
2797 x3i = y13i + y15i;
2798 a[24] = x0r + x2r;
2799 a[25] = x0i + x2i;
2800 a[26] = x0r - x2r;
2801 a[27] = x0i - x2i;
2802 a[28] = x1r - x3i;
2803 a[29] = x1i + x3r;
2804 a[30] = x1r + x3i;
2805 a[31] = x1i - x3r;
2806 x0r = y8r + y10r;
2807 x0i = y8i + y10i;
2808 x1r = y8r - y10r;
2809 x1i = y8i - y10i;
2810 x2r = y9r + y11r;
2811 x2i = y9i + y11i;
2812 x3r = y9r - y11r;
2813 x3i = y9i - y11i;
2814 a[16] = x0r + x2r;
2815 a[17] = x0i + x2i;
2816 a[18] = x0r - x2r;
2817 a[19] = x0i - x2i;
2818 a[20] = x1r - x3i;
2819 a[21] = x1i + x3r;
2820 a[22] = x1r + x3i;
2821 a[23] = x1i - x3r;
2822 x0r = y5r - y7i;
2823 x0i = y5i + y7r;
2824 x2r = wn4r * (x0r - x0i);
2825 x2i = wn4r * (x0i + x0r);
2826 x0r = y5r + y7i;
2827 x0i = y5i - y7r;
2828 x3r = wn4r * (x0r - x0i);
2829 x3i = wn4r * (x0i + x0r);
2830 x0r = y4r - y6i;
2831 x0i = y4i + y6r;
2832 x1r = y4r + y6i;
2833 x1i = y4i - y6r;
2834 a[8] = x0r + x2r;
2835 a[9] = x0i + x2i;
2836 a[10] = x0r - x2r;
2837 a[11] = x0i - x2i;
2838 a[12] = x1r - x3i;
2839 a[13] = x1i + x3r;
2840 a[14] = x1r + x3i;
2841 a[15] = x1i - x3r;
2842 x0r = y0r + y2r;
2843 x0i = y0i + y2i;
2844 x1r = y0r - y2r;
2845 x1i = y0i - y2i;
2846 x2r = y1r + y3r;
2847 x2i = y1i + y3i;
2848 x3r = y1r - y3r;
2849 x3i = y1i - y3i;
2850 a[0] = x0r + x2r;
2851 a[1] = x0i + x2i;
2852 a[2] = x0r - x2r;
2853 a[3] = x0i - x2i;
2854 a[4] = x1r - x3i;
2855 a[5] = x1i + x3r;
2856 a[6] = x1r + x3i;
2857 a[7] = x1i - x3r;
2858 }
2859
cftf162(float * a,float * w)2860 void cftf162(float *a, float *w) {
2861 float wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i, x0r, x0i, x1r, x1i, x2r, x2i,
2862 y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r,
2863 y7i, y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i, y12r, y12i, y13r, y13i,
2864 y14r, y14i, y15r, y15i;
2865
2866 wn4r = w[1];
2867 wk1r = w[4];
2868 wk1i = w[5];
2869 wk3r = w[6];
2870 wk3i = -w[7];
2871 wk2r = w[8];
2872 wk2i = w[9];
2873 x1r = a[0] - a[17];
2874 x1i = a[1] + a[16];
2875 x0r = a[8] - a[25];
2876 x0i = a[9] + a[24];
2877 x2r = wn4r * (x0r - x0i);
2878 x2i = wn4r * (x0i + x0r);
2879 y0r = x1r + x2r;
2880 y0i = x1i + x2i;
2881 y4r = x1r - x2r;
2882 y4i = x1i - x2i;
2883 x1r = a[0] + a[17];
2884 x1i = a[1] - a[16];
2885 x0r = a[8] + a[25];
2886 x0i = a[9] - a[24];
2887 x2r = wn4r * (x0r - x0i);
2888 x2i = wn4r * (x0i + x0r);
2889 y8r = x1r - x2i;
2890 y8i = x1i + x2r;
2891 y12r = x1r + x2i;
2892 y12i = x1i - x2r;
2893 x0r = a[2] - a[19];
2894 x0i = a[3] + a[18];
2895 x1r = wk1r * x0r - wk1i * x0i;
2896 x1i = wk1r * x0i + wk1i * x0r;
2897 x0r = a[10] - a[27];
2898 x0i = a[11] + a[26];
2899 x2r = wk3i * x0r - wk3r * x0i;
2900 x2i = wk3i * x0i + wk3r * x0r;
2901 y1r = x1r + x2r;
2902 y1i = x1i + x2i;
2903 y5r = x1r - x2r;
2904 y5i = x1i - x2i;
2905 x0r = a[2] + a[19];
2906 x0i = a[3] - a[18];
2907 x1r = wk3r * x0r - wk3i * x0i;
2908 x1i = wk3r * x0i + wk3i * x0r;
2909 x0r = a[10] + a[27];
2910 x0i = a[11] - a[26];
2911 x2r = wk1r * x0r + wk1i * x0i;
2912 x2i = wk1r * x0i - wk1i * x0r;
2913 y9r = x1r - x2r;
2914 y9i = x1i - x2i;
2915 y13r = x1r + x2r;
2916 y13i = x1i + x2i;
2917 x0r = a[4] - a[21];
2918 x0i = a[5] + a[20];
2919 x1r = wk2r * x0r - wk2i * x0i;
2920 x1i = wk2r * x0i + wk2i * x0r;
2921 x0r = a[12] - a[29];
2922 x0i = a[13] + a[28];
2923 x2r = wk2i * x0r - wk2r * x0i;
2924 x2i = wk2i * x0i + wk2r * x0r;
2925 y2r = x1r + x2r;
2926 y2i = x1i + x2i;
2927 y6r = x1r - x2r;
2928 y6i = x1i - x2i;
2929 x0r = a[4] + a[21];
2930 x0i = a[5] - a[20];
2931 x1r = wk2i * x0r - wk2r * x0i;
2932 x1i = wk2i * x0i + wk2r * x0r;
2933 x0r = a[12] + a[29];
2934 x0i = a[13] - a[28];
2935 x2r = wk2r * x0r - wk2i * x0i;
2936 x2i = wk2r * x0i + wk2i * x0r;
2937 y10r = x1r - x2r;
2938 y10i = x1i - x2i;
2939 y14r = x1r + x2r;
2940 y14i = x1i + x2i;
2941 x0r = a[6] - a[23];
2942 x0i = a[7] + a[22];
2943 x1r = wk3r * x0r - wk3i * x0i;
2944 x1i = wk3r * x0i + wk3i * x0r;
2945 x0r = a[14] - a[31];
2946 x0i = a[15] + a[30];
2947 x2r = wk1i * x0r - wk1r * x0i;
2948 x2i = wk1i * x0i + wk1r * x0r;
2949 y3r = x1r + x2r;
2950 y3i = x1i + x2i;
2951 y7r = x1r - x2r;
2952 y7i = x1i - x2i;
2953 x0r = a[6] + a[23];
2954 x0i = a[7] - a[22];
2955 x1r = wk1i * x0r + wk1r * x0i;
2956 x1i = wk1i * x0i - wk1r * x0r;
2957 x0r = a[14] + a[31];
2958 x0i = a[15] - a[30];
2959 x2r = wk3i * x0r - wk3r * x0i;
2960 x2i = wk3i * x0i + wk3r * x0r;
2961 y11r = x1r + x2r;
2962 y11i = x1i + x2i;
2963 y15r = x1r - x2r;
2964 y15i = x1i - x2i;
2965 x1r = y0r + y2r;
2966 x1i = y0i + y2i;
2967 x2r = y1r + y3r;
2968 x2i = y1i + y3i;
2969 a[0] = x1r + x2r;
2970 a[1] = x1i + x2i;
2971 a[2] = x1r - x2r;
2972 a[3] = x1i - x2i;
2973 x1r = y0r - y2r;
2974 x1i = y0i - y2i;
2975 x2r = y1r - y3r;
2976 x2i = y1i - y3i;
2977 a[4] = x1r - x2i;
2978 a[5] = x1i + x2r;
2979 a[6] = x1r + x2i;
2980 a[7] = x1i - x2r;
2981 x1r = y4r - y6i;
2982 x1i = y4i + y6r;
2983 x0r = y5r - y7i;
2984 x0i = y5i + y7r;
2985 x2r = wn4r * (x0r - x0i);
2986 x2i = wn4r * (x0i + x0r);
2987 a[8] = x1r + x2r;
2988 a[9] = x1i + x2i;
2989 a[10] = x1r - x2r;
2990 a[11] = x1i - x2i;
2991 x1r = y4r + y6i;
2992 x1i = y4i - y6r;
2993 x0r = y5r + y7i;
2994 x0i = y5i - y7r;
2995 x2r = wn4r * (x0r - x0i);
2996 x2i = wn4r * (x0i + x0r);
2997 a[12] = x1r - x2i;
2998 a[13] = x1i + x2r;
2999 a[14] = x1r + x2i;
3000 a[15] = x1i - x2r;
3001 x1r = y8r + y10r;
3002 x1i = y8i + y10i;
3003 x2r = y9r - y11r;
3004 x2i = y9i - y11i;
3005 a[16] = x1r + x2r;
3006 a[17] = x1i + x2i;
3007 a[18] = x1r - x2r;
3008 a[19] = x1i - x2i;
3009 x1r = y8r - y10r;
3010 x1i = y8i - y10i;
3011 x2r = y9r + y11r;
3012 x2i = y9i + y11i;
3013 a[20] = x1r - x2i;
3014 a[21] = x1i + x2r;
3015 a[22] = x1r + x2i;
3016 a[23] = x1i - x2r;
3017 x1r = y12r - y14i;
3018 x1i = y12i + y14r;
3019 x0r = y13r + y15i;
3020 x0i = y13i - y15r;
3021 x2r = wn4r * (x0r - x0i);
3022 x2i = wn4r * (x0i + x0r);
3023 a[24] = x1r + x2r;
3024 a[25] = x1i + x2i;
3025 a[26] = x1r - x2r;
3026 a[27] = x1i - x2i;
3027 x1r = y12r + y14i;
3028 x1i = y12i - y14r;
3029 x0r = y13r - y15i;
3030 x0i = y13i + y15r;
3031 x2r = wn4r * (x0r - x0i);
3032 x2i = wn4r * (x0i + x0r);
3033 a[28] = x1r - x2i;
3034 a[29] = x1i + x2r;
3035 a[30] = x1r + x2i;
3036 a[31] = x1i - x2r;
3037 }
3038
cftf081(float * a,float * w)3039 void cftf081(float *a, float *w) {
3040 float wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y1r, y1i, y2r,
3041 y2i, y3r, y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3042
3043 wn4r = w[1];
3044 x0r = a[0] + a[8];
3045 x0i = a[1] + a[9];
3046 x1r = a[0] - a[8];
3047 x1i = a[1] - a[9];
3048 x2r = a[4] + a[12];
3049 x2i = a[5] + a[13];
3050 x3r = a[4] - a[12];
3051 x3i = a[5] - a[13];
3052 y0r = x0r + x2r;
3053 y0i = x0i + x2i;
3054 y2r = x0r - x2r;
3055 y2i = x0i - x2i;
3056 y1r = x1r - x3i;
3057 y1i = x1i + x3r;
3058 y3r = x1r + x3i;
3059 y3i = x1i - x3r;
3060 x0r = a[2] + a[10];
3061 x0i = a[3] + a[11];
3062 x1r = a[2] - a[10];
3063 x1i = a[3] - a[11];
3064 x2r = a[6] + a[14];
3065 x2i = a[7] + a[15];
3066 x3r = a[6] - a[14];
3067 x3i = a[7] - a[15];
3068 y4r = x0r + x2r;
3069 y4i = x0i + x2i;
3070 y6r = x0r - x2r;
3071 y6i = x0i - x2i;
3072 x0r = x1r - x3i;
3073 x0i = x1i + x3r;
3074 x2r = x1r + x3i;
3075 x2i = x1i - x3r;
3076 y5r = wn4r * (x0r - x0i);
3077 y5i = wn4r * (x0r + x0i);
3078 y7r = wn4r * (x2r - x2i);
3079 y7i = wn4r * (x2r + x2i);
3080 a[8] = y1r + y5r;
3081 a[9] = y1i + y5i;
3082 a[10] = y1r - y5r;
3083 a[11] = y1i - y5i;
3084 a[12] = y3r - y7i;
3085 a[13] = y3i + y7r;
3086 a[14] = y3r + y7i;
3087 a[15] = y3i - y7r;
3088 a[0] = y0r + y4r;
3089 a[1] = y0i + y4i;
3090 a[2] = y0r - y4r;
3091 a[3] = y0i - y4i;
3092 a[4] = y2r - y6i;
3093 a[5] = y2i + y6r;
3094 a[6] = y2r + y6i;
3095 a[7] = y2i - y6r;
3096 }
3097
cftf082(float * a,float * w)3098 void cftf082(float *a, float *w) {
3099 float wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i, y0r, y0i, y1r, y1i, y2r, y2i, y3r,
3100 y3i, y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3101
3102 wn4r = w[1];
3103 wk1r = w[2];
3104 wk1i = w[3];
3105 y0r = a[0] - a[9];
3106 y0i = a[1] + a[8];
3107 y1r = a[0] + a[9];
3108 y1i = a[1] - a[8];
3109 x0r = a[4] - a[13];
3110 x0i = a[5] + a[12];
3111 y2r = wn4r * (x0r - x0i);
3112 y2i = wn4r * (x0i + x0r);
3113 x0r = a[4] + a[13];
3114 x0i = a[5] - a[12];
3115 y3r = wn4r * (x0r - x0i);
3116 y3i = wn4r * (x0i + x0r);
3117 x0r = a[2] - a[11];
3118 x0i = a[3] + a[10];
3119 y4r = wk1r * x0r - wk1i * x0i;
3120 y4i = wk1r * x0i + wk1i * x0r;
3121 x0r = a[2] + a[11];
3122 x0i = a[3] - a[10];
3123 y5r = wk1i * x0r - wk1r * x0i;
3124 y5i = wk1i * x0i + wk1r * x0r;
3125 x0r = a[6] - a[15];
3126 x0i = a[7] + a[14];
3127 y6r = wk1i * x0r - wk1r * x0i;
3128 y6i = wk1i * x0i + wk1r * x0r;
3129 x0r = a[6] + a[15];
3130 x0i = a[7] - a[14];
3131 y7r = wk1r * x0r - wk1i * x0i;
3132 y7i = wk1r * x0i + wk1i * x0r;
3133 x0r = y0r + y2r;
3134 x0i = y0i + y2i;
3135 x1r = y4r + y6r;
3136 x1i = y4i + y6i;
3137 a[0] = x0r + x1r;
3138 a[1] = x0i + x1i;
3139 a[2] = x0r - x1r;
3140 a[3] = x0i - x1i;
3141 x0r = y0r - y2r;
3142 x0i = y0i - y2i;
3143 x1r = y4r - y6r;
3144 x1i = y4i - y6i;
3145 a[4] = x0r - x1i;
3146 a[5] = x0i + x1r;
3147 a[6] = x0r + x1i;
3148 a[7] = x0i - x1r;
3149 x0r = y1r - y3i;
3150 x0i = y1i + y3r;
3151 x1r = y5r - y7r;
3152 x1i = y5i - y7i;
3153 a[8] = x0r + x1r;
3154 a[9] = x0i + x1i;
3155 a[10] = x0r - x1r;
3156 a[11] = x0i - x1i;
3157 x0r = y1r + y3i;
3158 x0i = y1i - y3r;
3159 x1r = y5r + y7r;
3160 x1i = y5i + y7i;
3161 a[12] = x0r - x1i;
3162 a[13] = x0i + x1r;
3163 a[14] = x0r + x1i;
3164 a[15] = x0i - x1r;
3165 }
3166
cftf040(float * a)3167 void cftf040(float *a) {
3168 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3169
3170 x0r = a[0] + a[4];
3171 x0i = a[1] + a[5];
3172 x1r = a[0] - a[4];
3173 x1i = a[1] - a[5];
3174 x2r = a[2] + a[6];
3175 x2i = a[3] + a[7];
3176 x3r = a[2] - a[6];
3177 x3i = a[3] - a[7];
3178 a[0] = x0r + x2r;
3179 a[1] = x0i + x2i;
3180 a[2] = x1r - x3i;
3181 a[3] = x1i + x3r;
3182 a[4] = x0r - x2r;
3183 a[5] = x0i - x2i;
3184 a[6] = x1r + x3i;
3185 a[7] = x1i - x3r;
3186 }
3187
cftb040(float * a)3188 void cftb040(float *a) {
3189 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3190
3191 x0r = a[0] + a[4];
3192 x0i = a[1] + a[5];
3193 x1r = a[0] - a[4];
3194 x1i = a[1] - a[5];
3195 x2r = a[2] + a[6];
3196 x2i = a[3] + a[7];
3197 x3r = a[2] - a[6];
3198 x3i = a[3] - a[7];
3199 a[0] = x0r + x2r;
3200 a[1] = x0i + x2i;
3201 a[2] = x1r + x3i;
3202 a[3] = x1i - x3r;
3203 a[4] = x0r - x2r;
3204 a[5] = x0i - x2i;
3205 a[6] = x1r - x3i;
3206 a[7] = x1i + x3r;
3207 }
3208
cftx020(float * a)3209 void cftx020(float *a) {
3210 float x0r, x0i;
3211
3212 x0r = a[0] - a[2];
3213 x0i = a[1] - a[3];
3214 a[0] += a[2];
3215 a[1] += a[3];
3216 a[2] = x0r;
3217 a[3] = x0i;
3218 }
3219
rftfsub(int n,float * a,int nc,float * c)3220 void rftfsub(int n, float *a, int nc, float *c) {
3221 int j, k, kk, ks, m;
3222 float wkr, wki, xr, xi, yr, yi;
3223
3224 m = n >> 1;
3225 ks = 2 * nc / m;
3226 kk = 0;
3227 for(j = 2; j < m; j += 2) {
3228 k = n - j;
3229 kk += ks;
3230 wkr = 0.5 - c[nc - kk];
3231 wki = c[kk];
3232 xr = a[j] - a[k];
3233 xi = a[j + 1] + a[k + 1];
3234 yr = wkr * xr - wki * xi;
3235 yi = wkr * xi + wki * xr;
3236 a[j] -= yr;
3237 a[j + 1] -= yi;
3238 a[k] += yr;
3239 a[k + 1] -= yi;
3240 }
3241 }
3242
rftbsub(int n,float * a,int nc,float * c)3243 void rftbsub(int n, float *a, int nc, float *c) {
3244 int j, k, kk, ks, m;
3245 float wkr, wki, xr, xi, yr, yi;
3246
3247 m = n >> 1;
3248 ks = 2 * nc / m;
3249 kk = 0;
3250 for(j = 2; j < m; j += 2) {
3251 k = n - j;
3252 kk += ks;
3253 wkr = 0.5 - c[nc - kk];
3254 wki = c[kk];
3255 xr = a[j] - a[k];
3256 xi = a[j + 1] + a[k + 1];
3257 yr = wkr * xr + wki * xi;
3258 yi = wkr * xi - wki * xr;
3259 a[j] -= yr;
3260 a[j + 1] -= yi;
3261 a[k] += yr;
3262 a[k + 1] -= yi;
3263 }
3264 }
3265
dctsub(int n,float * a,int nc,float * c)3266 void dctsub(int n, float *a, int nc, float *c) {
3267 int j, k, kk, ks, m;
3268 float wkr, wki, xr;
3269
3270 m = n >> 1;
3271 ks = nc / n;
3272 kk = 0;
3273 for(j = 1; j < m; j++) {
3274 k = n - j;
3275 kk += ks;
3276 wkr = c[kk] - c[nc - kk];
3277 wki = c[kk] + c[nc - kk];
3278 xr = wki * a[j] - wkr * a[k];
3279 a[j] = wkr * a[j] + wki * a[k];
3280 a[k] = xr;
3281 }
3282 a[m] *= c[0];
3283 }
3284
dstsub(int n,float * a,int nc,float * c)3285 void dstsub(int n, float *a, int nc, float *c) {
3286 int j, k, kk, ks, m;
3287 float wkr, wki, xr;
3288
3289 m = n >> 1;
3290 ks = nc / n;
3291 kk = 0;
3292 for(j = 1; j < m; j++) {
3293 k = n - j;
3294 kk += ks;
3295 wkr = c[kk] - c[nc - kk];
3296 wki = c[kk] + c[nc - kk];
3297 xr = wki * a[k] - wkr * a[j];
3298 a[k] = wkr * a[k] + wki * a[j];
3299 a[j] = xr;
3300 }
3301 a[m] *= c[0];
3302 }
3303
3304 }
3305