1 /*
2 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 *
18 */
19
20 /*
21 * rexec.c -- execute the fft
22 */
23
24 /* $Id: rexec.c,v 1.28 2003/03/16 23:43:46 stevenj Exp $ */
25 #include <stdio.h>
26 #include <stdlib.h>
27
28 #include "fftw-int.h"
29 #include "rfftw.h"
30
rfftw_strided_copy(int n,fftw_real * in,int ostride,fftw_real * out)31 void rfftw_strided_copy(int n, fftw_real *in, int ostride,
32 fftw_real *out)
33 {
34 int i;
35 fftw_real r0, r1, r2, r3;
36
37 i = 0;
38 for (; i < (n & 3); ++i) {
39 out[i * ostride] = in[i];
40 }
41 for (; i < n; i += 4) {
42 r0 = in[i];
43 r1 = in[i + 1];
44 r2 = in[i + 2];
45 r3 = in[i + 3];
46 out[i * ostride] = r0;
47 out[(i + 1) * ostride] = r1;
48 out[(i + 2) * ostride] = r2;
49 out[(i + 3) * ostride] = r3;
50 }
51 }
52
rexecutor_many(int n,fftw_real * in,fftw_real * out,fftw_plan_node * p,int istride,int ostride,int howmany,int idist,int odist,fftw_recurse_kind recurse_kind)53 static void rexecutor_many(int n, fftw_real *in,
54 fftw_real *out,
55 fftw_plan_node *p,
56 int istride,
57 int ostride,
58 int howmany, int idist, int odist,
59 fftw_recurse_kind recurse_kind)
60 {
61 int s;
62
63 switch (p->type) {
64 case FFTW_REAL2HC:
65 {
66 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
67
68 HACK_ALIGN_STACK_ODD;
69 for (s = 0; s < howmany; ++s)
70 codelet(in + s * idist, out + s * odist,
71 out + n * ostride + s * odist,
72 istride, ostride, -ostride);
73 break;
74 }
75
76 case FFTW_HC2REAL:
77 {
78 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
79
80 HACK_ALIGN_STACK_ODD;
81 for (s = 0; s < howmany; ++s)
82 codelet(in + s * idist, in + n * istride + s * idist,
83 out + s * odist,
84 istride, -istride, ostride);
85 break;
86 }
87
88 default:
89 for (s = 0; s < howmany; ++s)
90 rfftw_executor_simple(n, in + s * idist,
91 out + s * odist,
92 p, istride, ostride,
93 recurse_kind);
94 }
95 }
96
97 #ifdef FFTW_ENABLE_VECTOR_RECURSE
98
99 /* rexecutor_many_vector is like rexecutor_many, but it pushes the
100 howmany loop down to the leaves of the transform: */
rexecutor_many_vector(int n,fftw_real * in,fftw_real * out,fftw_plan_node * p,int istride,int ostride,int howmany,int idist,int odist)101 static void rexecutor_many_vector(int n, fftw_real *in,
102 fftw_real *out,
103 fftw_plan_node *p,
104 int istride,
105 int ostride,
106 int howmany, int idist, int odist)
107 {
108 switch (p->type) {
109 case FFTW_REAL2HC:
110 {
111 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
112 int s;
113
114 HACK_ALIGN_STACK_ODD;
115 for (s = 0; s < howmany; ++s)
116 codelet(in + s * idist, out + s * odist,
117 out + n * ostride + s * odist,
118 istride, ostride, -ostride);
119 break;
120 }
121
122 case FFTW_HC2REAL:
123 {
124 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
125 int s;
126
127 HACK_ALIGN_STACK_ODD;
128 for (s = 0; s < howmany; ++s)
129 codelet(in + s * idist, in + n * istride + s * idist,
130 out + s * odist,
131 istride, -istride, ostride);
132 break;
133 }
134
135 case FFTW_HC2HC:
136 {
137 int r = p->nodeu.hc2hc.size;
138 int m = n / r;
139 int i;
140 fftw_hc2hc_codelet *codelet;
141 fftw_complex *W;
142
143 switch (p->nodeu.hc2hc.dir) {
144 case FFTW_REAL_TO_COMPLEX:
145 for (i = 0; i < r; ++i)
146 rexecutor_many_vector(m, in + i * istride,
147 out + i * (m*ostride),
148 p->nodeu.hc2hc.recurse,
149 istride * r, ostride,
150 howmany, idist, odist);
151
152 W = p->nodeu.hc2hc.tw->twarray;
153 codelet = p->nodeu.hc2hc.codelet;
154 HACK_ALIGN_STACK_EVEN;
155 for (i = 0; i < howmany; ++i)
156 codelet(out + i * odist,
157 W, m * ostride, m, ostride);
158 break;
159 case FFTW_COMPLEX_TO_REAL:
160 W = p->nodeu.hc2hc.tw->twarray;
161 codelet = p->nodeu.hc2hc.codelet;
162 HACK_ALIGN_STACK_EVEN;
163 for (i = 0; i < howmany; ++i)
164 codelet(in + i * idist,
165 W, m * istride, m, istride);
166
167 for (i = 0; i < r; ++i)
168 rexecutor_many_vector(m, in + i * (m*istride),
169 out + i * ostride,
170 p->nodeu.hc2hc.recurse,
171 istride, ostride * r,
172 howmany, idist, odist);
173 break;
174 default:
175 goto bug;
176 }
177
178 break;
179 }
180
181 case FFTW_RGENERIC:
182 {
183 int r = p->nodeu.rgeneric.size;
184 int m = n / r;
185 int i;
186 fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
187 fftw_complex *W = p->nodeu.rgeneric.tw->twarray;
188
189 switch (p->nodeu.rgeneric.dir) {
190 case FFTW_REAL_TO_COMPLEX:
191 for (i = 0; i < r; ++i)
192 rexecutor_many_vector(m, in + i * istride,
193 out + i * (m * ostride),
194 p->nodeu.rgeneric.recurse,
195 istride * r, ostride,
196 howmany, idist, odist);
197
198 for (i = 0; i < howmany; ++i)
199 codelet(out + i * odist, W, m, r, n, ostride);
200 break;
201 case FFTW_COMPLEX_TO_REAL:
202 for (i = 0; i < howmany; ++i)
203 codelet(in + i * idist, W, m, r, n, istride);
204
205 for (i = 0; i < r; ++i)
206 rexecutor_many_vector(m, in + i * m * istride,
207 out + i * ostride,
208 p->nodeu.rgeneric.recurse,
209 istride, ostride * r,
210 howmany, idist, odist);
211 break;
212 default:
213 goto bug;
214 }
215
216 break;
217 }
218
219 default:
220 bug:
221 fftw_die("BUG in rexecutor: invalid plan\n");
222 break;
223 }
224 }
225
226 #endif /* FFTW_ENABLE_VECTOR_RECURSE */
227
rfftw_executor_simple(int n,fftw_real * in,fftw_real * out,fftw_plan_node * p,int istride,int ostride,fftw_recurse_kind recurse_kind)228 void rfftw_executor_simple(int n, fftw_real *in,
229 fftw_real *out,
230 fftw_plan_node *p,
231 int istride,
232 int ostride,
233 fftw_recurse_kind recurse_kind)
234 {
235 switch (p->type) {
236 case FFTW_REAL2HC:
237 HACK_ALIGN_STACK_ODD;
238 (p->nodeu.real2hc.codelet) (in, out, out + n * ostride,
239 istride, ostride, -ostride);
240 break;
241
242 case FFTW_HC2REAL:
243 HACK_ALIGN_STACK_ODD;
244 (p->nodeu.hc2real.codelet) (in, in + n * istride, out,
245 istride, -istride, ostride);
246 break;
247
248 case FFTW_HC2HC:
249 {
250 int r = p->nodeu.hc2hc.size;
251 int m = n / r;
252 /*
253 * please do resist the temptation of initializing
254 * these variables here. Doing so forces the
255 * compiler to keep a live variable across the
256 * recursive call.
257 */
258 fftw_hc2hc_codelet *codelet;
259 fftw_complex *W;
260
261 switch (p->nodeu.hc2hc.dir) {
262 case FFTW_REAL_TO_COMPLEX:
263 #ifdef FFTW_ENABLE_VECTOR_RECURSE
264 if (recurse_kind == FFTW_NORMAL_RECURSE)
265 #endif
266 rexecutor_many(m, in, out,
267 p->nodeu.hc2hc.recurse,
268 istride * r, ostride,
269 r, istride, m * ostride,
270 FFTW_NORMAL_RECURSE);
271 #ifdef FFTW_ENABLE_VECTOR_RECURSE
272 else
273 rexecutor_many_vector(m, in, out,
274 p->nodeu.hc2hc.recurse,
275 istride * r, ostride,
276 r, istride, m * ostride);
277 #endif
278
279 W = p->nodeu.hc2hc.tw->twarray;
280 codelet = p->nodeu.hc2hc.codelet;
281 HACK_ALIGN_STACK_EVEN;
282 codelet(out, W, m * ostride, m, ostride);
283 break;
284 case FFTW_COMPLEX_TO_REAL:
285 W = p->nodeu.hc2hc.tw->twarray;
286 codelet = p->nodeu.hc2hc.codelet;
287 HACK_ALIGN_STACK_EVEN;
288 codelet(in, W, m * istride, m, istride);
289
290 #ifdef FFTW_ENABLE_VECTOR_RECURSE
291 if (recurse_kind == FFTW_NORMAL_RECURSE)
292 #endif
293 rexecutor_many(m, in, out,
294 p->nodeu.hc2hc.recurse,
295 istride, ostride * r,
296 r, m * istride, ostride,
297 FFTW_NORMAL_RECURSE);
298 #ifdef FFTW_ENABLE_VECTOR_RECURSE
299 else
300 rexecutor_many_vector(m, in, out,
301 p->nodeu.hc2hc.recurse,
302 istride, ostride * r,
303 r, m * istride, ostride);
304 #endif
305 break;
306 default:
307 goto bug;
308 }
309
310 break;
311 }
312
313 case FFTW_RGENERIC:
314 {
315 int r = p->nodeu.rgeneric.size;
316 int m = n / r;
317 fftw_rgeneric_codelet *codelet = p->nodeu.rgeneric.codelet;
318 fftw_complex *W = p->nodeu.rgeneric.tw->twarray;
319
320 switch (p->nodeu.rgeneric.dir) {
321 case FFTW_REAL_TO_COMPLEX:
322 #ifdef FFTW_ENABLE_VECTOR_RECURSE
323 if (recurse_kind == FFTW_NORMAL_RECURSE)
324 #endif
325 rexecutor_many(m, in, out,
326 p->nodeu.rgeneric.recurse,
327 istride * r, ostride,
328 r, istride, m * ostride,
329 FFTW_NORMAL_RECURSE);
330 #ifdef FFTW_ENABLE_VECTOR_RECURSE
331 else
332 rexecutor_many_vector(m, in, out,
333 p->nodeu.rgeneric.recurse,
334 istride * r, ostride,
335 r, istride, m * ostride);
336 #endif
337
338 codelet(out, W, m, r, n, ostride);
339 break;
340 case FFTW_COMPLEX_TO_REAL:
341 codelet(in, W, m, r, n, istride);
342
343 #ifdef FFTW_ENABLE_VECTOR_RECURSE
344 if (recurse_kind == FFTW_NORMAL_RECURSE)
345 #endif
346 rexecutor_many(m, in, out,
347 p->nodeu.rgeneric.recurse,
348 istride, ostride * r,
349 r, m * istride, ostride,
350 FFTW_NORMAL_RECURSE);
351 #ifdef FFTW_ENABLE_VECTOR_RECURSE
352 else
353 rexecutor_many_vector(m, in, out,
354 p->nodeu.rgeneric.recurse,
355 istride, ostride * r,
356 r, m * istride, ostride);
357 #endif
358 break;
359 default:
360 goto bug;
361 }
362
363 break;
364 }
365
366 default:
367 bug:
368 fftw_die("BUG in rexecutor: invalid plan\n");
369 break;
370 }
371 }
372
rexecutor_simple_inplace(int n,fftw_real * in,fftw_real * out,fftw_plan_node * p,int istride,fftw_recurse_kind recurse_kind)373 static void rexecutor_simple_inplace(int n, fftw_real *in,
374 fftw_real *out,
375 fftw_plan_node *p,
376 int istride,
377 fftw_recurse_kind recurse_kind)
378 {
379 switch (p->type) {
380 case FFTW_REAL2HC:
381 HACK_ALIGN_STACK_ODD;
382 (p->nodeu.real2hc.codelet) (in, in, in + n * istride,
383 istride, istride, -istride);
384 break;
385
386 case FFTW_HC2REAL:
387 HACK_ALIGN_STACK_ODD;
388 (p->nodeu.hc2real.codelet) (in, in + n * istride, in,
389 istride, -istride, istride);
390 break;
391
392 default:
393 {
394 fftw_real *tmp;
395
396 if (out)
397 tmp = out;
398 else
399 tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
400
401 rfftw_executor_simple(n, in, tmp, p, istride, 1,
402 recurse_kind);
403 rfftw_strided_copy(n, tmp, istride, in);
404
405 if (!out)
406 fftw_free(tmp);
407 }
408 }
409 }
410
rexecutor_many_inplace(int n,fftw_real * in,fftw_real * out,fftw_plan_node * p,int istride,int howmany,int idist,fftw_recurse_kind recurse_kind)411 static void rexecutor_many_inplace(int n, fftw_real *in,
412 fftw_real *out,
413 fftw_plan_node *p,
414 int istride,
415 int howmany, int idist,
416 fftw_recurse_kind recurse_kind)
417 {
418 switch (p->type) {
419 case FFTW_REAL2HC:
420 {
421 fftw_real2hc_codelet *codelet = p->nodeu.real2hc.codelet;
422 int s;
423
424 HACK_ALIGN_STACK_ODD;
425 for (s = 0; s < howmany; ++s)
426 codelet(in + s * idist, in + s * idist,
427 in + n * istride + s * idist,
428 istride, istride, -istride);
429
430 break;
431 }
432
433 case FFTW_HC2REAL:
434 {
435 fftw_hc2real_codelet *codelet = p->nodeu.hc2real.codelet;
436 int s;
437
438 HACK_ALIGN_STACK_ODD;
439 for (s = 0; s < howmany; ++s)
440 codelet(in + s * idist, in + n * istride + s * idist,
441 in + s * idist,
442 istride, -istride, istride);
443
444 break;
445 }
446
447 default:
448 {
449 int s;
450 fftw_real *tmp;
451 if (out)
452 tmp = out;
453 else
454 tmp = (fftw_real *) fftw_malloc(n * sizeof(fftw_real));
455
456 for (s = 0; s < howmany; ++s) {
457 rfftw_executor_simple(n,
458 in + s * idist,
459 tmp,
460 p, istride, 1, recurse_kind);
461 rfftw_strided_copy(n, tmp, istride, in + s * idist);
462 }
463
464 if (!out)
465 fftw_free(tmp);
466 }
467 }
468 }
469
470 /* user interface */
rfftw(fftw_plan plan,int howmany,fftw_real * in,int istride,int idist,fftw_real * out,int ostride,int odist)471 void rfftw(fftw_plan plan, int howmany, fftw_real *in, int istride,
472 int idist, fftw_real *out, int ostride, int odist)
473 {
474 int n = plan->n;
475
476 if (plan->flags & FFTW_IN_PLACE) {
477 if (howmany == 1) {
478 rexecutor_simple_inplace(n, in, out, plan->root, istride,
479 plan->recurse_kind);
480 } else {
481 rexecutor_many_inplace(n, in, out, plan->root, istride, howmany,
482 idist, plan->recurse_kind);
483 }
484 } else {
485 if (howmany == 1) {
486 rfftw_executor_simple(n, in, out, plan->root, istride, ostride,
487 plan->recurse_kind);
488 } else {
489 #ifdef FFTW_ENABLE_VECTOR_RECURSE
490 int vector_size = plan->vector_size;
491 if (vector_size <= 1)
492 #endif
493 rexecutor_many(n, in, out, plan->root, istride, ostride,
494 howmany, idist, odist,
495 plan->recurse_kind);
496 #ifdef FFTW_ENABLE_VECTOR_RECURSE
497 else {
498 int s;
499 int num_vects = howmany / vector_size;
500 fftw_plan_node *root = plan->root;
501
502 for (s = 0; s < num_vects; ++s)
503 rexecutor_many_vector(n,
504 in + s * (vector_size * idist),
505 out + s * (vector_size * odist),
506 root,
507 istride, ostride,
508 vector_size, idist, odist);
509
510 s = howmany % vector_size;
511 if (s > 0)
512 rexecutor_many(n,
513 in + num_vects * (vector_size*idist),
514 out + num_vects * (vector_size*odist),
515 root,
516 istride, ostride,
517 s, idist, odist,
518 FFTW_NORMAL_RECURSE);
519 }
520 #endif
521 }
522 }
523 }
524
rfftw_one(fftw_plan plan,fftw_real * in,fftw_real * out)525 void rfftw_one(fftw_plan plan, fftw_real *in, fftw_real *out)
526 {
527 int n = plan->n;
528
529 if (plan->flags & FFTW_IN_PLACE)
530 rexecutor_simple_inplace(n, in, out, plan->root, 1,
531 plan->recurse_kind);
532 else
533 rfftw_executor_simple(n, in, out, plan->root, 1, 1,
534 plan->recurse_kind);
535 }
536