1
2 /*
3 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 */
20
21 /* $Id: rfftwnd.c,v 1.35 2003/03/16 23:43:46 stevenj Exp $ */
22
23 #include "fftw-int.h"
24 #include "rfftw.h"
25
26 /********************** prototypes for rexec2 routines **********************/
27
28 extern void rfftw_real2c_aux(fftw_plan plan, int howmany,
29 fftw_real *in, int istride, int idist,
30 fftw_complex *out, int ostride, int odist,
31 fftw_real *work);
32 extern void rfftw_c2real_aux(fftw_plan plan, int howmany,
33 fftw_complex *in, int istride, int idist,
34 fftw_real *out, int ostride, int odist,
35 fftw_real *work);
36 extern void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
37 fftw_real *in, int istride, int idist,
38 fftw_complex *out, int ostride, int odist,
39 fftw_real *work);
40 extern void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
41 fftw_complex *in, int istride, int idist,
42 fftw_real *out, int ostride, int odist,
43 fftw_real *work);
44
45 /********************** Initializing the RFFTWND Plan ***********************/
46
47 /*
48 * Create an fftwnd_plan specialized for specific arrays. (These
49 * arrays are ignored, however, if they are NULL or if the flags
50 * do not include FFTW_MEASURE.) The main advantage of being
51 * provided arrays like this is that we can do runtime timing
52 * measurements of our options, without worrying about allocating
53 * excessive scratch space.
54 */
rfftwnd_create_plan_specific(int rank,const int * n,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)55 fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n,
56 fftw_direction dir, int flags,
57 fftw_real *in, int istride,
58 fftw_real *out, int ostride)
59 {
60 fftwnd_plan p;
61 int i;
62 int rflags = flags & ~FFTW_IN_PLACE;
63 /* note that we always do rfftw transforms out-of-place in rexec2.c */
64
65 if (flags & FFTW_IN_PLACE) {
66 out = NULL;
67 ostride = istride;
68 }
69 istride = ostride = 1; /*
70 * strides don't work yet, since it is not
71 * clear whether they apply to real
72 * or complex data
73 */
74
75 if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
76 return 0;
77
78 for (i = 0; i < rank - 1; ++i)
79 p->n_after[i] = (n[rank - 1]/2 + 1) * (p->n_after[i] / n[rank - 1]);
80 if (rank > 0)
81 p->n[rank - 1] = n[rank - 1] / 2 + 1;
82
83 p->plans = fftwnd_new_plan_array(rank);
84 if (rank > 0 && !p->plans) {
85 rfftwnd_destroy_plan(p);
86 return 0;
87 }
88 if (rank > 0) {
89 p->plans[rank - 1] = rfftw_create_plan(n[rank - 1], dir, rflags);
90 if (!p->plans[rank - 1]) {
91 rfftwnd_destroy_plan(p);
92 return 0;
93 }
94 }
95 if (rank > 1) {
96 if (!(flags & FFTW_MEASURE) || in == 0
97 || (!p->is_in_place && out == 0)) {
98 if (!fftwnd_create_plans_generic(p->plans, rank - 1, n,
99 dir, flags | FFTW_IN_PLACE)) {
100 rfftwnd_destroy_plan(p);
101 return 0;
102 }
103 } else if (dir == FFTW_COMPLEX_TO_REAL || (flags & FFTW_IN_PLACE)) {
104 if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
105 p->n_after,
106 dir, flags | FFTW_IN_PLACE,
107 (fftw_complex *) in,
108 istride,
109 0, 0)) {
110 rfftwnd_destroy_plan(p);
111 return 0;
112 }
113 } else {
114 if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
115 p->n_after,
116 dir, flags | FFTW_IN_PLACE,
117 (fftw_complex *) out,
118 ostride,
119 0, 0)) {
120 rfftwnd_destroy_plan(p);
121 return 0;
122 }
123 }
124 }
125 p->nbuffers = 0;
126 p->nwork = fftwnd_work_size(rank, p->n, flags | FFTW_IN_PLACE,
127 p->nbuffers + 1);
128 if (p->nwork && !(flags & FFTW_THREADSAFE)) {
129 p->work = (fftw_complex *) fftw_malloc(p->nwork
130 * sizeof(fftw_complex));
131 if (!p->work) {
132 rfftwnd_destroy_plan(p);
133 return 0;
134 }
135 }
136 return p;
137 }
138
rfftw2d_create_plan_specific(int nx,int ny,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)139 fftwnd_plan rfftw2d_create_plan_specific(int nx, int ny,
140 fftw_direction dir, int flags,
141 fftw_real *in, int istride,
142 fftw_real *out, int ostride)
143 {
144 int n[2];
145
146 n[0] = nx;
147 n[1] = ny;
148
149 return rfftwnd_create_plan_specific(2, n, dir, flags,
150 in, istride, out, ostride);
151 }
152
rfftw3d_create_plan_specific(int nx,int ny,int nz,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)153 fftwnd_plan rfftw3d_create_plan_specific(int nx, int ny, int nz,
154 fftw_direction dir, int flags,
155 fftw_real *in, int istride,
156 fftw_real *out, int ostride)
157 {
158 int n[3];
159
160 n[0] = nx;
161 n[1] = ny;
162 n[2] = nz;
163
164 return rfftwnd_create_plan_specific(3, n, dir, flags,
165 in, istride, out, ostride);
166 }
167
168 /* Create a generic fftwnd plan: */
169
rfftwnd_create_plan(int rank,const int * n,fftw_direction dir,int flags)170 fftwnd_plan rfftwnd_create_plan(int rank, const int *n,
171 fftw_direction dir, int flags)
172 {
173 return rfftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1);
174 }
175
rfftw2d_create_plan(int nx,int ny,fftw_direction dir,int flags)176 fftwnd_plan rfftw2d_create_plan(int nx, int ny,
177 fftw_direction dir, int flags)
178 {
179 return rfftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1);
180 }
181
rfftw3d_create_plan(int nx,int ny,int nz,fftw_direction dir,int flags)182 fftwnd_plan rfftw3d_create_plan(int nx, int ny, int nz,
183 fftw_direction dir, int flags)
184 {
185 return rfftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1);
186 }
187
188 /************************ Freeing the RFFTWND Plan ************************/
189
rfftwnd_destroy_plan(fftwnd_plan plan)190 void rfftwnd_destroy_plan(fftwnd_plan plan)
191 {
192 fftwnd_destroy_plan(plan);
193 }
194
195 /************************ Printing the RFFTWND Plan ************************/
196
rfftwnd_fprint_plan(FILE * f,fftwnd_plan plan)197 void rfftwnd_fprint_plan(FILE *f, fftwnd_plan plan)
198 {
199 fftwnd_fprint_plan(f, plan);
200 }
201
rfftwnd_print_plan(fftwnd_plan plan)202 void rfftwnd_print_plan(fftwnd_plan plan)
203 {
204 rfftwnd_fprint_plan(stdout, plan);
205 }
206
207 /*********** Computing the N-Dimensional FFT: Auxiliary Routines ************/
208
rfftwnd_real2c_aux(fftwnd_plan p,int cur_dim,fftw_real * in,int istride,fftw_complex * out,int ostride,fftw_real * work)209 void rfftwnd_real2c_aux(fftwnd_plan p, int cur_dim,
210 fftw_real *in, int istride,
211 fftw_complex *out, int ostride,
212 fftw_real *work)
213 {
214 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
215
216 if (cur_dim == p->rank - 2) {
217 /* just do the last dimension directly: */
218 if (p->is_in_place)
219 rfftw_real2c_aux(p->plans[p->rank - 1], n,
220 in, istride, (n_after * istride) * 2,
221 out, istride, n_after * istride,
222 work);
223 else
224 rfftw_real2c_aux(p->plans[p->rank - 1], n,
225 in, istride, p->plans[p->rank - 1]->n * istride,
226 out, ostride, n_after * ostride,
227 work);
228 } else { /* we have at least two dimensions to go */
229 int nr = p->plans[p->rank - 1]->n;
230 int n_after_r = p->is_in_place ? n_after * 2
231 : nr * (n_after / (nr/2 + 1));
232 int i;
233
234 /*
235 * process the subsequent dimensions recursively, in hyperslabs,
236 * to get maximum locality:
237 */
238 for (i = 0; i < n; ++i)
239 rfftwnd_real2c_aux(p, cur_dim + 1,
240 in + i * n_after_r * istride, istride,
241 out + i * n_after * ostride, ostride, work);
242 }
243
244 /* do the current dimension (in-place): */
245 fftw(p->plans[cur_dim], n_after,
246 out, n_after * ostride, ostride,
247 (fftw_complex *) work, 1, 0);
248 /* I hate this cast */
249 }
250
rfftwnd_c2real_aux(fftwnd_plan p,int cur_dim,fftw_complex * in,int istride,fftw_real * out,int ostride,fftw_real * work)251 void rfftwnd_c2real_aux(fftwnd_plan p, int cur_dim,
252 fftw_complex *in, int istride,
253 fftw_real *out, int ostride,
254 fftw_real *work)
255 {
256 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
257
258 /* do the current dimension (in-place): */
259 fftw(p->plans[cur_dim], n_after,
260 in, n_after * istride, istride,
261 (fftw_complex *) work, 1, 0);
262
263 if (cur_dim == p->rank - 2) {
264 /* just do the last dimension directly: */
265 if (p->is_in_place)
266 rfftw_c2real_aux(p->plans[p->rank - 1], n,
267 in, istride, n_after * istride,
268 out, istride, (n_after * istride) * 2,
269 work);
270 else
271 rfftw_c2real_aux(p->plans[p->rank - 1], n,
272 in, istride, n_after * istride,
273 out, ostride, p->plans[p->rank - 1]->n * ostride,
274 work);
275 } else { /* we have at least two dimensions to go */
276 int nr = p->plans[p->rank - 1]->n;
277 int n_after_r = p->is_in_place ? n_after * 2 :
278 nr * (n_after / (nr/2 + 1));
279 int i;
280
281 /*
282 * process the subsequent dimensions recursively, in hyperslabs,
283 * to get maximum locality:
284 */
285 for (i = 0; i < n; ++i)
286 rfftwnd_c2real_aux(p, cur_dim + 1,
287 in + i * n_after * istride, istride,
288 out + i * n_after_r * ostride, ostride, work);
289 }
290 }
291
292 /*
293 * alternate version of rfftwnd_aux -- this version pushes the howmany
294 * loop down to the leaves of the computation, for greater locality
295 * in cases where dist < stride. It is also required for correctness
296 * if in==out, and we must call a special version of the executor.
297 * Note that work must point to 'howmany' copies of its data
298 * if in == out.
299 */
300
rfftwnd_real2c_aux_howmany(fftwnd_plan p,int cur_dim,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_complex * work)301 void rfftwnd_real2c_aux_howmany(fftwnd_plan p, int cur_dim,
302 int howmany,
303 fftw_real *in, int istride, int idist,
304 fftw_complex *out, int ostride, int odist,
305 fftw_complex *work)
306 {
307 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
308 int k;
309
310 if (cur_dim == p->rank - 2) {
311 /* just do the last dimension directly: */
312 if (p->is_in_place)
313 for (k = 0; k < n; ++k)
314 rfftw_real2c_overlap_aux(p->plans[p->rank - 1], howmany,
315 in + (k * n_after * istride) * 2,
316 istride, idist,
317 out + (k * n_after * ostride),
318 ostride, odist,
319 (fftw_real *) work);
320 else {
321 int nlast = p->plans[p->rank - 1]->n;
322 for (k = 0; k < n; ++k)
323 rfftw_real2c_aux(p->plans[p->rank - 1], howmany,
324 in + k * nlast * istride,
325 istride, idist,
326 out + k * n_after * ostride,
327 ostride, odist,
328 (fftw_real *) work);
329 }
330 } else { /* we have at least two dimensions to go */
331 int nr = p->plans[p->rank - 1]->n;
332 int n_after_r = p->is_in_place ? n_after * 2 :
333 nr * (n_after / (nr/2 + 1));
334 int i;
335
336 /*
337 * process the subsequent dimensions recursively, in hyperslabs,
338 * to get maximum locality:
339 */
340 for (i = 0; i < n; ++i)
341 rfftwnd_real2c_aux_howmany(p, cur_dim + 1, howmany,
342 in + i * n_after_r * istride, istride, idist,
343 out + i * n_after * ostride, ostride, odist,
344 work);
345 }
346
347 /* do the current dimension (in-place): */
348 for (k = 0; k < n_after; ++k)
349 fftw(p->plans[cur_dim], howmany,
350 out + k * ostride, n_after * ostride, odist,
351 work, 1, 0);
352 }
353
rfftwnd_c2real_aux_howmany(fftwnd_plan p,int cur_dim,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_complex * work)354 void rfftwnd_c2real_aux_howmany(fftwnd_plan p, int cur_dim,
355 int howmany,
356 fftw_complex *in, int istride, int idist,
357 fftw_real *out, int ostride, int odist,
358 fftw_complex *work)
359 {
360 int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
361 int k;
362
363 /* do the current dimension (in-place): */
364 for (k = 0; k < n_after; ++k)
365 fftw(p->plans[cur_dim], howmany,
366 in + k * istride, n_after * istride, idist,
367 work, 1, 0);
368
369 if (cur_dim == p->rank - 2) {
370 /* just do the last dimension directly: */
371 if (p->is_in_place)
372 for (k = 0; k < n; ++k)
373 rfftw_c2real_overlap_aux(p->plans[p->rank - 1], howmany,
374 in + (k * n_after * istride),
375 istride, idist,
376 out + (k * n_after * ostride) * 2,
377 ostride, odist,
378 (fftw_real *) work);
379 else {
380 int nlast = p->plans[p->rank - 1]->n;
381 for (k = 0; k < n; ++k)
382 rfftw_c2real_aux(p->plans[p->rank - 1], howmany,
383 in + k * n_after * istride,
384 istride, idist,
385 out + k * nlast * ostride,
386 ostride, odist,
387 (fftw_real *) work);
388 }
389 } else { /* we have at least two dimensions to go */
390 int nr = p->plans[p->rank - 1]->n;
391 int n_after_r = p->is_in_place ? n_after * 2
392 : nr * (n_after / (nr/2 + 1));
393 int i;
394
395 /*
396 * process the subsequent dimensions recursively, in hyperslabs,
397 * to get maximum locality:
398 */
399 for (i = 0; i < n; ++i)
400 rfftwnd_c2real_aux_howmany(p, cur_dim + 1, howmany,
401 in + i * n_after * istride, istride, idist,
402 out + i * n_after_r * ostride, ostride, odist,
403 work);
404 }
405 }
406
407 /********** Computing the N-Dimensional FFT: User-Visible Routines **********/
408
rfftwnd_real_to_complex(fftwnd_plan p,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist)409 void rfftwnd_real_to_complex(fftwnd_plan p, int howmany,
410 fftw_real *in, int istride, int idist,
411 fftw_complex *out, int ostride, int odist)
412 {
413 fftw_complex *work = p->work;
414 int rank = p->rank;
415 int free_work = 0;
416
417 if (p->dir != FFTW_REAL_TO_COMPLEX)
418 fftw_die("rfftwnd_real_to_complex with complex-to-real plan");
419
420 #ifdef FFTW_DEBUG
421 if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
422 && p->nwork && p->work)
423 fftw_die("bug with FFTW_THREADSAFE flag");
424 #endif
425
426 if (p->is_in_place) {
427 ostride = istride;
428 odist = (idist == 1 && idist < istride) ? 1 : (idist / 2); /* ugh */
429 out = (fftw_complex *) in;
430 if (howmany > 1 && istride > idist && rank > 0) {
431 int new_nwork;
432
433 new_nwork = p->n[rank - 1] * howmany;
434 if (new_nwork > p->nwork) {
435 work = (fftw_complex *)
436 fftw_malloc(sizeof(fftw_complex) * new_nwork);
437 if (!work)
438 fftw_die("error allocating work array");
439 free_work = 1;
440 }
441 }
442 }
443 if (p->nwork && !work) {
444 work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
445 free_work = 1;
446 }
447 switch (rank) {
448 case 0:
449 break;
450 case 1:
451 if (p->is_in_place && howmany > 1 && istride > idist)
452 rfftw_real2c_overlap_aux(p->plans[0], howmany,
453 in, istride, idist,
454 out, ostride, odist,
455 (fftw_real *) work);
456 else
457 rfftw_real2c_aux(p->plans[0], howmany,
458 in, istride, idist,
459 out, ostride, odist,
460 (fftw_real *) work);
461 break;
462 default: /* rank >= 2 */
463 {
464 if (howmany > 1 && ostride > odist)
465 rfftwnd_real2c_aux_howmany(p, 0, howmany,
466 in, istride, idist,
467 out, ostride, odist,
468 work);
469 else {
470 int i;
471
472 for (i = 0; i < howmany; ++i)
473 rfftwnd_real2c_aux(p, 0,
474 in + i * idist, istride,
475 out + i * odist, ostride,
476 (fftw_real *) work);
477 }
478 }
479 }
480
481 if (free_work)
482 fftw_free(work);
483 }
484
rfftwnd_complex_to_real(fftwnd_plan p,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist)485 void rfftwnd_complex_to_real(fftwnd_plan p, int howmany,
486 fftw_complex *in, int istride, int idist,
487 fftw_real *out, int ostride, int odist)
488 {
489 fftw_complex *work = p->work;
490 int rank = p->rank;
491 int free_work = 0;
492
493 if (p->dir != FFTW_COMPLEX_TO_REAL)
494 fftw_die("rfftwnd_complex_to_real with real-to-complex plan");
495
496 #ifdef FFTW_DEBUG
497 if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
498 && p->nwork && p->work)
499 fftw_die("bug with FFTW_THREADSAFE flag");
500 #endif
501
502 if (p->is_in_place) {
503 ostride = istride;
504 odist = idist;
505 odist = (idist == 1 && idist < istride) ? 1 : (idist * 2); /* ugh */
506 out = (fftw_real *) in;
507 if (howmany > 1 && istride > idist && rank > 0) {
508 int new_nwork = p->n[rank - 1] * howmany;
509 if (new_nwork > p->nwork) {
510 work = (fftw_complex *)
511 fftw_malloc(sizeof(fftw_complex) * new_nwork);
512 if (!work)
513 fftw_die("error allocating work array");
514 free_work = 1;
515 }
516 }
517 }
518 if (p->nwork && !work) {
519 work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
520 free_work = 1;
521 }
522 switch (rank) {
523 case 0:
524 break;
525 case 1:
526 if (p->is_in_place && howmany > 1 && istride > idist)
527 rfftw_c2real_overlap_aux(p->plans[0], howmany,
528 in, istride, idist,
529 out, ostride, odist,
530 (fftw_real *) work);
531 else
532 rfftw_c2real_aux(p->plans[0], howmany,
533 in, istride, idist,
534 out, ostride, odist,
535 (fftw_real *) work);
536 break;
537 default: /* rank >= 2 */
538 {
539 if (howmany > 1 && ostride > odist)
540 rfftwnd_c2real_aux_howmany(p, 0, howmany,
541 in, istride, idist,
542 out, ostride, odist,
543 work);
544 else {
545 int i;
546
547 for (i = 0; i < howmany; ++i)
548 rfftwnd_c2real_aux(p, 0,
549 in + i * idist, istride,
550 out + i * odist, ostride,
551 (fftw_real *) work);
552 }
553 }
554 }
555
556 if (free_work)
557 fftw_free(work);
558 }
559
rfftwnd_one_real_to_complex(fftwnd_plan p,fftw_real * in,fftw_complex * out)560 void rfftwnd_one_real_to_complex(fftwnd_plan p,
561 fftw_real *in, fftw_complex *out)
562 {
563 rfftwnd_real_to_complex(p, 1, in, 1, 1, out, 1, 1);
564 }
565
rfftwnd_one_complex_to_real(fftwnd_plan p,fftw_complex * in,fftw_real * out)566 void rfftwnd_one_complex_to_real(fftwnd_plan p,
567 fftw_complex *in, fftw_real *out)
568 {
569 rfftwnd_complex_to_real(p, 1, in, 1, 1, out, 1, 1);
570 }
571