1 
2 /*
3  * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
4  *
5  * This program is free software; you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation; either version 2 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18  *
19  */
20 
21 /* $Id: rfftwnd.c,v 1.35 2003/03/16 23:43:46 stevenj Exp $ */
22 
23 #include "fftw-int.h"
24 #include "rfftw.h"
25 
26 /********************** prototypes for rexec2 routines **********************/
27 
28 extern void rfftw_real2c_aux(fftw_plan plan, int howmany,
29 			     fftw_real *in, int istride, int idist,
30 			     fftw_complex *out, int ostride, int odist,
31 			     fftw_real *work);
32 extern void rfftw_c2real_aux(fftw_plan plan, int howmany,
33 			     fftw_complex *in, int istride, int idist,
34 			     fftw_real *out, int ostride, int odist,
35 			     fftw_real *work);
36 extern void rfftw_real2c_overlap_aux(fftw_plan plan, int howmany,
37 				   fftw_real *in, int istride, int idist,
38 			       fftw_complex *out, int ostride, int odist,
39 				     fftw_real *work);
40 extern void rfftw_c2real_overlap_aux(fftw_plan plan, int howmany,
41 				fftw_complex *in, int istride, int idist,
42 				  fftw_real *out, int ostride, int odist,
43 				     fftw_real *work);
44 
45 /********************** Initializing the RFFTWND Plan ***********************/
46 
47 /*
48  * Create an fftwnd_plan specialized for specific arrays.  (These
49  * arrays are ignored, however, if they are NULL or if the flags
50  * do not include FFTW_MEASURE.)  The main advantage of being
51  * provided arrays like this is that we can do runtime timing
52  * measurements of our options, without worrying about allocating
53  * excessive scratch space.
54  */
rfftwnd_create_plan_specific(int rank,const int * n,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)55 fftwnd_plan rfftwnd_create_plan_specific(int rank, const int *n,
56 					 fftw_direction dir, int flags,
57 					 fftw_real *in, int istride,
58 					 fftw_real *out, int ostride)
59 {
60      fftwnd_plan p;
61      int i;
62      int rflags = flags & ~FFTW_IN_PLACE;
63      /* note that we always do rfftw transforms out-of-place in rexec2.c */
64 
65      if (flags & FFTW_IN_PLACE) {
66 	  out = NULL;
67 	  ostride = istride;
68      }
69      istride = ostride = 1;	/*
70 				 * strides don't work yet, since it is not
71 				 * clear whether they apply to real
72 				 * or complex data
73 				 */
74 
75      if (!(p = fftwnd_create_plan_aux(rank, n, dir, flags)))
76 	  return 0;
77 
78      for (i = 0; i < rank - 1; ++i)
79 	  p->n_after[i] = (n[rank - 1]/2 + 1) * (p->n_after[i] / n[rank - 1]);
80      if (rank > 0)
81 	  p->n[rank - 1] = n[rank - 1] / 2 + 1;
82 
83      p->plans = fftwnd_new_plan_array(rank);
84      if (rank > 0 && !p->plans) {
85 	  rfftwnd_destroy_plan(p);
86 	  return 0;
87      }
88      if (rank > 0) {
89 	  p->plans[rank - 1] = rfftw_create_plan(n[rank - 1], dir, rflags);
90 	  if (!p->plans[rank - 1]) {
91 	       rfftwnd_destroy_plan(p);
92 	       return 0;
93 	  }
94      }
95      if (rank > 1) {
96 	  if (!(flags & FFTW_MEASURE) || in == 0
97 	      || (!p->is_in_place && out == 0)) {
98 	       if (!fftwnd_create_plans_generic(p->plans, rank - 1, n,
99 					   dir, flags | FFTW_IN_PLACE)) {
100 		    rfftwnd_destroy_plan(p);
101 		    return 0;
102 	       }
103 	  } else if (dir == FFTW_COMPLEX_TO_REAL || (flags & FFTW_IN_PLACE)) {
104 	       if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
105 						 p->n_after,
106 					      dir, flags | FFTW_IN_PLACE,
107 						 (fftw_complex *) in,
108 						 istride,
109 						 0, 0)) {
110 		    rfftwnd_destroy_plan(p);
111 		    return 0;
112 	       }
113 	  } else {
114 	       if (!fftwnd_create_plans_specific(p->plans, rank - 1, n,
115 						 p->n_after,
116 					      dir, flags | FFTW_IN_PLACE,
117 						 (fftw_complex *) out,
118 						 ostride,
119 						 0, 0)) {
120 		    rfftwnd_destroy_plan(p);
121 		    return 0;
122 	       }
123 	  }
124      }
125      p->nbuffers = 0;
126      p->nwork = fftwnd_work_size(rank, p->n, flags | FFTW_IN_PLACE,
127 				 p->nbuffers + 1);
128      if (p->nwork && !(flags & FFTW_THREADSAFE)) {
129 	  p->work = (fftw_complex *) fftw_malloc(p->nwork
130 						 * sizeof(fftw_complex));
131 	  if (!p->work) {
132 	       rfftwnd_destroy_plan(p);
133 	       return 0;
134 	  }
135      }
136      return p;
137 }
138 
rfftw2d_create_plan_specific(int nx,int ny,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)139 fftwnd_plan rfftw2d_create_plan_specific(int nx, int ny,
140 					 fftw_direction dir, int flags,
141 					 fftw_real *in, int istride,
142 					 fftw_real *out, int ostride)
143 {
144      int n[2];
145 
146      n[0] = nx;
147      n[1] = ny;
148 
149      return rfftwnd_create_plan_specific(2, n, dir, flags,
150 					 in, istride, out, ostride);
151 }
152 
rfftw3d_create_plan_specific(int nx,int ny,int nz,fftw_direction dir,int flags,fftw_real * in,int istride,fftw_real * out,int ostride)153 fftwnd_plan rfftw3d_create_plan_specific(int nx, int ny, int nz,
154 					 fftw_direction dir, int flags,
155 					 fftw_real *in, int istride,
156 					 fftw_real *out, int ostride)
157 {
158      int n[3];
159 
160      n[0] = nx;
161      n[1] = ny;
162      n[2] = nz;
163 
164      return rfftwnd_create_plan_specific(3, n, dir, flags,
165 					 in, istride, out, ostride);
166 }
167 
168 /* Create a generic fftwnd plan: */
169 
rfftwnd_create_plan(int rank,const int * n,fftw_direction dir,int flags)170 fftwnd_plan rfftwnd_create_plan(int rank, const int *n,
171 				fftw_direction dir, int flags)
172 {
173      return rfftwnd_create_plan_specific(rank, n, dir, flags, 0, 1, 0, 1);
174 }
175 
rfftw2d_create_plan(int nx,int ny,fftw_direction dir,int flags)176 fftwnd_plan rfftw2d_create_plan(int nx, int ny,
177 				fftw_direction dir, int flags)
178 {
179      return rfftw2d_create_plan_specific(nx, ny, dir, flags, 0, 1, 0, 1);
180 }
181 
rfftw3d_create_plan(int nx,int ny,int nz,fftw_direction dir,int flags)182 fftwnd_plan rfftw3d_create_plan(int nx, int ny, int nz,
183 				fftw_direction dir, int flags)
184 {
185      return rfftw3d_create_plan_specific(nx, ny, nz, dir, flags, 0, 1, 0, 1);
186 }
187 
188 /************************ Freeing the RFFTWND Plan ************************/
189 
rfftwnd_destroy_plan(fftwnd_plan plan)190 void rfftwnd_destroy_plan(fftwnd_plan plan)
191 {
192      fftwnd_destroy_plan(plan);
193 }
194 
195 /************************ Printing the RFFTWND Plan ************************/
196 
rfftwnd_fprint_plan(FILE * f,fftwnd_plan plan)197 void rfftwnd_fprint_plan(FILE *f, fftwnd_plan plan)
198 {
199      fftwnd_fprint_plan(f, plan);
200 }
201 
rfftwnd_print_plan(fftwnd_plan plan)202 void rfftwnd_print_plan(fftwnd_plan plan)
203 {
204      rfftwnd_fprint_plan(stdout, plan);
205 }
206 
207 /*********** Computing the N-Dimensional FFT: Auxiliary Routines ************/
208 
rfftwnd_real2c_aux(fftwnd_plan p,int cur_dim,fftw_real * in,int istride,fftw_complex * out,int ostride,fftw_real * work)209 void rfftwnd_real2c_aux(fftwnd_plan p, int cur_dim,
210 			fftw_real *in, int istride,
211 			fftw_complex *out, int ostride,
212 			fftw_real *work)
213 {
214      int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
215 
216      if (cur_dim == p->rank - 2) {
217 	  /* just do the last dimension directly: */
218 	  if (p->is_in_place)
219 	       rfftw_real2c_aux(p->plans[p->rank - 1], n,
220 				in, istride, (n_after * istride) * 2,
221 				out, istride, n_after * istride,
222 				work);
223 	  else
224 	       rfftw_real2c_aux(p->plans[p->rank - 1], n,
225 			 in, istride, p->plans[p->rank - 1]->n * istride,
226 				out, ostride, n_after * ostride,
227 				work);
228      } else {			/* we have at least two dimensions to go */
229 	  int nr = p->plans[p->rank - 1]->n;
230 	  int n_after_r = p->is_in_place ? n_after * 2
231 	       : nr * (n_after / (nr/2 + 1));
232 	  int i;
233 
234 	  /*
235 	   * process the subsequent dimensions recursively, in hyperslabs,
236 	   * to get maximum locality:
237 	   */
238 	  for (i = 0; i < n; ++i)
239 	       rfftwnd_real2c_aux(p, cur_dim + 1,
240 				  in + i * n_after_r * istride, istride,
241 			     out + i * n_after * ostride, ostride, work);
242      }
243 
244      /* do the current dimension (in-place): */
245      fftw(p->plans[cur_dim], n_after,
246 	  out, n_after * ostride, ostride,
247 	  (fftw_complex *) work, 1, 0);
248      /* I hate this cast */
249 }
250 
rfftwnd_c2real_aux(fftwnd_plan p,int cur_dim,fftw_complex * in,int istride,fftw_real * out,int ostride,fftw_real * work)251 void rfftwnd_c2real_aux(fftwnd_plan p, int cur_dim,
252 			fftw_complex *in, int istride,
253 			fftw_real *out, int ostride,
254 			fftw_real *work)
255 {
256      int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
257 
258      /* do the current dimension (in-place): */
259      fftw(p->plans[cur_dim], n_after,
260 	  in, n_after * istride, istride,
261 	  (fftw_complex *) work, 1, 0);
262 
263      if (cur_dim == p->rank - 2) {
264 	  /* just do the last dimension directly: */
265 	  if (p->is_in_place)
266 	       rfftw_c2real_aux(p->plans[p->rank - 1], n,
267 				in, istride, n_after * istride,
268 				out, istride, (n_after * istride) * 2,
269 				work);
270 	  else
271 	       rfftw_c2real_aux(p->plans[p->rank - 1], n,
272 				in, istride, n_after * istride,
273 			out, ostride, p->plans[p->rank - 1]->n * ostride,
274 				work);
275      } else {			/* we have at least two dimensions to go */
276 	  int nr = p->plans[p->rank - 1]->n;
277 	  int n_after_r = p->is_in_place ? n_after * 2 :
278 	       nr * (n_after / (nr/2 + 1));
279 	  int i;
280 
281 	  /*
282 	   * process the subsequent dimensions recursively, in hyperslabs,
283 	   * to get maximum locality:
284 	   */
285 	  for (i = 0; i < n; ++i)
286 	       rfftwnd_c2real_aux(p, cur_dim + 1,
287 				  in + i * n_after * istride, istride,
288 			   out + i * n_after_r * ostride, ostride, work);
289      }
290 }
291 
292 /*
293  * alternate version of rfftwnd_aux -- this version pushes the howmany
294  * loop down to the leaves of the computation, for greater locality
295  * in cases where dist < stride.  It is also required for correctness
296  * if in==out, and we must call a special version of the executor.
297  * Note that work must point to 'howmany' copies of its data
298  * if in == out.
299  */
300 
rfftwnd_real2c_aux_howmany(fftwnd_plan p,int cur_dim,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist,fftw_complex * work)301 void rfftwnd_real2c_aux_howmany(fftwnd_plan p, int cur_dim,
302 				int howmany,
303 				fftw_real *in, int istride, int idist,
304 				fftw_complex *out, int ostride, int odist,
305 				fftw_complex *work)
306 {
307      int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
308      int k;
309 
310      if (cur_dim == p->rank - 2) {
311 	  /* just do the last dimension directly: */
312 	  if (p->is_in_place)
313 	       for (k = 0; k < n; ++k)
314 		    rfftw_real2c_overlap_aux(p->plans[p->rank - 1], howmany,
315 					in + (k * n_after * istride) * 2,
316 					     istride, idist,
317 					   out + (k * n_after * ostride),
318 					     ostride, odist,
319 					     (fftw_real *) work);
320 	  else {
321 	       int nlast = p->plans[p->rank - 1]->n;
322 	       for (k = 0; k < n; ++k)
323 		    rfftw_real2c_aux(p->plans[p->rank - 1], howmany,
324 				     in + k * nlast * istride,
325 				     istride, idist,
326 				     out + k * n_after * ostride,
327 				     ostride, odist,
328 				     (fftw_real *) work);
329 	  }
330      } else {			/* we have at least two dimensions to go */
331 	  int nr = p->plans[p->rank - 1]->n;
332 	  int n_after_r = p->is_in_place ? n_after * 2 :
333 	       nr * (n_after / (nr/2 + 1));
334 	  int i;
335 
336 	  /*
337 	   * process the subsequent dimensions recursively, in hyperslabs,
338 	   * to get maximum locality:
339 	   */
340 	  for (i = 0; i < n; ++i)
341 	       rfftwnd_real2c_aux_howmany(p, cur_dim + 1, howmany,
342 			    in + i * n_after_r * istride, istride, idist,
343 			     out + i * n_after * ostride, ostride, odist,
344 					  work);
345      }
346 
347      /* do the current dimension (in-place): */
348      for (k = 0; k < n_after; ++k)
349 	  fftw(p->plans[cur_dim], howmany,
350 	       out + k * ostride, n_after * ostride, odist,
351 	       work, 1, 0);
352 }
353 
rfftwnd_c2real_aux_howmany(fftwnd_plan p,int cur_dim,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist,fftw_complex * work)354 void rfftwnd_c2real_aux_howmany(fftwnd_plan p, int cur_dim,
355 				int howmany,
356 				fftw_complex *in, int istride, int idist,
357 				fftw_real *out, int ostride, int odist,
358 				fftw_complex *work)
359 {
360      int n_after = p->n_after[cur_dim], n = p->n[cur_dim];
361      int k;
362 
363      /* do the current dimension (in-place): */
364      for (k = 0; k < n_after; ++k)
365 	  fftw(p->plans[cur_dim], howmany,
366 	       in + k * istride, n_after * istride, idist,
367 	       work, 1, 0);
368 
369      if (cur_dim == p->rank - 2) {
370 	  /* just do the last dimension directly: */
371 	  if (p->is_in_place)
372 	       for (k = 0; k < n; ++k)
373 		    rfftw_c2real_overlap_aux(p->plans[p->rank - 1], howmany,
374 					     in + (k * n_after * istride),
375 					     istride, idist,
376 				       out + (k * n_after * ostride) * 2,
377 					     ostride, odist,
378 					     (fftw_real *) work);
379 	  else {
380 	       int nlast = p->plans[p->rank - 1]->n;
381 	       for (k = 0; k < n; ++k)
382 		    rfftw_c2real_aux(p->plans[p->rank - 1], howmany,
383 				     in + k * n_after * istride,
384 				     istride, idist,
385 				     out + k * nlast * ostride,
386 				     ostride, odist,
387 				     (fftw_real *) work);
388 	  }
389      } else {			/* we have at least two dimensions to go */
390 	  int nr = p->plans[p->rank - 1]->n;
391 	  int n_after_r = p->is_in_place ? n_after * 2
392 	       : nr * (n_after / (nr/2 + 1));
393 	  int i;
394 
395 	  /*
396 	   * process the subsequent dimensions recursively, in hyperslabs,
397 	   * to get maximum locality:
398 	   */
399 	  for (i = 0; i < n; ++i)
400 	       rfftwnd_c2real_aux_howmany(p, cur_dim + 1, howmany,
401 			      in + i * n_after * istride, istride, idist,
402 			   out + i * n_after_r * ostride, ostride, odist,
403 					  work);
404      }
405 }
406 
407 /********** Computing the N-Dimensional FFT: User-Visible Routines **********/
408 
rfftwnd_real_to_complex(fftwnd_plan p,int howmany,fftw_real * in,int istride,int idist,fftw_complex * out,int ostride,int odist)409 void rfftwnd_real_to_complex(fftwnd_plan p, int howmany,
410 			     fftw_real *in, int istride, int idist,
411 			     fftw_complex *out, int ostride, int odist)
412 {
413      fftw_complex *work = p->work;
414      int rank = p->rank;
415      int free_work = 0;
416 
417      if (p->dir != FFTW_REAL_TO_COMPLEX)
418 	  fftw_die("rfftwnd_real_to_complex with complex-to-real plan");
419 
420 #ifdef FFTW_DEBUG
421      if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
422 	 && p->nwork && p->work)
423 	  fftw_die("bug with FFTW_THREADSAFE flag");
424 #endif
425 
426      if (p->is_in_place) {
427 	  ostride = istride;
428 	  odist = (idist == 1 && idist < istride) ? 1 : (idist / 2);  /* ugh */
429 	  out = (fftw_complex *) in;
430 	  if (howmany > 1 && istride > idist && rank > 0) {
431 	       int new_nwork;
432 
433 	       new_nwork = p->n[rank - 1] * howmany;
434 	       if (new_nwork > p->nwork) {
435 		    work = (fftw_complex *)
436 			fftw_malloc(sizeof(fftw_complex) * new_nwork);
437 		    if (!work)
438 			 fftw_die("error allocating work array");
439 		    free_work = 1;
440 	       }
441 	  }
442      }
443      if (p->nwork && !work) {
444 	  work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
445 	  free_work = 1;
446      }
447      switch (rank) {
448 	 case 0:
449 	      break;
450 	 case 1:
451 	      if (p->is_in_place && howmany > 1 && istride > idist)
452 		   rfftw_real2c_overlap_aux(p->plans[0], howmany,
453 					    in, istride, idist,
454 					    out, ostride, odist,
455 					    (fftw_real *) work);
456 	      else
457 		   rfftw_real2c_aux(p->plans[0], howmany,
458 				    in, istride, idist,
459 				    out, ostride, odist,
460 				    (fftw_real *) work);
461 	      break;
462 	 default:		/* rank >= 2 */
463 	      {
464 		   if (howmany > 1 && ostride > odist)
465 			rfftwnd_real2c_aux_howmany(p, 0, howmany,
466 						   in, istride, idist,
467 						   out, ostride, odist,
468 						   work);
469 		   else {
470 			int i;
471 
472 			for (i = 0; i < howmany; ++i)
473 			     rfftwnd_real2c_aux(p, 0,
474 						in + i * idist, istride,
475 						out + i * odist, ostride,
476 						(fftw_real *) work);
477 		   }
478 	      }
479      }
480 
481      if (free_work)
482 	  fftw_free(work);
483 }
484 
rfftwnd_complex_to_real(fftwnd_plan p,int howmany,fftw_complex * in,int istride,int idist,fftw_real * out,int ostride,int odist)485 void rfftwnd_complex_to_real(fftwnd_plan p, int howmany,
486 			     fftw_complex *in, int istride, int idist,
487 			     fftw_real *out, int ostride, int odist)
488 {
489      fftw_complex *work = p->work;
490      int rank = p->rank;
491      int free_work = 0;
492 
493      if (p->dir != FFTW_COMPLEX_TO_REAL)
494 	  fftw_die("rfftwnd_complex_to_real with real-to-complex plan");
495 
496 #ifdef FFTW_DEBUG
497      if (p->rank > 0 && (p->plans[0]->flags & FFTW_THREADSAFE)
498 	 && p->nwork && p->work)
499 	  fftw_die("bug with FFTW_THREADSAFE flag");
500 #endif
501 
502      if (p->is_in_place) {
503 	  ostride = istride;
504 	  odist = idist;
505 	  odist = (idist == 1 && idist < istride) ? 1 : (idist * 2);  /* ugh */
506 	  out = (fftw_real *) in;
507 	  if (howmany > 1 && istride > idist && rank > 0) {
508 	       int new_nwork = p->n[rank - 1] * howmany;
509 	       if (new_nwork > p->nwork) {
510 		    work = (fftw_complex *)
511 			fftw_malloc(sizeof(fftw_complex) * new_nwork);
512 		    if (!work)
513 			 fftw_die("error allocating work array");
514 		    free_work = 1;
515 	       }
516 	  }
517      }
518      if (p->nwork && !work) {
519 	  work = (fftw_complex *) fftw_malloc(sizeof(fftw_complex) * p->nwork);
520 	  free_work = 1;
521      }
522      switch (rank) {
523 	 case 0:
524 	      break;
525 	 case 1:
526 	      if (p->is_in_place && howmany > 1 && istride > idist)
527 		   rfftw_c2real_overlap_aux(p->plans[0], howmany,
528 					    in, istride, idist,
529 					    out, ostride, odist,
530 					    (fftw_real *) work);
531 	      else
532 		   rfftw_c2real_aux(p->plans[0], howmany,
533 				    in, istride, idist,
534 				    out, ostride, odist,
535 				    (fftw_real *) work);
536 	      break;
537 	 default:		/* rank >= 2 */
538 	      {
539 		   if (howmany > 1 && ostride > odist)
540 			rfftwnd_c2real_aux_howmany(p, 0, howmany,
541 						   in, istride, idist,
542 						   out, ostride, odist,
543 						   work);
544 		   else {
545 			int i;
546 
547 			for (i = 0; i < howmany; ++i)
548 			     rfftwnd_c2real_aux(p, 0,
549 						in + i * idist, istride,
550 						out + i * odist, ostride,
551 						(fftw_real *) work);
552 		   }
553 	      }
554      }
555 
556      if (free_work)
557 	  fftw_free(work);
558 }
559 
rfftwnd_one_real_to_complex(fftwnd_plan p,fftw_real * in,fftw_complex * out)560 void rfftwnd_one_real_to_complex(fftwnd_plan p,
561 				 fftw_real *in, fftw_complex *out)
562 {
563      rfftwnd_real_to_complex(p, 1, in, 1, 1, out, 1, 1);
564 }
565 
rfftwnd_one_complex_to_real(fftwnd_plan p,fftw_complex * in,fftw_real * out)566 void rfftwnd_one_complex_to_real(fftwnd_plan p,
567 				 fftw_complex *in, fftw_real *out)
568 {
569      rfftwnd_complex_to_real(p, 1, in, 1, 1, out, 1, 1);
570 }
571