1 #include <stdlib.h>
2 
3 /************************************************************************
4   This file is an OpenMP (OMP) compatible version of the
5     venerable mixed-radix FFTN function.
6   This file is meant to be #include-d into another C file,
7     and it does NOT require that USE_OMP be defined, nor does
8     it require being compiled with OpenMP enabled.
9   It uses some macros in file Aomp.h to re-define global static scalars
10     as arrays, so that they can be thread-safe. If Aomp.h is not
11     available to you, the macro definitions needed are given below.
12   To be compatible with the AJJ/RWC function
13       csfft_cox( int mode , int idim , complex *xc )
14     instead do this:
15       fftnf_OMP( idim, NULL, &(xc[0].r), &(xc[0].i), 2*mode, 0.0 )
16     or use the csfft_OMP() macro defined below
17   As a side note, the SUN_BROKEN_REALLOC code has been removed,
18     since no one uses the Sun C compiler anymore, and for that
19     matter, no one uses Sun computers anymore.
20   -- RWCox == @AFNIman -- 10 Nov 2020
21 ************************************************************************/
22 
23 /*=====================================================================*/
24 #ifndef _SECOND_ROUND_INCLUDE_
25 #define _SECOND_ROUND_INCLUDE_
26 
27 #define USING_FFTN
28 
29 #ifdef USE_OMP
30 # include "Aomp.h"      /* part of the AFNI software package */
31 
32 /**** Below are the Aomp.h macros used herein;
33       'AO' is an abbreviation for 'AFNI OpenMP'.
34       If you don't want AFNI code polluting your life, then throw out
35       the #include just above and instead enable the definitions below. ****/
36 # if 0
37 #   define AOth omp_get_thread_num()
38 #   define AO_NTH_MAX 99
39 #   define AO_DEFINE_SCALAR(typ,nam) static typ  AO##nam[AO_NTH_MAX]
40 #   define AO_DEFINE_ARRAY(typ,nam)  static typ *AO##nam[AO_NTH_MAX] ;  \
41                                      static int AOL##nam[AO_NTH_MAX]
42 #   define AO_ARRAY_LEN(nam)         AOL##nam[AOth]
43 
44 #   define AO_RESIZE_ARRAY(typ,nam,len)                                 \
45      do{ int hh=AOth ;                                                  \
46          if( AOL##nam[hh] < len ){                                      \
47            AO##nam[hh] = (typ *)realloc(AO##nam[hh],sizeof(typ)*len) ;  \
48            AOL##nam[hh] = len ;                                         \
49      } } while(0)
50 # endif
51 
52 #endif /* USE_OMP */
53 
54 #define FFT_NODOUBLE    /* use only the float version fftnf() */
55 
56 #ifndef PROTO_FFNTF_OMP
57 #define PROTO_FFNTF_OMP
58 int fftnf_OMP( int ndim, const int dims[],
59                float Re[], float Im[], int iSign, double scaling ) ;
60 #endif
61 
62 /* macro to make fftnt look like csfft,
63    for ease of porting code from AFNI-land */
64 
65 #define csfft_OMP(mode,idim,xc)                                        \
66   fftnf_OMP( (idim), NULL, &((xc)[0].r), &((xc)[0].i), 2*(mode), 0.0 )
67 
68 /*---------------------------------------------------------------------*/
69 /* Return the next highest integer that is of the form
70      2^a * 3^b * 5^c
71    where a >= 1 (i.e., value is even; at least one factor of 2)
72    and   b <= 1 (i.e., at most one factor of 3)
73    and   c <= 1 (i.e., at most one factor of 5)
74    These values are efficient for FFTs, while
75    giving some extra flexibility over just powers of 2.
76    Code is adapted from the csfft_nextup() function in csfft.c.
77 *//*-------------------------------------------------------------------*/
78 
fftn_nextup_one35(int idim)79 int fftn_nextup_one35( int idim )
80 {
81    int ibase=2 ;
82    while(1){  /* loop over ibase=powers of 2 */
83       if(              idim <=    ibase   ) return    ibase   ;
84       if( 4 < ibase && idim <=  5*ibase/4 ) return  5*ibase/4 ; /* c=1 */
85       if( 2 < ibase && idim <=  3*ibase/2 ) return  3*ibase/2 ; /* b=1 */
86       if( 8 < ibase && idim <= 15*ibase/8 ) return 15*ibase/8 ; /* b=c=1 */
87       ibase *= 2 ; /* next power of 2 */
88    }
89    return -666 ; /*unreachable*/
90 }
91 
92 /*---------------------------------------------------------------------*/
93 /* Return the smallest integer that is even, greater than or equal
94    to idim, and has no prime factors except 2, 3, and 5 -- but any
95    number of powers of 3 and 5 are OK, unlike the previous function.
96 *//*-------------------------------------------------------------------*/
97 
fftn_nextup_even(int idim)98 int fftn_nextup_even( int idim )
99 {
100    int ibase=idim , itemp ;
101    if( ibase   <= 2 ) return 2;
102    if( ibase%2 == 1 ) ibase++ ; /* make it even */
103    while(1){
104      itemp = ibase ;    /* start here and see what happens below */
105      while( itemp%4 == 0 ){ itemp /= 4 ; }  /* divide out all 4s */
106      if   ( itemp%2 == 0 ){ itemp /= 2 ; }  /* any remaining 2?  */
107      while( itemp%3 == 0 ){ itemp /= 3 ; }  /* divide out all 3s */
108      while( itemp%5 == 0 ){ itemp /= 5 ; }  /* divide out all 5s */
109      if( itemp == 1 ) return ibase ; /* nothing was left == GOOD */
110      ibase += 2 ;            /* otherwise, try next even integer */
111    }
112    return -666 ; /*unreachable*/
113 }
114 
115 #endif /* _SECOND_ROUND_INCLUDE_ */
116 
117 /*=====================================================================*/
118 
119 /*--------------------------------*-C-*---------------------------------*
120  * File:
121  *	fftn.c
122  *
123  *	multivariate complex Fourier transform, computed in place
124  *	using mixed-radix Fast Fourier Transform algorithm.
125  *
126  *	Fortran code by:
127  *	    RC Singleton, Stanford Research Institute, Sept. 1968
128  *		NIST Guide to Available Math Software.
129  *		Source for module FFT from package GO.
130  *		Retrieved from NETLIB on Wed Jul  5 11:50:07 1995.
131  *	translated by f2c (version 19950721) and with lots of cleanup
132  *	to make it resemble C by:
133  *	    MJ Olesen, Queen's University at Kingston, 1995-97
134  */
135 /*{{{ Copyright: */
136 /*
137  * Copyright(c)1995,97 Mark Olesen <olesen@me.QueensU.CA>
138  *		Queen's Univ at Kingston (Canada)
139  *
140  * Permission to use, copy, modify, and distribute this software for
141  * any purpose without fee is hereby granted, provided that this
142  * entire notice is included in all copies of any software which is
143  * or includes a copy or modification of this software and in all
144  * copies of the supporting documentation for such software.
145  *
146  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR
147  * IMPLIED WARRANTY.  IN PARTICULAR, NEITHER THE AUTHOR NOR QUEEN'S
148  * UNIVERSITY AT KINGSTON MAKES ANY REPRESENTATION OR WARRANTY OF ANY
149  * KIND CONCERNING THE MERCHANTABILITY OF THIS SOFTWARE OR ITS
150  * FITNESS FOR ANY PARTICULAR PURPOSE.
151  *
152  * All of which is to say that you can do what you like with this
153  * source code provided you don't try to sell it as your own and you
154  * include an unaltered copy of this message (including the
155  * copyright).
156  *
157  * It is also implicitly understood that bug fixes and improvements
158  * should make their way back to the general Internet community so
159  * that everyone benefits.
160  *----------------------------------------------------------------------*/
161 /*}}}*/
162 /*{{{ notes: */
163 /*
164  * Public:
165  *	fft_free
166  *	fftn / fftnf
167  *	(these are documented in the header file)
168  *
169  * Private:
170  *	fftradix / fftradixf
171  *
172  * ----------------------------------------------------------------------*
173  * int fftradix (REAL Re[], REAL Im[], size_t nTotal, size_t nPass,
174  *		 size_t nSpan, int iSign, size_t maxFactors,
175  *		 size_t maxPerm);
176  *
177  * RE and IM hold the real and imaginary components of the data, and
178  * return the resulting real and imaginary Fourier coefficients.
179  * Multidimensional data *must* be allocated contiguously.  There is
180  * no limit on the number of dimensions.
181  *
182  *
183  * Although there is no limit on the number of dimensions, fftradix()
184  * must be called once for each dimension, but the calls may be in
185  * any order.
186  *
187  * NTOTAL = the total number of complex data values
188  * NPASS  = the dimension of the current variable
189  * NSPAN/NPASS = the spacing of consecutive data values while indexing
190  *	the current variable
191  * ISIGN - see above documentation
192  *
193  * example:
194  * tri-variate transform with Re[n1][n2][n3], Im[n1][n2][n3]
195  *
196  *	fftradix (Re, Im, n1*n2*n3, n1,       n1, 1, maxf, maxp);
197  *	fftradix (Re, Im, n1*n2*n3, n2,    n1*n2, 1, maxf, maxp);
198  *	fftradix (Re, Im, n1*n2*n3, n3, n1*n2*n3, 1, maxf, maxp);
199  *
200  * single-variate transform,
201  *    NTOTAL = N = NSPAN = (number of complex data values),
202  *
203  *	fftradix (Re, Im, n, n, n, 1, maxf, maxp);
204  *
205  * The data can also be stored in a single array with alternating
206  * real and imaginary parts, the magnitude of ISIGN is changed to 2
207  * to give correct indexing increment, and data [0] and data [1] used
208  * to pass the initial addresses for the sequences of real and
209  * imaginary values,
210  *
211  * example:
212  *	REAL data [2*NTOTAL];
213  *	fftradix (&data[0], &data[1], NTOTAL, nPass, nSpan, 2, maxf, maxp);
214  *
215  * for temporary allocation:
216  *
217  * MAXFACTORS	>= the maximum prime factor of NPASS
218  * MAXPERM	>= the number of prime factors of NPASS.  In addition,
219  *	if the square-free portion K of NPASS has two or more prime
220  *	factors, then MAXPERM >= (K-1)
221  *
222  * storage in FACTOR for a maximum of 15 prime factors of NPASS. if
223  * NPASS has more than one square-free factor, the product of the
224  * square-free factors must be <= 210 array storage for maximum prime
225  * factor of 23 the following two constants should agree with the
226  * array dimensions.
227  * ----------------------------------------------------------------------*/
228 /*}}}*/
229 /*{{{ Revisions: */
230 /*
231  * 26 July 95	John Beale
232  *	- added maxf and maxp as parameters to fftradix()
233  *
234  * 28 July 95	Mark Olesen <olesen@me.QueensU.CA>
235  *	- cleaned-up the Fortran 66 goto spaghetti, only 3 labels remain.
236  *
237  *	- added fft_free() to provide some measure of control over
238  *	  allocation/deallocation.
239  *
240  *	- added fftn() wrapper for multidimensional FFTs
241  *
242  *	- use -DFFT_NOFLOAT or -DFFT_NODOUBLE to avoid compiling that
243  *	  precision. Note suffix `f' on the function names indicates
244  *	  float precision.
245  *
246  *	- revised documentation
247  *
248  * 31 July 95	Mark Olesen <olesen@me.QueensU.CA>
249  *	- added GNU Public License
250  *	- more cleanup
251  *	- define SUN_BROKEN_REALLOC to use malloc() instead of realloc()
252  *	  on the first pass through, apparently needed for old libc
253  *	- removed #error directive in favour of some code that simply
254  *	  won't compile (generate an error that way)
255  *
256  * 1 Aug 95	Mark Olesen <olesen@me.QueensU.CA>
257  *	- define FFT_RADIX4 to only have radix 2, radix 4 transforms
258  *	- made fftradix /fftradixf () static scope, just use fftn()
259  *	  instead.  If you have good ideas about fixing the factors
260  *	  in fftn() please do so.
261  *
262  * 8 Jan 95	mj olesen <olesen@me.QueensU.CA>
263  *	- fixed typo's, including one that broke scaling for scaling by
264  *	  total number of matrix elements or the square root of same
265  *	- removed unnecessary casts from allocations
266  *
267  * 10 Dec 96	mj olesen <olesen@me.QueensU.CA>
268  *	- changes defines to compile *without* float support by default,
269  *	  use -DFFT_FLOAT to enable.
270  *	- shifted some variables to local scope	(better hints for optimizer)
271  *	- added Michael Steffens <Michael.Steffens@mbox.muk.uni-hannover.de>
272  *	  Fortran 90 module
273  *	- made it simpler to pass dimensions for 1D FFT.
274  *
275  * 23 Feb 97	Mark Olesen <olesen@me.QueensU.CA>
276  *	- removed the GNU Public License (see 21 July 1995 entry),
277  *	  which should make it clear why I have the right to do so.
278  *	- Added copyright notice and submitted to netlib
279  *	- Moved documentation for the public functions to the header
280  *	  files where is will always be available.
281  * ----------------------------------------------------------------------*/
282 /*}}}*/
283 #ifndef _FFTN_C
284 #define _FFTN_C
285 /* we use CPP to re-include this same file for double/float cases
286 #if !defined (lint) && !defined (__FILE__)
287 Error: your compiler is sick!  define __FILE__ yourself (a string)
288 eg, something like -D__FILE__=\"fftn.c\"
289 #endif
290 */
291 #include <stdio.h>
292 #include <stdlib.h>
293 #include <math.h>
294 #include "fftn.h"
295 
296 /*{{{ defines/constants */
297 #ifndef M_PI
298 # define M_PI	3.14159265358979323846264338327950288
299 #endif
300 
301 #ifndef SIN60
302 # define SIN60	0.86602540378443865	/* sin(60 deg) */
303 # define COS72	0.30901699437494742	/* cos(72 deg) */
304 # define SIN72	0.95105651629515357	/* sin(72 deg) */
305 #endif
306 /*}}}*/
307 
308 /*{{{ static parameters - for memory management */
309 
310 #ifdef USE_OMP
311 AO_DEFINE_SCALAR(size_t,SpaceAlloced) ;
312 AO_DEFINE_SCALAR(size_t,MaxPermAlloced) ;
313 #else
314 static size_t SpaceAlloced = 0;
315 static size_t MaxPermAlloced = 0;
316 #endif
317 
318 /* temp space, (void *) since both float and double routines use it */
319 
320 #ifdef USE_OMP
321 AO_DEFINE_ARRAY(void,Tmp0) ;
322 AO_DEFINE_ARRAY(void,Tmp1) ;
323 AO_DEFINE_ARRAY(void,Tmp2) ;
324 AO_DEFINE_ARRAY(void,Tmp3) ;
325 AO_DEFINE_ARRAY(int,Perm)  ;
326 #else
327 static void * Tmp0 = NULL;	/* temp space for real part */
328 static void * Tmp1 = NULL;	/* temp space for imaginary part */
329 static void * Tmp2 = NULL;	/* temp space for Cosine values */
330 static void * Tmp3 = NULL;	/* temp space for Sine values */
331 static int  * Perm = NULL;	/* Permutation vector */
332 #endif
333 
334 #define NFACTOR	11
335 #ifdef USE_OMP
336 AO_DEFINE_ARRAY(int,factor) ;   /* must be allocated later */
337 #else
338 static int factor [NFACTOR];    /* pre-allocated */
339 #endif
340 /*}}}*/
341 
342 /*{{{ fft_free() */
343 static void
fft_free_OMP(void)344 fft_free_OMP (void)
345 {
346 #ifdef USE_OMP
347 #pragma omp critical (MALLOC)
348  {
349    AO_VALUE(SpaceAlloced) = AO_VALUE(MaxPermAlloced) = 0;
350    if (AO_VALUE(Tmp0)) { free (AO_VALUE(Tmp0)); AO_VALUE(Tmp0) = NULL; }
351    if (AO_VALUE(Tmp1)) { free (AO_VALUE(Tmp1)); AO_VALUE(Tmp1) = NULL; }
352    if (AO_VALUE(Tmp2)) { free (AO_VALUE(Tmp2)); AO_VALUE(Tmp2) = NULL; }
353    if (AO_VALUE(Tmp3)) { free (AO_VALUE(Tmp3)); AO_VALUE(Tmp3) = NULL; }
354    if (AO_VALUE(Perm)) { free (AO_VALUE(Perm)); AO_VALUE(Perm) = NULL; }
355  }
356 #else
357    SpaceAlloced = MaxPermAlloced = 0;
358    if (Tmp0) { free (Tmp0); Tmp0 = NULL; }
359    if (Tmp1) { free (Tmp1); Tmp1 = NULL; }
360    if (Tmp2) { free (Tmp2); Tmp2 = NULL; }
361    if (Tmp3) { free (Tmp3); Tmp3 = NULL; }
362    if (Perm) { free (Perm); Perm = NULL; }
363 #endif
364 }
365 /*}}}*/
366 
367 /* return the number of factors */
368 static int
factorize(int nPass,int * kt)369 factorize (int nPass, int * kt)
370 {
371    int nFactor = 0;
372    int j, jj;
373 
374 #ifdef USE_OMP
375    int *factor ;
376    if( AO_ARRAY_LEN(factor) < NFACTOR ){    /* this code will be used */
377 #pragma omp critical (MALLOC)               /* once per thread to make */
378      AO_RESIZE_ARRAY(int,factor,NFACTOR) ;  /* the permanent factor[] array */
379    }
380    factor = AO_VALUE(factor) ;
381 #endif
382 
383    *kt = 0;
384    /* determine the factors of n */
385    while ((nPass % 16) == 0)	/* factors of 4 */
386      {
387 	factor [nFactor++] = 4;
388 	nPass /= 16;
389      }
390    j = 3; jj = 9;		/* factors of 3, 5, 7, ... */
391    do {
392       while ((nPass % jj) == 0)
393 	{
394 	   factor [nFactor++] = j;
395 	   nPass /= jj;
396 	}
397       j += 2;
398       jj = j * j;
399    } while (jj <= nPass);
400    if (nPass <= 4)
401      {
402 	*kt = nFactor;
403 	factor [nFactor] = nPass;
404 	if (nPass != 1)
405 	  nFactor++;
406      }
407    else
408      {
409 	if (nPass - (nPass / 4 << 2) == 0)
410 	  {
411 	     factor [nFactor++] = 2;
412 	     nPass /= 4;
413 	  }
414 	*kt = nFactor;
415 	j = 2;
416 	do {
417 	   if ((nPass % j) == 0)
418 	     {
419 		factor [nFactor++] = j;
420 		nPass /= j;
421 	     }
422 	   j = ((j + 1) / 2 << 1) + 1;
423 	} while (j <= nPass);
424      }
425    if (*kt)
426      {
427 	j = *kt;
428 	do
429 	  factor [nFactor++] = factor [--j];
430 	while (j);
431      }
432 
433    return nFactor;
434 }
435 
436 /* re-include this source file on the second pass through */
437 /*{{{ defines for re-including double precision */
438 #ifdef FFT_NODOUBLE
439 # ifndef FFT_FLOAT
440 #  define FFT_FLOAT
441 # endif
442 #else
443 # undef REAL
444 # undef FFTN
445 # undef FFTNS
446 # undef FFTRADIX
447 # undef FFTRADIXS
448 /* defines for double */
449 # define REAL		double
450 # define FFTN		fftn
451 # define FFTNS		"fftn"
452 # define FFTRADIX	fftradix
453 # define FFTRADIXS	"fftradix"
454 /* double precision routine */
455 static int
456 fftradix (double Re[], double Im[],
457 	  size_t nTotal, size_t nPass, size_t nSpan, int isign,
458 	  int maxFactors, int maxPerm);
459 # include __FILE__			/* include this file again */
460 #endif
461 /*}}}*/
462 
463 /*{{{ defines for re-including float precision */
464 #ifdef FFT_FLOAT
465 # undef REAL
466 # undef FFTN
467 # undef FFTNS
468 # undef FFTRADIX
469 # undef FFTRADIXS
470 /* defines for float */
471 # define REAL		float
472 # define FFTN		fftnf_OMP		/* trailing 'f' for float */
473 # define FFTNS		"fftnf_OMP"		/* name for error message */
474 # define FFTRADIX	fftradixf	/* trailing 'f' for float */
475 # define FFTRADIXS	"fftradixf"	/* name for error message */
476 /* float precision routine */
477 static int
478 fftradixf (float Re[], float Im[],
479 	   size_t nTotal, size_t nPass, size_t nSpan, int isign,
480 	   int maxFactors, int maxPerm);
481 # include __FILE__			/* include this file again */
482 #endif
483 /*}}}*/
484 #else	/* _FFTN_C */
485 
486 /*
487  * use macros to access the Real/Imaginary parts so that it's possible
488  * to substitute different macros if a complex struct is used
489  */
490 
491 #ifndef Re_Data
492 # define Re_Data(i)	Re[i]
493 # define Im_Data(i)	Im[i]
494 #endif
495 
496 /*
497  *
498  */
499 int
FFTN(int ndim,const int dims[],REAL Re[],REAL Im[],int iSign,double scaling)500 FFTN (int ndim,
501       const int dims [],
502       REAL Re [],
503       REAL Im [],
504       int iSign,
505       double scaling)
506 {
507    size_t nTotal;
508    int maxFactors, maxPerm;
509 
510    /*
511     * tally the number of elements in the data array
512     * and determine the number of dimensions
513     */
514    /* printf("Calling fft with isign=%d scale=%g dim=%d\n",iSign,scaling,dims[0]);  */
515    nTotal = 1;
516    if (ndim)
517      {
518 	if (dims != NULL)
519 	  {
520 	     int i;
521 	     /* number of dimensions was specified */
522 	     for (i = 0; i < ndim; i++)
523 	       {
524 		  if (dims [i] <= 0) goto Dimension_Error;
525 		  nTotal *= dims [i];
526 	       }
527 	  }
528 	else
529 	  nTotal *= ndim;
530      }
531    else
532      {
533 	int i;
534 	/* determine # of dimensions from zero-terminated list */
535 	if (dims == NULL) goto Dimension_Error;
536 	for (ndim = i = 0; dims [i]; i++)
537 	  {
538 	     if (dims [i] <= 0)
539 	       goto Dimension_Error;
540 	     nTotal *= dims [i];
541 	     ndim++;
542 	  }
543      }
544 
545    /* determine maximum number of factors and permuations */
546 #if 1
547    /*
548     * follow John Beale's example, just use the largest dimension and don't
549     * worry about excess allocation.  May be someone else will do it?
550     */
551    if (dims != NULL)
552      {
553 	int i;
554 	for (maxFactors = maxPerm = 1, i = 0; i < ndim; i++)
555 	  {
556 	     if (dims [i] > maxFactors) maxFactors = dims [i];
557 	     if (dims [i] > maxPerm) maxPerm = dims [i];
558 	  }
559      }
560    else
561      {
562 	maxFactors = maxPerm = nTotal;
563      }
564 #else
565    /* use the constants used in the original Fortran code */
566    maxFactors = 23;
567    maxPerm = 209;
568 #endif
569    /* loop over the dimensions: */
570    if (dims != NULL)
571      {
572 	size_t nSpan = 1;
573 	int i;
574 
575 	for (i = 0; i < ndim; i++)
576 	  {
577 	     int ret;
578 	     nSpan *= dims [i];
579 	     ret = FFTRADIX (Re, Im, nTotal, dims [i], nSpan, iSign,
580 			     maxFactors, maxPerm);
581 	     /* exit, clean-up already done */
582 	     if (ret)
583 	       return ret;
584           }
585      }
586    else
587      {
588 	int ret;
589 	ret = FFTRADIX (Re, Im, nTotal, nTotal, nTotal, iSign,
590 			maxFactors, maxPerm);
591 	/* exit, clean-up already done */
592 	if (ret)
593 	  return ret;
594      }
595 
596    /* Divide through by the normalizing constant: */
597    if (scaling && scaling != 1.0)
598      {
599 	int i;
600 
601 	if (iSign < 0) iSign = -iSign;
602 	if (scaling < 0.0)
603 	  scaling = (scaling < -1.0) ? sqrt (nTotal) : nTotal;
604 	scaling = 1.0 / scaling;	/* multiply is often faster */
605 	for (i = 0; i < nTotal; i += iSign)
606 	  {
607 	     Re_Data (i) *= scaling;
608 	     Im_Data (i) *= scaling;
609 	  }
610      }
611    return 0;
612 
613    Dimension_Error:
614    fprintf (stderr, "Error: " FFTNS "() - dimension error\n");
615    fft_free_OMP ();	/* free-up memory */
616    return -1;
617 }
618 
619 /*----------------------------------------------------------------------*/
620 /*
621  * singleton's mixed radix routine
622  *
623  * could move allocation out to fftn(), but leave it here so that it's
624  * possible to make this a standalone function
625  */
626 static int
FFTRADIX(REAL Re[],REAL Im[],size_t nTotal,size_t nPass,size_t nSpan,int iSign,int maxFactors,int maxPerm)627 FFTRADIX (REAL Re [],
628 	  REAL Im [],
629 	  size_t nTotal,
630 	  size_t nPass,
631 	  size_t nSpan,
632 	  int iSign,
633 	  int maxFactors,
634 	  int maxPerm)
635 {
636    int ii, nFactor, kspan, ispan, inc;
637    int j, jc, jf, jj, k, k1, k3, kk, kt, nn, ns, nt;
638 #ifdef USE_OMP
639    int *Perm , *factor ;
640 #endif
641 
642    REAL radf;
643    REAL c1, c2, c3, cd;
644    REAL s1, s2, s3, sd;
645 
646    REAL * Rtmp = NULL;		/* temp space for real part*/
647    REAL * Itmp = NULL;		/* temp space for imaginary part */
648    REAL * Cos = NULL;		/* Cosine values */
649    REAL * Sin = NULL;		/* Sine values */
650 
651 #ifndef FFT_RADIX4
652    REAL s60 = SIN60;		/* sin(60 deg) */
653    REAL s72 = SIN72;		/* sin(72 deg) */
654    REAL c72 = COS72;		/* cos(72 deg) */
655 #endif
656    REAL pi2 = M_PI;		/* use PI first, 2 PI later */
657 
658    /* gcc complains about k3 being uninitialized, but I can't find out where
659     * or why ... it looks okay to me.
660     *
661     * initialize to make gcc happy
662     */
663    k3 = 0;
664 
665    /* gcc complains about c2, c3, s2,s3 being uninitialized, but they're
666     * only used for the radix 4 case and only AFTER the (s1 == 0.0) pass
667     * through the loop at which point they will have been calculated.
668     *
669     * initialize to make gcc happy
670     */
671    c2 = c3 = s2 = s3 = 0.0;
672 
673    /* Parameter adjustments, was fortran so fix zero-offset */
674    Re--;
675    Im--;
676 
677 /* fprintf(stderr,"FFTRADIX: nTotal=%d\n",(int)nTotal) ; */
678 
679    if (nPass < 2)
680      return 0;
681 
682    /* allocate storage */
683 #pragma omp critical (MALLOC)
684  {
685 #ifdef USE_OMP
686    if (AO_VALUE(SpaceAlloced) < maxFactors * sizeof (REAL))
687      {
688 	     AO_VALUE(SpaceAlloced) = maxFactors * sizeof (REAL);
689 	     AO_VALUE(Tmp0) = realloc (AO_VALUE(Tmp0), 2*AO_VALUE(SpaceAlloced));
690 	     AO_VALUE(Tmp1) = realloc (AO_VALUE(Tmp1), 2*AO_VALUE(SpaceAlloced));
691 	     AO_VALUE(Tmp2) = realloc (AO_VALUE(Tmp2), 2*AO_VALUE(SpaceAlloced));
692 	     AO_VALUE(Tmp3) = realloc (AO_VALUE(Tmp3), 2*AO_VALUE(SpaceAlloced));
693      }
694 #else
695    if (SpaceAlloced < maxFactors * sizeof (REAL))
696      {
697 	     SpaceAlloced = maxFactors * sizeof (REAL);
698 	     Tmp0 = realloc (Tmp0, 2*SpaceAlloced);
699 	     Tmp1 = realloc (Tmp1, 2*SpaceAlloced);
700 	     Tmp2 = realloc (Tmp2, 2*SpaceAlloced);
701 	     Tmp3 = realloc (Tmp3, 2*SpaceAlloced);
702      }
703 #endif
704    else
705      {
706 	/* allow full use of alloc'd space */
707 #ifdef USE_OMP
708 	maxFactors = AO_VALUE(SpaceAlloced) / sizeof (REAL);
709 #else
710 	maxFactors = SpaceAlloced / sizeof (REAL);
711 #endif
712      }
713 #ifdef USE_OMP
714    if ( AO_VALUE(MaxPermAlloced) < maxPerm )
715      {
716       AO_VALUE(Perm) = realloc (AO_VALUE(Perm), 2*maxPerm * sizeof(int));
717       AO_VALUE(MaxPermAlloced) = maxPerm;
718      }
719 #else
720    if ( MaxPermAlloced < maxPerm )
721      {
722       Perm = realloc (Perm, 2*maxPerm * sizeof(int));
723       MaxPermAlloced = maxPerm;
724      }
725 #endif
726    else
727      {
728 	/* allow full use of alloc'd space */
729 #ifdef USE_OMP
730 	maxPerm = AO_VALUE(MaxPermAlloced);
731 #else
732 	maxPerm = MaxPermAlloced;
733 #endif
734      }
735  } /* end critical section */
736 #if USE_OMP
737    if (!AO_VALUE(Tmp0) || !AO_VALUE(Tmp1) ||
738        !AO_VALUE(Tmp2) || !AO_VALUE(Tmp3) || !AO_VALUE(Perm)) goto Memory_Error;
739    /* assign pointers */
740    Rtmp = (REAL *) AO_VALUE(Tmp0);
741    Itmp = (REAL *) AO_VALUE(Tmp1);
742    Cos  = (REAL *) AO_VALUE(Tmp2);
743    Sin  = (REAL *) AO_VALUE(Tmp3);
744 #else
745    if (!Tmp0 || !Tmp1 || !Tmp2 || !Tmp3 || !Perm) goto Memory_Error;
746    /* assign pointers */
747    Rtmp = (REAL *) Tmp0;
748    Itmp = (REAL *) Tmp1;
749    Cos  = (REAL *) Tmp2;
750    Sin  = (REAL *) Tmp3;
751 #endif
752 
753    /*
754     * Function Body
755     */
756    inc = iSign;
757    if (iSign < 0)
758      {
759 #ifndef FFT_RADIX4
760 	s60 = -s60;
761 	s72 = -s72;
762 #endif
763 	pi2 = -pi2;
764 	inc = -inc;		/* absolute value */
765      }
766 
767    /* adjust for strange increments */
768    nt = inc * nTotal;
769    ns = inc * nSpan;
770    kspan = ns;
771 
772    nn = nt - inc;
773    jc = ns / nPass;
774    radf = pi2 * (double) jc;
775    pi2 *= 2.0;			/* use 2 PI from here on */
776 
777    ii = 0;
778    jf = 0;
779    /* determine the factors of n */
780 
781    nFactor = factorize (nPass, &kt);
782    /* test that nFactors is in range */
783    if (nFactor > NFACTOR)
784      {
785 	fprintf (stderr, "Error: " FFTRADIXS "() - exceeded number of factors\n");
786 	goto Memory_Error;
787      }
788 
789 #ifdef USE_OMP
790      factor = AO_VALUE(factor) ;
791 #endif
792 
793    /* compute fourier transform */
794    for (;;) {
795       sd = radf / (double) kspan;
796       cd = sin (sd);
797       cd = 2.0 * cd * cd;
798       sd = sin (sd + sd);
799       kk = 1;
800       ii++;
801 
802 /* fprintf(stderr,"FFTRADIX factor=%d\n",factor[ii-1]) ; */
803 
804       switch (factor [ii - 1]) {
805        case 2:
806 	 /* transform for factor of 2 (including rotation factor) */
807 	 kspan /= 2;
808 	 k1 = kspan + 2;
809 	 do {
810 	    do {
811 	       REAL tmpr;
812 	       REAL tmpi;
813 	       int k2;
814 
815 	       k2 = kk + kspan;
816 	       tmpr = Re_Data (k2);
817 	       tmpi = Im_Data (k2);
818 	       Re_Data (k2) = Re_Data (kk) - tmpr;
819 	       Im_Data (k2) = Im_Data (kk) - tmpi;
820 	       Re_Data (kk) += tmpr;
821 	       Im_Data (kk) += tmpi;
822 	       kk = k2 + kspan;
823 	    } while (kk <= nn);
824 	    kk -= nn;
825 	 } while (kk <= jc);
826 	 if (kk > kspan)
827 	   goto Permute_Results;	/* exit infinite loop */
828 	 do {
829 	    int k2;
830 
831 	    c1 = 1.0 - cd;
832 	    s1 = sd;
833 	    do {
834 	       REAL tmp;
835 	       do {
836 		  do {
837 		     REAL tmpr;
838 		     REAL tmpi;
839 
840 		     k2 = kk + kspan;
841 		     tmpr = Re_Data (kk) - Re_Data (k2);
842 		     tmpi = Im_Data (kk) - Im_Data (k2);
843 		     Re_Data (kk) += Re_Data (k2);
844 		     Im_Data (kk) += Im_Data (k2);
845 		     Re_Data (k2) = c1 * tmpr - s1 * tmpi;
846 		     Im_Data (k2) = s1 * tmpr + c1 * tmpi;
847 		     kk = k2 + kspan;
848 		  } while (kk < nt);
849 		  k2 = kk - nt;
850 		  c1 = -c1;
851 		  kk = k1 - k2;
852 	       } while (kk > k2);
853 	       tmp = c1 - (cd * c1 + sd * s1);
854 	       s1 = sd * c1 - cd * s1 + s1;
855 	       c1 = 2.0 - (tmp * tmp + s1 * s1);
856 	       s1 *= c1;
857 	       c1 *= tmp;
858 	       kk += jc;
859 	    } while (kk < k2);
860 	    k1 += (inc + inc);
861 	    kk = (k1 - kspan) / 2 + jc;
862 	 } while (kk <= jc + jc);
863 	 break;
864 
865        case 4:			/* transform for factor of 4 */
866 	 ispan = kspan;
867 	 kspan /= 4;
868 
869 	 do {
870 	    c1 = 1.0;
871 	    s1 = 0.0;
872 	    do {
873 	       do {
874 		  REAL ajm, ajp, akm, akp;
875 		  REAL bjm, bjp, bkm, bkp;
876 		  int k2;
877 
878 		  k1 = kk + kspan;
879 		  k2 = k1 + kspan;
880 		  k3 = k2 + kspan;
881 		  akp = Re_Data (kk) + Re_Data (k2);
882 		  akm = Re_Data (kk) - Re_Data (k2);
883 
884 		  ajp = Re_Data (k1) + Re_Data (k3);
885 		  ajm = Re_Data (k1) - Re_Data (k3);
886 
887 		  bkp = Im_Data (kk) + Im_Data (k2);
888 		  bkm = Im_Data (kk) - Im_Data (k2);
889 
890 		  bjp = Im_Data (k1) + Im_Data (k3);
891 		  bjm = Im_Data (k1) - Im_Data (k3);
892 
893 		  Re_Data (kk) = akp + ajp;
894 		  Im_Data (kk) = bkp + bjp;
895 		  ajp = akp - ajp;
896 		  bjp = bkp - bjp;
897 		  if (iSign < 0)
898 		    {
899 		       akp = akm + bjm;
900 		       bkp = bkm - ajm;
901 		       akm -= bjm;
902 		       bkm += ajm;
903 		    }
904 		  else
905 		    {
906 		       akp = akm - bjm;
907 		       bkp = bkm + ajm;
908 		       akm += bjm;
909 		       bkm -= ajm;
910 		    }
911 		  /* avoid useless multiplies */
912 		  if (s1 == 0.0)
913 		    {
914 		       Re_Data (k1) = akp;
915 		       Re_Data (k2) = ajp;
916 		       Re_Data (k3) = akm;
917 		       Im_Data (k1) = bkp;
918 		       Im_Data (k2) = bjp;
919 		       Im_Data (k3) = bkm;
920 		    }
921 		  else
922 		    {
923 		       Re_Data (k1) = akp * c1 - bkp * s1;
924 		       Re_Data (k2) = ajp * c2 - bjp * s2;
925 		       Re_Data (k3) = akm * c3 - bkm * s3;
926 		       Im_Data (k1) = akp * s1 + bkp * c1;
927 		       Im_Data (k2) = ajp * s2 + bjp * c2;
928 		       Im_Data (k3) = akm * s3 + bkm * c3;
929 		    }
930 		  kk = k3 + kspan;
931 	       } while (kk <= nt);
932 
933 	       c2 = c1 - (cd * c1 + sd * s1);
934 	       s1 = sd * c1 - cd * s1 + s1;
935 	       c1 = 2.0 - (c2 * c2 + s1 * s1);
936 	       s1 *= c1;
937 	       c1 *= c2;
938 	       /* values of c2, c3, s2, s3 that will get used next time */
939 	       c2 = c1 * c1 - s1 * s1;
940 	       s2 = 2.0 * c1 * s1;
941 	       c3 = c2 * c1 - s2 * s1;
942 	       s3 = c2 * s1 + s2 * c1;
943 	       kk = kk - nt + jc;
944 	    } while (kk <= kspan);
945 	    kk = kk - kspan + inc;
946 	 } while (kk <= jc);
947 	 if (kspan == jc)
948 	   goto Permute_Results;	/* exit infinite loop */
949 	 break;
950 
951        default:
952 	 /* transform for odd factors */
953 #ifdef FFT_RADIX4
954 	 fprintf (stderr, "Error: " FFTRADIXS "(): compiled for radix 2/4 only\n");
955 	 fft_free_OMP ();		/* free-up memory */
956 	 return -1;
957 	 break;
958 #else	/* FFT_RADIX4 */
959 	 ispan = kspan;
960 	 k = factor [ii - 1];
961 	 kspan /= factor [ii - 1];
962 
963 	 switch (factor [ii - 1]) {
964 	  case 3:	/* transform for factor of 3 (optional code) */
965 	    do {
966 	       do {
967 		  REAL aj, tmpr;
968 		  REAL bj, tmpi;
969 		  int k2;
970 
971 		  k1 = kk + kspan;
972 		  k2 = k1 + kspan;
973 		  tmpr = Re_Data (kk);
974 		  tmpi = Im_Data (kk);
975 		  aj = Re_Data (k1) + Re_Data (k2);
976 		  bj = Im_Data (k1) + Im_Data (k2);
977 		  Re_Data (kk) = tmpr + aj;
978 		  Im_Data (kk) = tmpi + bj;
979 		  tmpr -= 0.5 * aj;
980 		  tmpi -= 0.5 * bj;
981 		  aj = (Re_Data (k1) - Re_Data (k2)) * s60;
982 		  bj = (Im_Data (k1) - Im_Data (k2)) * s60;
983 		  Re_Data (k1) = tmpr - bj;
984 		  Re_Data (k2) = tmpr + bj;
985 		  Im_Data (k1) = tmpi + aj;
986 		  Im_Data (k2) = tmpi - aj;
987 		  kk = k2 + kspan;
988 	       } while (kk < nn);
989 	       kk -= nn;
990 	    } while (kk <= kspan);
991 	    break;
992 
993 	  case 5:	/* transform for factor of 5 (optional code) */
994 	    c2 = c72 * c72 - s72 * s72;
995 	    s2 = 2.0 * c72 * s72;
996 	    do {
997 	       do {
998 		  REAL aa, aj, ak, ajm, ajp, akm, akp;
999 		  REAL bb, bj, bk, bjm, bjp, bkm, bkp;
1000 		  int k2, k4;
1001 
1002 		  k1 = kk + kspan;
1003 		  k2 = k1 + kspan;
1004 		  k3 = k2 + kspan;
1005 		  k4 = k3 + kspan;
1006 		  akp = Re_Data (k1) + Re_Data (k4);
1007 		  akm = Re_Data (k1) - Re_Data (k4);
1008 		  bkp = Im_Data (k1) + Im_Data (k4);
1009 		  bkm = Im_Data (k1) - Im_Data (k4);
1010 		  ajp = Re_Data (k2) + Re_Data (k3);
1011 		  ajm = Re_Data (k2) - Re_Data (k3);
1012 		  bjp = Im_Data (k2) + Im_Data (k3);
1013 		  bjm = Im_Data (k2) - Im_Data (k3);
1014 		  aa = Re_Data (kk);
1015 		  bb = Im_Data (kk);
1016 		  Re_Data (kk) = aa + akp + ajp;
1017 		  Im_Data (kk) = bb + bkp + bjp;
1018 		  ak = akp * c72 + ajp * c2 + aa;
1019 		  bk = bkp * c72 + bjp * c2 + bb;
1020 		  aj = akm * s72 + ajm * s2;
1021 		  bj = bkm * s72 + bjm * s2;
1022 		  Re_Data (k1) = ak - bj;
1023 		  Re_Data (k4) = ak + bj;
1024 		  Im_Data (k1) = bk + aj;
1025 		  Im_Data (k4) = bk - aj;
1026 		  ak = akp * c2 + ajp * c72 + aa;
1027 		  bk = bkp * c2 + bjp * c72 + bb;
1028 		  aj = akm * s2 - ajm * s72;
1029 		  bj = bkm * s2 - bjm * s72;
1030 		  Re_Data (k2) = ak - bj;
1031 		  Re_Data (k3) = ak + bj;
1032 		  Im_Data (k2) = bk + aj;
1033 		  Im_Data (k3) = bk - aj;
1034 		  kk = k4 + kspan;
1035 	       } while (kk < nn);
1036 	       kk -= nn;
1037 	    } while (kk <= kspan);
1038 	    break;
1039 
1040 	  default:
1041 	    k = factor [ii - 1];
1042 	    if (jf != k)
1043 	      {
1044 		 jf = k;
1045 		 s1 = pi2 / (double) jf;
1046 		 c1 = cos (s1);
1047 		 s1 = sin (s1);
1048 		 if (jf > maxFactors)
1049 		   goto Memory_Error;
1050 		 Cos [jf - 1] = 1.0;
1051 		 Sin [jf - 1] = 0.0;
1052 		 j = 1;
1053 		 do {
1054 		    Cos [j - 1] = Cos [k - 1] * c1 + Sin [k - 1] * s1;
1055 		    Sin [j - 1] = Cos [k - 1] * s1 - Sin [k - 1] * c1;
1056 		    k--;
1057 		    Cos [k - 1] = Cos [j - 1];
1058 		    Sin [k - 1] = -Sin [j - 1];
1059 		    j++;
1060 		 } while (j < k);
1061 	      }
1062 	    do {
1063 	       do {
1064 		  REAL aa, ak;
1065 		  REAL bb, bk;
1066 		  int k2;
1067 
1068 		  aa = ak = Re_Data (kk);
1069 		  bb = bk = Im_Data (kk);
1070 
1071 		  k1 = kk;
1072 		  k2 = kk + ispan;
1073 		  j = 1;
1074 		  k1 += kspan;
1075 		  do {
1076 		     k2 -= kspan;
1077 		     Rtmp [j] = Re_Data (k1) + Re_Data (k2);
1078 		     ak += Rtmp [j];
1079 		     Itmp [j] = Im_Data (k1) + Im_Data (k2);
1080 		     bk += Itmp [j];
1081 		     j++;
1082 		     Rtmp [j] = Re_Data (k1) - Re_Data (k2);
1083 		     Itmp [j] = Im_Data (k1) - Im_Data (k2);
1084 		     j++;
1085 		     k1 += kspan;
1086 		  } while (k1 < k2);
1087 		  Re_Data (kk) = ak;
1088 		  Im_Data (kk) = bk;
1089 
1090 		  k1 = kk;
1091 		  k2 = kk + ispan;
1092 		  j = 1;
1093 		  do {
1094 		     REAL aj = 0.0;
1095 		     REAL bj = 0.0;
1096 
1097 		     k1 += kspan;
1098 		     k2 -= kspan;
1099 		     jj = j;
1100 		     ak = aa;
1101 		     bk = bb;
1102 		     k = 1;
1103 		     do {
1104 			ak += Rtmp [k] * Cos [jj - 1];
1105 			bk += Itmp [k] * Cos [jj - 1];
1106 			k++;
1107 			aj += Rtmp [k] * Sin [jj - 1];
1108 			bj += Itmp [k] * Sin [jj - 1];
1109 			k++;
1110 			jj += j;
1111 			if (jj > jf)
1112 			  jj -= jf;
1113 		     } while (k < jf);
1114 		     k = jf - j;
1115 		     Re_Data (k1) = ak - bj;
1116 		     Im_Data (k1) = bk + aj;
1117 		     Re_Data (k2) = ak + bj;
1118 		     Im_Data (k2) = bk - aj;
1119 		     j++;
1120 		  } while (j < k);
1121 		  kk += ispan;
1122 	       } while (kk <= nn);
1123 	       kk -= nn;
1124 	    } while (kk <= kspan);
1125 	    break;
1126 	 }
1127 	 /* multiply by rotation factor (except for factors of 2 and 4) */
1128 	 if (ii == nFactor)
1129 	   goto Permute_Results;	/* exit infinite loop */
1130 	 kk = jc + 1;
1131 	 do {
1132 	    c2 = 1.0 - cd;
1133 	    s1 = sd;
1134 	    do {
1135 	       c1 = c2;
1136 	       s2 = s1;
1137 	       kk += kspan;
1138 	       do {
1139 		  REAL tmp;
1140 		  do {
1141 		     REAL ak;
1142 		     ak = Re_Data (kk);
1143 		     Re_Data (kk) = c2 * ak - s2 * Im_Data (kk);
1144 		     Im_Data (kk) = s2 * ak + c2 * Im_Data (kk);
1145 		     kk += ispan;
1146 		  } while (kk <= nt);
1147 		  tmp = s1 * s2;
1148 		  s2 = s1 * c2 + c1 * s2;
1149 		  c2 = c1 * c2 - tmp;
1150 		  kk = kk - nt + kspan;
1151 	       } while (kk <= ispan);
1152 	       c2 = c1 - (cd * c1 + sd * s1);
1153 	       s1 += sd * c1 - cd * s1;
1154 	       c1 = 2.0 - (c2 * c2 + s1 * s1);
1155 	       s1 *= c1;
1156 	       c2 *= c1;
1157 	       kk = kk - ispan + jc;
1158 	    } while (kk <= kspan);
1159 	    kk = kk - kspan + jc + inc;
1160 	 } while (kk <= jc + jc);
1161 	 break;
1162 #endif	/* FFT_RADIX4 */
1163       }
1164    }
1165 
1166 /* permute the results to normal order -- done in two stages */
1167 /* permutation for square factors of n */
1168 Permute_Results:
1169 #ifdef USE_OMP
1170    Perm = AO_VALUE(Perm) ;
1171 #endif
1172    Perm [0] = ns;
1173    if (kt)
1174      {
1175 	int k2;
1176 
1177 	k = kt + kt + 1;
1178 	if (k > nFactor)
1179 	  k--;
1180 	Perm [k] = jc;
1181 	j = 1;
1182 	do {
1183 	   Perm [j] = Perm [j - 1] / factor [j - 1];
1184 	   Perm [k - 1] = Perm [k] * factor [j - 1];
1185 	   j++;
1186 	   k--;
1187 	} while (j < k);
1188 	k3 = Perm [k];
1189 	kspan = Perm [1];
1190 	kk = jc + 1;
1191 	k2 = kspan + 1;
1192 	j = 1;
1193 	if (nPass != nTotal)
1194 	  {
1195 	     /* permutation for multivariate transform */
1196 	     Permute_Multi:
1197 	     do {
1198 		do {
1199 		   k = kk + jc;
1200 		   do {
1201 		      /* swap
1202 		       * Re_Data (kk) <> Re_Data (k2)
1203 		       * Im_Data (kk) <> Im_Data (k2)
1204 		       */
1205 		      REAL tmp;
1206 		      tmp = Re_Data (kk); Re_Data (kk) = Re_Data (k2); Re_Data (k2) = tmp;
1207 		      tmp = Im_Data (kk); Im_Data (kk) = Im_Data (k2); Im_Data (k2) = tmp;
1208 		      kk += inc;
1209 		      k2 += inc;
1210 		   } while (kk < k);
1211 		   kk += (ns - jc);
1212 		   k2 += (ns - jc);
1213 		} while (kk < nt);
1214 		k2 = k2 - nt + kspan;
1215 		kk = kk - nt + jc;
1216 	     } while (k2 < ns);
1217 	     do {
1218 		do {
1219 		   k2 -= Perm [j - 1];
1220 		   j++;
1221 		   k2 = Perm [j] + k2;
1222 		} while (k2 > Perm [j - 1]);
1223 		j = 1;
1224 		do {
1225 		   if (kk < k2)
1226 		     goto Permute_Multi;
1227 		   kk += jc;
1228 		   k2 += kspan;
1229 		} while (k2 < ns);
1230 	     } while (kk < ns);
1231 	  }
1232 	else
1233 	  {
1234 	     /* permutation for single-variate transform (optional code) */
1235 Permute_Single:
1236 	     do {
1237 		/* swap
1238 		 * Re_Data (kk) <> Re_Data (k2)
1239 		 * Im_Data (kk) <> Im_Data (k2)
1240 		 */
1241 		REAL t;
1242 		t = Re_Data (kk); Re_Data (kk) = Re_Data (k2); Re_Data (k2) = t;
1243 		t = Im_Data (kk); Im_Data (kk) = Im_Data (k2); Im_Data (k2) = t;
1244 		kk += inc;
1245 		k2 += kspan;
1246 	     } while (k2 < ns);
1247 	     do {
1248 		do {
1249 		   k2 -= Perm [j - 1];
1250 		   j++;
1251 		   k2 = Perm [j] + k2;
1252 		} while (k2 > Perm [j - 1]);
1253 		j = 1;
1254 		do {
1255 		   if (kk < k2)
1256 		     goto Permute_Single;
1257 		   kk += inc;
1258 		   k2 += kspan;
1259 		} while (k2 < ns);
1260 	     } while (kk < ns);
1261 	  }
1262 	jc = k3;
1263      }
1264 
1265    if ((kt << 1) + 1 >= nFactor)
1266      return 0;
1267    ispan = Perm [kt];
1268 
1269    /* permutation for square-free factors of n */
1270    j = nFactor - kt;
1271    factor [j] = 1;
1272    do {
1273       factor [j - 1] *= factor [j];
1274       j--;
1275    } while (j != kt);
1276    nn = factor [kt] - 1;
1277    kt++;
1278    if (nn > maxPerm)
1279      goto Memory_Error;
1280 
1281    j = jj = 0;
1282    for (;;) {
1283       int k2;
1284 
1285       k = kt + 1;
1286       k2 = factor [kt - 1];
1287       kk = factor [k - 1];
1288       j++;
1289       if (j > nn)
1290 	break;				/* exit infinite loop */
1291       jj += kk;
1292       while (jj >= k2)
1293 	{
1294 	   jj -= k2;
1295 	   k2 = kk;
1296 	   kk = factor [k++];
1297 	   jj += kk;
1298 	}
1299       Perm [j - 1] = jj;
1300    }
1301    /* determine the permutation cycles of length greater than 1 */
1302    j = 0;
1303    for (;;) {
1304       do {
1305 	 kk = Perm [j++];
1306       } while (kk < 0);
1307       if (kk != j)
1308 	{
1309 	   do {
1310 	      k = kk;
1311 	      kk = Perm [k - 1];
1312 	      Perm [k - 1] = -kk;
1313 	   } while (kk != j);
1314 	   k3 = kk;
1315 	}
1316       else
1317 	{
1318 	   Perm [j - 1] = -j;
1319 	   if (j == nn)
1320 	     break;		/* exit infinite loop */
1321 	}
1322    }
1323 
1324    maxFactors *= inc;
1325 
1326    /* reorder a and b, following the permutation cycles */
1327    for (;;) {
1328       j = k3 + 1;
1329       nt -= ispan;
1330       ii = nt - inc + 1;
1331       if (nt < 0)
1332 	break;			/* exit infinite loop */
1333       do {
1334 	 do {
1335 	    j--;
1336 	 } while (Perm [j - 1] < 0);
1337 	 jj = jc;
1338 	 do {
1339 	    int k2;
1340 
1341 	    if (jj < maxFactors) kspan = jj; else kspan = maxFactors;
1342 
1343 	    jj -= kspan;
1344 	    k = Perm [j - 1];
1345 	    kk = jc * k + ii + jj;
1346 
1347 	    k1 = kk + kspan;
1348 	    k2 = 0;
1349 	    do {
1350 	       Rtmp [k2] = Re_Data (k1);
1351 	       Itmp [k2] = Im_Data (k1);
1352 	       k2++;
1353 	       k1 -= inc;
1354 	    } while (k1 != kk);
1355 
1356 	    do {
1357 	       k1 = kk + kspan;
1358 	       k2 = k1 - jc * (k + Perm [k - 1]);
1359 	       k = -Perm [k - 1];
1360 	       do {
1361 		  Re_Data (k1) = Re_Data (k2);
1362 		  Im_Data (k1) = Im_Data (k2);
1363 		  k1 -= inc;
1364 		  k2 -= inc;
1365 	       } while (k1 != kk);
1366 	       kk = k2;
1367 	    } while (k != j);
1368 
1369 	    k1 = kk + kspan;
1370 	    k2 = 0;
1371 	    do {
1372 	       Re_Data (k1) = Rtmp [k2];
1373 	       Im_Data (k1) = Itmp [k2];
1374 	       k2++;
1375 	       k1 -= inc;
1376 	    } while (k1 != kk);
1377 	 } while (jj);
1378       } while (j != 1);
1379    }
1380    return 0;			/* exit point here */
1381 
1382    /* alloc or other problem, do some clean-up */
1383 Memory_Error:
1384    fprintf (stderr, "Error: " FFTRADIXS "() - insufficient memory.\n");
1385    fft_free_OMP ();			/* free-up memory */
1386    return -1;
1387 }
1388 #endif	/* _FFTN_C */
1389 /*----------------------- end-of-file (C source) -----------------------*/
1390