1 /*
2  *  This file is part of libfftpack.
3  *
4  *  libfftpack is free software; you can redistribute it and/or modify
5  *  it under the terms of the GNU General Public License as published by
6  *  the Free Software Foundation; either version 2 of the License, or
7  *  (at your option) any later version.
8  *
9  *  libfftpack is distributed in the hope that it will be useful,
10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  *  GNU General Public License for more details.
13  *
14  *  You should have received a copy of the GNU General Public License
15  *  along with libfftpack; if not, write to the Free Software
16  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 /*
20  *  libfftpack is being developed at the Max-Planck-Institut fuer Astrophysik
21  *  and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
22  *  (DLR).
23  */
24 
25 /*
26   fftpack.c : A set of FFT routines in C.
27   Algorithmically based on Fortran-77 FFTPACK by Paul N. Swarztrauber
28   (Version 4, 1985).
29 
30   C port by Martin Reinecke (2010)
31  */
32 
33 #include <math.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include "fftpack.h"
37 
38 #define WA(x,i) wa[(i)+(x)*ido]
39 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
40 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
41 #define PM(a,b,c,d) { a=c+d; b=c-d; }
42 #define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; }
43 #define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; }
44 #define SCALEC(a,b) { a.r*=b; a.i*=b; }
45 #define CONJFLIPC(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; }
46 /* (a+ib) = conj(c+id) * (e+if) */
47 #define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; }
48 
49 typedef struct {
50   double r,i;
51 } cmplx;
52 
53 #define CONCAT(a,b) a ## b
54 
55 #define X(arg) CONCAT(passb,arg)
56 #define BACKWARD
57 #include "fftpack_inc.c"
58 #undef BACKWARD
59 #undef X
60 
61 #define X(arg) CONCAT(passf,arg)
62 #include "fftpack_inc.c"
63 #undef X
64 
65 #undef CC
66 #undef CH
67 #define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))]
68 #define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))]
69 
radf2(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)70 static void radf2 (size_t ido, size_t l1, const double *cc, double *ch,
71   const double *wa)
72   {
73   const size_t cdim=2;
74   size_t i, k, ic;
75   double ti2, tr2;
76 
77   for (k=0; k<l1; k++)
78     PM (CH(0,0,k),CH(ido-1,1,k),CC(0,k,0),CC(0,k,1))
79   if ((ido&1)==0)
80     for (k=0; k<l1; k++)
81       {
82       CH(    0,1,k) = -CC(ido-1,k,1);
83       CH(ido-1,0,k) =  CC(ido-1,k,0);
84       }
85   if (ido<=2) return;
86   for (k=0; k<l1; k++)
87     for (i=2; i<ido; i+=2)
88       {
89       ic=ido-i;
90       MULPM (tr2,ti2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
91       PM (CH(i-1,0,k),CH(ic-1,1,k),CC(i-1,k,0),tr2)
92       PM (CH(i  ,0,k),CH(ic  ,1,k),ti2,CC(i  ,k,0))
93       }
94   }
95 
radf3(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)96 static void radf3(size_t ido, size_t l1, const double *cc, double *ch,
97   const double *wa)
98   {
99   const size_t cdim=3;
100   static const double taur=-0.5, taui=0.86602540378443864676;
101   size_t i, k, ic;
102   double ci2, di2, di3, cr2, dr2, dr3, ti2, ti3, tr2, tr3;
103 
104   for (k=0; k<l1; k++)
105     {
106     cr2=CC(0,k,1)+CC(0,k,2);
107     CH(0,0,k) = CC(0,k,0)+cr2;
108     CH(0,2,k) = taui*(CC(0,k,2)-CC(0,k,1));
109     CH(ido-1,1,k) = CC(0,k,0)+taur*cr2;
110     }
111   if (ido==1) return;
112   for (k=0; k<l1; k++)
113     for (i=2; i<ido; i+=2)
114       {
115       ic=ido-i;
116       MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
117       MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
118       cr2=dr2+dr3;
119       ci2=di2+di3;
120       CH(i-1,0,k) = CC(i-1,k,0)+cr2;
121       CH(i  ,0,k) = CC(i  ,k,0)+ci2;
122       tr2 = CC(i-1,k,0)+taur*cr2;
123       ti2 = CC(i  ,k,0)+taur*ci2;
124       tr3 = taui*(di2-di3);
125       ti3 = taui*(dr3-dr2);
126       PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr3)
127       PM(CH(i  ,2,k),CH(ic  ,1,k),ti3,ti2)
128       }
129   }
130 
radf4(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)131 static void radf4(size_t ido, size_t l1, const double *cc, double *ch,
132   const double *wa)
133   {
134   const size_t cdim=4;
135   static const double hsqt2=0.70710678118654752440;
136   size_t i, k, ic;
137   double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
138 
139   for (k=0; k<l1; k++)
140     {
141     PM (tr1,CH(0,2,k),CC(0,k,3),CC(0,k,1))
142     PM (tr2,CH(ido-1,1,k),CC(0,k,0),CC(0,k,2))
143     PM (CH(0,0,k),CH(ido-1,3,k),tr2,tr1)
144     }
145   if ((ido&1)==0)
146     for (k=0; k<l1; k++)
147       {
148       ti1=-hsqt2*(CC(ido-1,k,1)+CC(ido-1,k,3));
149       tr1= hsqt2*(CC(ido-1,k,1)-CC(ido-1,k,3));
150       PM (CH(ido-1,0,k),CH(ido-1,2,k),CC(ido-1,k,0),tr1)
151       PM (CH(    0,3,k),CH(    0,1,k),ti1,CC(ido-1,k,2))
152       }
153   if (ido<=2) return;
154   for (k=0; k<l1; k++)
155     for (i=2; i<ido; i+=2)
156       {
157       ic=ido-i;
158       MULPM(cr2,ci2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
159       MULPM(cr3,ci3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
160       MULPM(cr4,ci4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
161       PM(tr1,tr4,cr4,cr2)
162       PM(ti1,ti4,ci2,ci4)
163       PM(tr2,tr3,CC(i-1,k,0),cr3)
164       PM(ti2,ti3,CC(i  ,k,0),ci3)
165       PM(CH(i-1,0,k),CH(ic-1,3,k),tr2,tr1)
166       PM(CH(i  ,0,k),CH(ic  ,3,k),ti1,ti2)
167       PM(CH(i-1,2,k),CH(ic-1,1,k),tr3,ti4)
168       PM(CH(i  ,2,k),CH(ic  ,1,k),tr4,ti3)
169       }
170   }
171 
radf5(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)172 static void radf5(size_t ido, size_t l1, const double *cc, double *ch,
173   const double *wa)
174   {
175   const size_t cdim=5;
176   static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
177                       tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
178   size_t i, k, ic;
179   double ci2, di2, ci4, ci5, di3, di4, di5, ci3, cr2, cr3, dr2, dr3,
180          dr4, dr5, cr5, cr4, ti2, ti3, ti5, ti4, tr2, tr3, tr4, tr5;
181 
182   for (k=0; k<l1; k++)
183     {
184     PM (cr2,ci5,CC(0,k,4),CC(0,k,1))
185     PM (cr3,ci4,CC(0,k,3),CC(0,k,2))
186     CH(0,0,k)=CC(0,k,0)+cr2+cr3;
187     CH(ido-1,1,k)=CC(0,k,0)+tr11*cr2+tr12*cr3;
188     CH(0,2,k)=ti11*ci5+ti12*ci4;
189     CH(ido-1,3,k)=CC(0,k,0)+tr12*cr2+tr11*cr3;
190     CH(0,4,k)=ti12*ci5-ti11*ci4;
191     }
192   if (ido==1) return;
193   for (k=0; k<l1;++k)
194     for (i=2; i<ido; i+=2)
195       {
196       ic=ido-i;
197       MULPM (dr2,di2,WA(0,i-2),WA(0,i-1),CC(i-1,k,1),CC(i,k,1))
198       MULPM (dr3,di3,WA(1,i-2),WA(1,i-1),CC(i-1,k,2),CC(i,k,2))
199       MULPM (dr4,di4,WA(2,i-2),WA(2,i-1),CC(i-1,k,3),CC(i,k,3))
200       MULPM (dr5,di5,WA(3,i-2),WA(3,i-1),CC(i-1,k,4),CC(i,k,4))
201       PM(cr2,ci5,dr5,dr2)
202       PM(ci2,cr5,di2,di5)
203       PM(cr3,ci4,dr4,dr3)
204       PM(ci3,cr4,di3,di4)
205       CH(i-1,0,k)=CC(i-1,k,0)+cr2+cr3;
206       CH(i  ,0,k)=CC(i  ,k,0)+ci2+ci3;
207       tr2=CC(i-1,k,0)+tr11*cr2+tr12*cr3;
208       ti2=CC(i  ,k,0)+tr11*ci2+tr12*ci3;
209       tr3=CC(i-1,k,0)+tr12*cr2+tr11*cr3;
210       ti3=CC(i  ,k,0)+tr12*ci2+tr11*ci3;
211       MULPM(tr5,tr4,cr5,cr4,ti11,ti12)
212       MULPM(ti5,ti4,ci5,ci4,ti11,ti12)
213       PM(CH(i-1,2,k),CH(ic-1,1,k),tr2,tr5)
214       PM(CH(i  ,2,k),CH(ic  ,1,k),ti5,ti2)
215       PM(CH(i-1,4,k),CH(ic-1,3,k),tr3,tr4)
216       PM(CH(i  ,4,k),CH(ic  ,3,k),ti4,ti3)
217       }
218   }
219 
220 #undef CH
221 #undef CC
222 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
223 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
224 #define C1(a,b,c) cc[(a)+ido*((b)+l1*(c))]
225 #define C2(a,b) cc[(a)+idl1*(b)]
226 #define CH2(a,b) ch[(a)+idl1*(b)]
radfg(size_t ido,size_t ip,size_t l1,size_t idl1,double * cc,double * ch,const double * wa)227 static void radfg(size_t ido, size_t ip, size_t l1, size_t idl1,
228   double *cc, double *ch, const double *wa)
229   {
230   const size_t cdim=ip;
231   static const double twopi=6.28318530717958647692;
232   size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
233   double ai1, ai2, ar1, ar2, arg;
234   double *csarr;
235   size_t aidx;
236 
237   ipph=(ip+1)/ 2;
238   if(ido!=1)
239     {
240     memcpy(ch,cc,idl1*sizeof(double));
241 
242     for(j=1; j<ip; j++)
243       for(k=0; k<l1; k++)
244         {
245         CH(0,k,j)=C1(0,k,j);
246         idij=(j-1)*ido+1;
247         for(i=2; i<ido; i+=2,idij+=2)
248           MULPM(CH(i-1,k,j),CH(i,k,j),wa[idij-1],wa[idij],C1(i-1,k,j),C1(i,k,j))
249         }
250 
251     for(j=1,jc=ip-1; j<ipph; j++,jc--)
252       for(k=0; k<l1; k++)
253         for(i=2; i<ido; i+=2)
254           {
255           PM(C1(i-1,k,j),C1(i  ,k,jc),CH(i-1,k,jc),CH(i-1,k,j ))
256           PM(C1(i  ,k,j),C1(i-1,k,jc),CH(i  ,k,j ),CH(i  ,k,jc))
257           }
258     }
259   else
260     memcpy(cc,ch,idl1*sizeof(double));
261 
262   for(j=1,jc=ip-1; j<ipph; j++,jc--)
263     for(k=0; k<l1; k++)
264       PM(C1(0,k,j),C1(0,k,jc),CH(0,k,jc),CH(0,k,j))
265 
266   csarr=RALLOC(double,2*ip);
267   arg=twopi / ip;
268   csarr[0]=1.;
269   csarr[1]=0.;
270   csarr[2]=csarr[2*ip-2]=cos(arg);
271   csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
272   for (i=2; i<=ip/2; ++i)
273     {
274     csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
275     csarr[2*i+1]=sin(i*arg);
276     csarr[2*ip-2*i+1]=-csarr[2*i+1];
277     }
278   for(l=1,lc=ip-1; l<ipph; l++,lc--)
279     {
280     ar1=csarr[2*l];
281     ai1=csarr[2*l+1];
282     for(ik=0; ik<idl1; ik++)
283       {
284       CH2(ik,l)=C2(ik,0)+ar1*C2(ik,1);
285       CH2(ik,lc)=ai1*C2(ik,ip-1);
286       }
287     aidx=2*l;
288     for(j=2,jc=ip-2; j<ipph; j++,jc--)
289       {
290       aidx+=2*l;
291       if (aidx>=2*ip) aidx-=2*ip;
292       ar2=csarr[aidx];
293       ai2=csarr[aidx+1];
294       for(ik=0; ik<idl1; ik++)
295         {
296         CH2(ik,l )+=ar2*C2(ik,j );
297         CH2(ik,lc)+=ai2*C2(ik,jc);
298         }
299       }
300     }
301   DEALLOC(csarr);
302 
303   for(j=1; j<ipph; j++)
304     for(ik=0; ik<idl1; ik++)
305       CH2(ik,0)+=C2(ik,j);
306 
307   for(k=0; k<l1; k++)
308     memcpy(&CC(0,0,k),&CH(0,k,0),ido*sizeof(double));
309   for(j=1; j<ipph; j++)
310     {
311     jc=ip-j;
312     j2=2*j;
313     for(k=0; k<l1; k++)
314       {
315       CC(ido-1,j2-1,k) = CH(0,k,j );
316       CC(0    ,j2  ,k) = CH(0,k,jc);
317       }
318     }
319   if(ido==1) return;
320 
321   for(j=1; j<ipph; j++)
322     {
323     jc=ip-j;
324     j2=2*j;
325     for(k=0; k<l1; k++)
326       for(i=2; i<ido; i+=2)
327         {
328         ic=ido-i;
329         PM (CC(i-1,j2,k),CC(ic-1,j2-1,k),CH(i-1,k,j ),CH(i-1,k,jc))
330         PM (CC(i  ,j2,k),CC(ic  ,j2-1,k),CH(i  ,k,jc),CH(i  ,k,j ))
331         }
332     }
333   }
334 
335 #undef CC
336 #undef CH
337 #define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))]
338 #define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))]
339 
radb2(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)340 static void radb2(size_t ido, size_t l1, const double *cc, double *ch,
341   const double *wa)
342   {
343   const size_t cdim=2;
344   size_t i, k, ic;
345   double ti2, tr2;
346 
347   for (k=0; k<l1; k++)
348     PM (CH(0,k,0),CH(0,k,1),CC(0,0,k),CC(ido-1,1,k))
349   if ((ido&1)==0)
350     for (k=0; k<l1; k++)
351       {
352       CH(ido-1,k,0) =  2*CC(ido-1,0,k);
353       CH(ido-1,k,1) = -2*CC(0    ,1,k);
354       }
355   if (ido<=2) return;
356   for (k=0; k<l1;++k)
357     for (i=2; i<ido; i+=2)
358       {
359       ic=ido-i;
360       PM (CH(i-1,k,0),tr2,CC(i-1,0,k),CC(ic-1,1,k))
361       PM (ti2,CH(i  ,k,0),CC(i  ,0,k),CC(ic  ,1,k))
362       MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ti2,tr2)
363       }
364   }
365 
radb3(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)366 static void radb3(size_t ido, size_t l1, const double *cc, double *ch,
367   const double *wa)
368   {
369   const size_t cdim=3;
370   static const double taur=-0.5, taui=0.86602540378443864676;
371   size_t i, k, ic;
372   double ci2, ci3, di2, di3, cr2, cr3, dr2, dr3, ti2, tr2;
373 
374   for (k=0; k<l1; k++)
375     {
376     tr2=2*CC(ido-1,1,k);
377     cr2=CC(0,0,k)+taur*tr2;
378     CH(0,k,0)=CC(0,0,k)+tr2;
379     ci3=2*taui*CC(0,2,k);
380     PM (CH(0,k,2),CH(0,k,1),cr2,ci3);
381     }
382   if (ido==1) return;
383   for (k=0; k<l1; k++)
384     for (i=2; i<ido; i+=2)
385       {
386       ic=ido-i;
387       tr2=CC(i-1,2,k)+CC(ic-1,1,k);
388       ti2=CC(i  ,2,k)-CC(ic  ,1,k);
389       cr2=CC(i-1,0,k)+taur*tr2;
390       ci2=CC(i  ,0,k)+taur*ti2;
391       CH(i-1,k,0)=CC(i-1,0,k)+tr2;
392       CH(i  ,k,0)=CC(i  ,0,k)+ti2;
393       cr3=taui*(CC(i-1,2,k)-CC(ic-1,1,k));
394       ci3=taui*(CC(i  ,2,k)+CC(ic  ,1,k));
395       PM(dr3,dr2,cr2,ci3)
396       PM(di2,di3,ci2,cr3)
397       MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
398       MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
399       }
400   }
401 
radb4(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)402 static void radb4(size_t ido, size_t l1, const double *cc, double *ch,
403   const double *wa)
404   {
405   const size_t cdim=4;
406   static const double sqrt2=1.41421356237309504880;
407   size_t i, k, ic;
408   double ci2, ci3, ci4, cr2, cr3, cr4, ti1, ti2, ti3, ti4, tr1, tr2, tr3, tr4;
409 
410   for (k=0; k<l1; k++)
411     {
412     PM (tr2,tr1,CC(0,0,k),CC(ido-1,3,k))
413     tr3=2*CC(ido-1,1,k);
414     tr4=2*CC(0,2,k);
415     PM (CH(0,k,0),CH(0,k,2),tr2,tr3)
416     PM (CH(0,k,3),CH(0,k,1),tr1,tr4)
417     }
418   if ((ido&1)==0)
419     for (k=0; k<l1; k++)
420       {
421       PM (ti1,ti2,CC(0    ,3,k),CC(0    ,1,k))
422       PM (tr2,tr1,CC(ido-1,0,k),CC(ido-1,2,k))
423       CH(ido-1,k,0)=tr2+tr2;
424       CH(ido-1,k,1)=sqrt2*(tr1-ti1);
425       CH(ido-1,k,2)=ti2+ti2;
426       CH(ido-1,k,3)=-sqrt2*(tr1+ti1);
427       }
428   if (ido<=2) return;
429   for (k=0; k<l1;++k)
430     for (i=2; i<ido; i+=2)
431       {
432       ic=ido-i;
433       PM (tr2,tr1,CC(i-1,0,k),CC(ic-1,3,k))
434       PM (ti1,ti2,CC(i  ,0,k),CC(ic  ,3,k))
435       PM (tr4,ti3,CC(i  ,2,k),CC(ic  ,1,k))
436       PM (tr3,ti4,CC(i-1,2,k),CC(ic-1,1,k))
437       PM (CH(i-1,k,0),cr3,tr2,tr3)
438       PM (CH(i  ,k,0),ci3,ti2,ti3)
439       PM (cr4,cr2,tr1,tr4)
440       PM (ci2,ci4,ti1,ti4)
441       MULPM (CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),ci2,cr2)
442       MULPM (CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),ci3,cr3)
443       MULPM (CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),ci4,cr4)
444       }
445   }
446 
radb5(size_t ido,size_t l1,const double * cc,double * ch,const double * wa)447 static void radb5(size_t ido, size_t l1, const double *cc, double *ch,
448   const double *wa)
449   {
450   const size_t cdim=5;
451   static const double tr11= 0.3090169943749474241, ti11=0.95105651629515357212,
452                       tr12=-0.8090169943749474241, ti12=0.58778525229247312917;
453   size_t i, k, ic;
454   double ci2, ci3, ci4, ci5, di3, di4, di5, di2, cr2, cr3, cr5, cr4,
455          ti2, ti3, ti4, ti5, dr3, dr4, dr5, dr2, tr2, tr3, tr4, tr5;
456 
457   for (k=0; k<l1; k++)
458     {
459     ti5=2*CC(0,2,k);
460     ti4=2*CC(0,4,k);
461     tr2=2*CC(ido-1,1,k);
462     tr3=2*CC(ido-1,3,k);
463     CH(0,k,0)=CC(0,0,k)+tr2+tr3;
464     cr2=CC(0,0,k)+tr11*tr2+tr12*tr3;
465     cr3=CC(0,0,k)+tr12*tr2+tr11*tr3;
466     MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
467     PM(CH(0,k,4),CH(0,k,1),cr2,ci5)
468     PM(CH(0,k,3),CH(0,k,2),cr3,ci4)
469     }
470   if (ido==1) return;
471   for (k=0; k<l1;++k)
472     for (i=2; i<ido; i+=2)
473       {
474       ic=ido-i;
475       PM(tr2,tr5,CC(i-1,2,k),CC(ic-1,1,k))
476       PM(ti5,ti2,CC(i  ,2,k),CC(ic  ,1,k))
477       PM(tr3,tr4,CC(i-1,4,k),CC(ic-1,3,k))
478       PM(ti4,ti3,CC(i  ,4,k),CC(ic  ,3,k))
479       CH(i-1,k,0)=CC(i-1,0,k)+tr2+tr3;
480       CH(i  ,k,0)=CC(i  ,0,k)+ti2+ti3;
481       cr2=CC(i-1,0,k)+tr11*tr2+tr12*tr3;
482       ci2=CC(i  ,0,k)+tr11*ti2+tr12*ti3;
483       cr3=CC(i-1,0,k)+tr12*tr2+tr11*tr3;
484       ci3=CC(i  ,0,k)+tr12*ti2+tr11*ti3;
485       MULPM(cr5,cr4,tr5,tr4,ti11,ti12)
486       MULPM(ci5,ci4,ti5,ti4,ti11,ti12)
487       PM(dr4,dr3,cr3,ci4)
488       PM(di3,di4,ci3,cr4)
489       PM(dr5,dr2,cr2,ci5)
490       PM(di2,di5,ci2,cr5)
491       MULPM(CH(i,k,1),CH(i-1,k,1),WA(0,i-2),WA(0,i-1),di2,dr2)
492       MULPM(CH(i,k,2),CH(i-1,k,2),WA(1,i-2),WA(1,i-1),di3,dr3)
493       MULPM(CH(i,k,3),CH(i-1,k,3),WA(2,i-2),WA(2,i-1),di4,dr4)
494       MULPM(CH(i,k,4),CH(i-1,k,4),WA(3,i-2),WA(3,i-1),di5,dr5)
495       }
496   }
497 
radbg(size_t ido,size_t ip,size_t l1,size_t idl1,double * cc,double * ch,const double * wa)498 static void radbg(size_t ido, size_t ip, size_t l1, size_t idl1,
499   double *cc, double *ch, const double *wa)
500   {
501   const size_t cdim=ip;
502   static const double twopi=6.28318530717958647692;
503   size_t idij, ipph, i, j, k, l, j2, ic, jc, lc, ik;
504   double ai1, ai2, ar1, ar2, arg;
505   double *csarr;
506   size_t aidx;
507 
508   ipph=(ip+1)/ 2;
509   for(k=0; k<l1; k++)
510     memcpy(&CH(0,k,0),&CC(0,0,k),ido*sizeof(double));
511   for(j=1; j<ipph; j++)
512     {
513     jc=ip-j;
514     j2=2*j;
515     for(k=0; k<l1; k++)
516       {
517       CH(0,k,j )=2*CC(ido-1,j2-1,k);
518       CH(0,k,jc)=2*CC(0    ,j2  ,k);
519       }
520     }
521 
522   if(ido!=1)
523     for(j=1,jc=ip-1; j<ipph; j++,jc--)
524       for(k=0; k<l1; k++)
525         for(i=2; i<ido; i+=2)
526           {
527           ic=ido-i;
528           PM (CH(i-1,k,j ),CH(i-1,k,jc),CC(i-1,2*j,k),CC(ic-1,2*j-1,k))
529           PM (CH(i  ,k,jc),CH(i  ,k,j ),CC(i  ,2*j,k),CC(ic  ,2*j-1,k))
530           }
531 
532   csarr=RALLOC(double,2*ip);
533   arg=twopi/ip;
534   csarr[0]=1.;
535   csarr[1]=0.;
536   csarr[2]=csarr[2*ip-2]=cos(arg);
537   csarr[3]=sin(arg); csarr[2*ip-1]=-csarr[3];
538   for (i=2; i<=ip/2; ++i)
539     {
540     csarr[2*i]=csarr[2*ip-2*i]=cos(i*arg);
541     csarr[2*i+1]=sin(i*arg);
542     csarr[2*ip-2*i+1]=-csarr[2*i+1];
543     }
544   for(l=1; l<ipph; l++)
545     {
546     lc=ip-l;
547     ar1=csarr[2*l];
548     ai1=csarr[2*l+1];
549     for(ik=0; ik<idl1; ik++)
550       {
551       C2(ik,l)=CH2(ik,0)+ar1*CH2(ik,1);
552       C2(ik,lc)=ai1*CH2(ik,ip-1);
553       }
554     aidx=2*l;
555     for(j=2; j<ipph; j++)
556       {
557       jc=ip-j;
558       aidx+=2*l;
559       if (aidx>=2*ip) aidx-=2*ip;
560       ar2=csarr[aidx];
561       ai2=csarr[aidx+1];
562       for(ik=0; ik<idl1; ik++)
563         {
564         C2(ik,l )+=ar2*CH2(ik,j );
565         C2(ik,lc)+=ai2*CH2(ik,jc);
566         }
567       }
568     }
569   DEALLOC(csarr);
570 
571   for(j=1; j<ipph; j++)
572     for(ik=0; ik<idl1; ik++)
573       CH2(ik,0)+=CH2(ik,j);
574 
575   for(j=1,jc=ip-1; j<ipph; j++,jc--)
576     for(k=0; k<l1; k++)
577       PM (CH(0,k,jc),CH(0,k,j),C1(0,k,j),C1(0,k,jc))
578 
579   if(ido==1)
580     return;
581   for(j=1,jc=ip-1; j<ipph; j++,jc--)
582     for(k=0; k<l1; k++)
583       for(i=2; i<ido; i+=2)
584         {
585         PM (CH(i-1,k,jc),CH(i-1,k,j ),C1(i-1,k,j),C1(i  ,k,jc))
586         PM (CH(i  ,k,j ),CH(i  ,k,jc),C1(i  ,k,j),C1(i-1,k,jc))
587         }
588   memcpy(cc,ch,idl1*sizeof(double));
589 
590   for(j=1; j<ip; j++)
591     for(k=0; k<l1; k++)
592       {
593       C1(0,k,j)=CH(0,k,j);
594       idij=(j-1)*ido+1;
595       for(i=2; i<ido; i+=2,idij+=2)
596         MULPM (C1(i,k,j),C1(i-1,k,j),wa[idij-1],wa[idij],CH(i,k,j),CH(i-1,k,j))
597       }
598   }
599 
600 #undef CC
601 #undef CH
602 #undef PM
603 #undef MULPM
604 
605 
606 /*----------------------------------------------------------------------
607    cfftf1, cfftb1, cfftf, cfftb, cffti1, cffti. Complex FFTs.
608   ----------------------------------------------------------------------*/
609 
cfft1(size_t n,cmplx c[],cmplx ch[],const cmplx wa[],const size_t ifac[],int isign)610 static void cfft1(size_t n, cmplx c[], cmplx ch[], const cmplx wa[],
611   const size_t ifac[], int isign)
612   {
613   size_t k1, l1=1, nf=ifac[1], iw=0;
614   cmplx *p1=c, *p2=ch;
615 
616   for(k1=0; k1<nf; k1++)
617     {
618     size_t ip=ifac[k1+2];
619     size_t l2=ip*l1;
620     size_t ido = n/l2;
621     if(ip==4)
622       (isign>0) ? passb4(ido, l1, p1, p2, wa+iw)
623                 : passf4(ido, l1, p1, p2, wa+iw);
624     else if(ip==2)
625       (isign>0) ? passb2(ido, l1, p1, p2, wa+iw)
626                 : passf2(ido, l1, p1, p2, wa+iw);
627     else if(ip==3)
628       (isign>0) ? passb3(ido, l1, p1, p2, wa+iw)
629                 : passf3(ido, l1, p1, p2, wa+iw);
630     else if(ip==5)
631       (isign>0) ? passb5(ido, l1, p1, p2, wa+iw)
632                 : passf5(ido, l1, p1, p2, wa+iw);
633     else if(ip==6)
634       (isign>0) ? passb6(ido, l1, p1, p2, wa+iw)
635                 : passf6(ido, l1, p1, p2, wa+iw);
636     else
637       (isign>0) ? passbg(ido, ip, l1, p1, p2, wa+iw)
638                 : passfg(ido, ip, l1, p1, p2, wa+iw);
639     SWAP(p1,p2,cmplx *);
640     l1=l2;
641     iw+=(ip-1)*ido;
642     }
643   if (p1!=c)
644     memcpy (c,p1,n*sizeof(cmplx));
645   }
646 
cfftf(size_t n,double c[],double wsave[])647 void cfftf(size_t n, double c[], double wsave[])
648   {
649   if (n!=1)
650     cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
651           (size_t*)(wsave+4*n),-1);
652   }
653 
cfftb(size_t n,double c[],double wsave[])654 void cfftb(size_t n, double c[], double wsave[])
655   {
656   if (n!=1)
657     cfft1(n, (cmplx*)c, (cmplx*)wsave, (cmplx*)(wsave+2*n),
658           (size_t*)(wsave+4*n),+1);
659   }
660 
factorize(size_t n,const size_t * pf,size_t npf,size_t * ifac)661 static void factorize (size_t n, const size_t *pf, size_t npf, size_t *ifac)
662   {
663   size_t nl=n, nf=0, ntry=0, j=0, i;
664 
665 startloop:
666   j++;
667   ntry = (j<=npf) ? pf[j-1] : ntry+2;
668   do
669     {
670     size_t nq=nl / ntry;
671     size_t nr=nl-ntry*nq;
672     if (nr!=0)
673       goto startloop;
674     nf++;
675     ifac[nf+1]=ntry;
676     nl=nq;
677     if ((ntry==2) && (nf!=1))
678       {
679       for (i=nf+1; i>2; --i)
680         ifac[i]=ifac[i-1];
681       ifac[2]=2;
682       }
683     }
684   while(nl!=1);
685   ifac[0]=n;
686   ifac[1]=nf;
687   }
688 
cffti1(size_t n,double wa[],size_t ifac[])689 static void cffti1(size_t n, double wa[], size_t ifac[])
690   {
691   static const size_t ntryh[5]={4,6,3,2,5};
692   static const double twopi=6.28318530717958647692;
693   size_t j, k, fi;
694 
695   double argh=twopi/n;
696   size_t i=0, l1=1;
697   factorize (n,ntryh,5,ifac);
698   for(k=1; k<=ifac[1]; k++)
699     {
700     size_t ip=ifac[k+1];
701     size_t ido=n/(l1*ip);
702     for(j=1; j<ip; j++)
703       {
704       size_t is = i;
705       double argld=j*l1*argh;
706       wa[i  ]=1;
707       wa[i+1]=0;
708       for(fi=1; fi<=ido; fi++)
709         {
710         double arg=fi*argld;
711         i+=2;
712         wa[i  ]=cos(arg);
713         wa[i+1]=sin(arg);
714         }
715       if(ip>6)
716         {
717         wa[is  ]=wa[i  ];
718         wa[is+1]=wa[i+1];
719         }
720       }
721     l1*=ip;
722     }
723   }
724 
cffti(size_t n,double wsave[])725 void cffti(size_t n, double wsave[])
726   { if (n!=1) cffti1(n, wsave+2*n,(size_t*)(wsave+4*n)); }
727 
728 
729 /*----------------------------------------------------------------------
730    rfftf1, rfftb1, rfftf, rfftb, rffti1, rffti. Real FFTs.
731   ----------------------------------------------------------------------*/
732 
rfftf1(size_t n,double c[],double ch[],const double wa[],const size_t ifac[])733 static void rfftf1(size_t n, double c[], double ch[], const double wa[],
734   const size_t ifac[])
735   {
736   size_t k1, l1=n, nf=ifac[1], iw=n-1;
737   double *p1=ch, *p2=c;
738 
739   for(k1=1; k1<=nf;++k1)
740     {
741     size_t ip=ifac[nf-k1+2];
742     size_t ido=n / l1;
743     l1 /= ip;
744     iw-=(ip-1)*ido;
745     SWAP (p1,p2,double *);
746     if(ip==4)
747       radf4(ido, l1, p1, p2, wa+iw);
748     else if(ip==2)
749       radf2(ido, l1, p1, p2, wa+iw);
750     else if(ip==3)
751       radf3(ido, l1, p1, p2, wa+iw);
752     else if(ip==5)
753       radf5(ido, l1, p1, p2, wa+iw);
754     else
755       {
756       if (ido==1)
757         SWAP (p1,p2,double *);
758       radfg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
759       SWAP (p1,p2,double *);
760       }
761     }
762   if (p1==c)
763     memcpy (c,ch,n*sizeof(double));
764   }
765 
rfftb1(size_t n,double c[],double ch[],const double wa[],const size_t ifac[])766 static void rfftb1(size_t n, double c[], double ch[], const double wa[],
767   const size_t ifac[])
768   {
769   size_t k1, l1=1, nf=ifac[1], iw=0;
770   double *p1=c, *p2=ch;
771 
772   for(k1=1; k1<=nf; k1++)
773     {
774     size_t ip = ifac[k1+1],
775            ido= n/(ip*l1);
776     if(ip==4)
777       radb4(ido, l1, p1, p2, wa+iw);
778     else if(ip==2)
779       radb2(ido, l1, p1, p2, wa+iw);
780     else if(ip==3)
781       radb3(ido, l1, p1, p2, wa+iw);
782     else if(ip==5)
783       radb5(ido, l1, p1, p2, wa+iw);
784     else
785       {
786       radbg(ido, ip, l1, ido*l1, p1, p2, wa+iw);
787       if (ido!=1)
788         SWAP (p1,p2,double *);
789       }
790     SWAP (p1,p2,double *);
791     l1*=ip;
792     iw+=(ip-1)*ido;
793     }
794   if (p1!=c)
795     memcpy (c,ch,n*sizeof(double));
796   }
797 
rfftf(size_t n,double r[],double wsave[])798 void rfftf(size_t n, double r[], double wsave[])
799   { if(n!=1) rfftf1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
800 
rfftb(size_t n,double r[],double wsave[])801 void rfftb(size_t n, double r[], double wsave[])
802   { if(n!=1) rfftb1(n, r, wsave, wsave+n,(size_t*)(wsave+2*n)); }
803 
rffti1(size_t n,double wa[],size_t ifac[])804 static void rffti1(size_t n, double wa[], size_t ifac[])
805   {
806   static const size_t ntryh[4]={4,2,3,5};
807   static const double twopi=6.28318530717958647692;
808   size_t i, j, k, fi;
809 
810   double argh=twopi/n;
811   size_t is=0, l1=1;
812   factorize (n,ntryh,4,ifac);
813   for (k=1; k<ifac[1]; k++)
814     {
815     size_t ip=ifac[k+1],
816            ido=n/(l1*ip);
817     for (j=1; j<ip; ++j)
818       {
819       double argld=j*l1*argh;
820       for(i=is,fi=1; i<=ido+is-3; i+=2,++fi)
821         {
822         double arg=fi*argld;
823         wa[i  ]=cos(arg);
824         wa[i+1]=sin(arg);
825         }
826       is+=ido;
827       }
828     l1*=ip;
829     }
830   }
831 
rffti(size_t n,double wsave[])832 void rffti(size_t n, double wsave[])
833   { if (n!=1) rffti1(n, wsave+n,(size_t*)(wsave+2*n)); }
834