1 /* ZFFT http://fy.chalmers.se/~tfylb/ZFFT.html*/
2 /* Copyright (c) 2000 by Lennart Bengtsson */
3 /* Permission to use, copy, modify, distribute, and sell this software and its */
4 /* documentation for any purpose is hereby granted without fee, provided that */
5 /* the above copyright notice appear in all copies and that both the copyright */
6 /* notice and this permission notice appear in supporting documentation. */
7 /* No representations are made about the suitability of this software for */
8 /* any purpose.  It is provided "as is" without express or implied warranty. */
9 /* Note also that this kind of optimization violates the fortran standard */
10 /* so the compiled code should be tested carefully. So far ZFFT has been */
11 /* tested on SUN, SGI and IBM servers. */
12 #ifndef HAVE_FFT
13 
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17 
18 #include <math.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21 
22 typedef struct {
23   double re;
24   double im;
25 } zomplex;
26 
27 #ifndef min
28 #define min(a,b)               ( ((a) < (b)) ? (a) : (b) )
29 #endif
30 #ifndef max
31 #define max(a,b)               ( ((a) > (b)) ? (a) : (b) )
32 #endif
33 
34 /* Table of constant values */
35 static  int const1 = 1;
36 static  int const16384 = 16384;
37 static  int const0 = 0;
38 static  int constTrue = 1;
39 static  int constFalse = 0;
40 
radix2f_(double * a1,double * a2,double * b1,double * b2,int * b,int * ldb,double * c2,double * d2)41 int radix2f_(double *a1,double * a2, double *b1,double * b2,int * b,int *ldb, double *c2,double * d2)
42 {
43   /* System generated locals */
44   int b1_dim2, b1_offset, b2_dim2, b2_offset, i__1;
45 
46     /* Local variables */
47   int i__;
48   double i1, i2, j2, r1, r2, s2;
49 
50     /* Parameter adjustments */
51   a2 -= 3;
52   a1 -= 3;
53   b2_dim2 = *ldb;
54   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
55   b2 -= b2_offset;
56   b1_dim2 = *ldb;
57   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
58   b1 -= b1_offset;
59 
60     /* Function Body */
61   i__1 = *b;
62   for (i__ = 1; i__ <= i__1; ++i__) {
63     r2 = a2[(i__ << 1) + 1];
64     i2 = a2[(i__ << 1) + 2];
65     s2 = r2 * *c2 - i2 * *d2;
66     j2 = i2 * *c2 + r2 * *d2;
67     r1 = a1[(i__ << 1) + 1];
68     i1 = a1[(i__ << 1) + 2];
69     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + s2;
70     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + j2;
71     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - s2;
72     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - j2;
73   }
74   return 0;
75 } /* radix2f_ */
76 
radix3f_(double * a1,double * a2,double * a3,double * b1,double * b2,double * b3,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3)77 int radix3f_(double *a1, double *a2, double *a3, double *b1, double *b2, double *b3, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3)
78 {
79   /* Initialized data */
80 
81   double sin60 = .86602540378443864676;
82   double half = .5;
83 
84   /* System generated locals */
85   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset, i__1;
86 
87   /* Local variables */
88   int i__;
89   double i1, i2, i3, j2, j3, r1, r2, r3, t1, t2, t3, t4, s2, s3;
90 
91     /* Parameter adjustments */
92   a3 -= 3;
93   a2 -= 3;
94   a1 -= 3;
95   b3_dim2 = *ldb;
96   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
97   b3 -= b3_offset;
98   b2_dim2 = *ldb;
99   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
100   b2 -= b2_offset;
101   b1_dim2 = *ldb;
102   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
103   b1 -= b1_offset;
104 
105     /* Function Body */
106   i__1 = *b;
107   for (i__ = 1; i__ <= i__1; ++i__) {
108     r3 = a2[(i__ << 1) + 1];
109     i3 = a2[(i__ << 1) + 2];
110     s3 = r3 * *c2 - i3 * *d2;
111     j3 = i3 * *c2 + r3 * *d2;
112     r2 = a3[(i__ << 1) + 1];
113     i2 = a3[(i__ << 1) + 2];
114     s2 = r2 * *c3 - i2 * *d3;
115     j2 = i2 * *c3 + r2 * *d3;
116     t1 = s2 + s3;
117     t2 = j2 + j3;
118     t3 = s2 - s3;
119     t4 = j2 - j3;
120     r1 = a1[(i__ << 1) + 1];
121     i1 = a1[(i__ << 1) + 2];
122     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1;
123     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t2;
124     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - half * t1 + sin60 * t4;
125     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - half * t2 - sin60 * t3;
126     b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 - half * t1 - sin60 * t4;
127     b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 - half * t2 + sin60 * t3;
128   }
129   return 0;
130 } /* radix3f_ */
131 
radix4f_(double * a1,double * a2,double * a3,double * a4,double * b1,double * b2,double * b3,double * b4,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4)132 int radix4f_(double *a1, double *a2, double *a3, double *a4, double *b1, double *b2, double *b3, double *b4, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3, double *c4, double *d4)
133 {
134   /* System generated locals */
135   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
136     b4_dim2, b4_offset, i__1;
137 
138   /* Local variables */
139   int i__;
140   double i0, i1, r0, r1, r2, r3, i2, i3, s1, j1, s2, j2, s3, j3,
141     t1, t2, t3, t4, t5, t6, t7, t8;
142 
143   /* Parameter adjustments */
144   a4 -= 3;
145   a3 -= 3;
146   a2 -= 3;
147   a1 -= 3;
148   b4_dim2 = *ldb;
149   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
150   b4 -= b4_offset;
151   b3_dim2 = *ldb;
152   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
153   b3 -= b3_offset;
154   b2_dim2 = *ldb;
155   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
156   b2 -= b2_offset;
157   b1_dim2 = *ldb;
158   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
159   b1 -= b1_offset;
160 
161     /* Function Body */
162   i__1 = *b;
163   for (i__ = 1; i__ <= i__1; ++i__) {
164     r1 = a2[(i__ << 1) + 1];
165     i1 = a2[(i__ << 1) + 2];
166     s1 = r1 * *c2 - i1 * *d2;
167     j1 = i1 * *c2 + r1 * *d2;
168     r2 = a3[(i__ << 1) + 1];
169     i2 = a3[(i__ << 1) + 2];
170     s2 = r2 * *c3 - i2 * *d3;
171     j2 = i2 * *c3 + r2 * *d3;
172     r3 = a4[(i__ << 1) + 1];
173     i3 = a4[(i__ << 1) + 2];
174     s3 = r3 * *c4 - i3 * *d4;
175     j3 = i3 * *c4 + r3 * *d4;
176     r0 = a1[(i__ << 1) + 1];
177     i0 = a1[(i__ << 1) + 2];
178     t1 = r0 + s2;
179     t2 = s1 + s3;
180     t3 = i0 + j2;
181     t4 = j1 + j3;
182     t5 = r0 - s2;
183     t6 = j1 - j3;
184     t7 = i0 - j2;
185     t8 = s1 - s3;
186     b1[((i__ * b1_dim2 + 1) << 1) + 1] = t1 + t2;
187     b1[((i__ * b1_dim2 + 1) << 1) + 2] = t3 + t4;
188     b2[((i__ * b2_dim2 + 1) << 1) + 1] = t5 - t6;
189     b2[((i__ * b2_dim2 + 1) << 1) + 2] = t7 + t8;
190     b3[((i__ * b3_dim2 + 1) << 1) + 1] = t1 - t2;
191     b3[((i__ * b3_dim2 + 1) << 1) + 2] = t3 - t4;
192     b4[((i__ * b4_dim2 + 1) << 1) + 1] = t5 + t6;
193     b4[((i__ * b4_dim2 + 1) << 1) + 2] = t7 - t8;
194   }
195   return 0;
196 } /* radix4f_ */
197 
radix5f_(double * a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)198 int radix5f_(double *a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int *b, int *ldb, double *c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)
199 {
200   /* Initialized data */
201 
202   double cos36 = .8090169943749474241;
203   double sin36 = .58778525229247312917;
204   double cos72 = .3090169943749474241;
205   double sin72 = .95105651629515357212;
206 
207   /* System generated locals */
208   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
209     b4_dim2, b4_offset, b5_dim2, b5_offset, i__1;
210 
211     /* Local variables */
212   int i__;
213   double r1, r2, r3, r4, r5, i1, i2, i3, i4, i5, s2, s3, s4, s5,
214     j2, j3, j4, j5, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12,
215     t13, t14, t15, t16;
216 
217     /* Parameter adjustments */
218   a5 -= 3;
219   a4 -= 3;
220   a3 -= 3;
221   a2 -= 3;
222   a1 -= 3;
223   b5_dim2 = *ldb;
224   b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
225   b5 -= b5_offset;
226   b4_dim2 = *ldb;
227   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
228   b4 -= b4_offset;
229   b3_dim2 = *ldb;
230   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
231   b3 -= b3_offset;
232   b2_dim2 = *ldb;
233   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
234   b2 -= b2_offset;
235   b1_dim2 = *ldb;
236   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
237   b1 -= b1_offset;
238 
239     /* Function Body */
240   i__1 = *b;
241   for (i__ = 1; i__ <= i__1; ++i__) {
242     r5 = a2[(i__ << 1) + 1];
243     i5 = a2[(i__ << 1) + 2];
244     s5 = r5 * *c2 - i5 * *d2;
245     j5 = i5 * *c2 + r5 * *d2;
246     r4 = a3[(i__ << 1) + 1];
247     i4 = a3[(i__ << 1) + 2];
248     s4 = r4 * *c3 - i4 * *d3;
249     j4 = i4 * *c3 + r4 * *d3;
250     r3 = a4[(i__ << 1) + 1];
251     i3 = a4[(i__ << 1) + 2];
252     s3 = r3 * *c4 - i3 * *d4;
253     j3 = i3 * *c4 + r3 * *d4;
254     r2 = a5[(i__ << 1) + 1];
255     i2 = a5[(i__ << 1) + 2];
256     s2 = r2 * *c5 - i2 * *d5;
257     j2 = i2 * *c5 + r2 * *d5;
258     t1 = s2 + s5;
259     t2 = s3 + s4;
260     t3 = s2 - s5;
261     t4 = s3 - s4;
262     t5 = j2 + j5;
263     t6 = j3 + j4;
264     t7 = j2 - j5;
265     t8 = j3 - j4;
266     r1 = a1[(i__ << 1) + 1];
267     i1 = a1[(i__ << 1) + 2];
268     t9 = t1 * cos72 - t2 * cos36;
269     t10 = t5 * cos72 - t6 * cos36;
270     t11 = t2 * cos72 - t1 * cos36;
271     t12 = t6 * cos72 - t5 * cos36;
272     t13 = t7 * sin72 + t8 * sin36;
273     t14 = t3 * sin72 + t4 * sin36;
274     t15 = t8 * sin72 - t7 * sin36;
275     t16 = t4 * sin72 - t3 * sin36;
276     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1 + t2;
277     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t5 + t6;
278     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 + t9 + t13;
279     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 + t10 - t14;
280     b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 + t11 - t15;
281     b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 + t12 + t16;
282     b4[((i__ * b4_dim2 + 1) << 1) + 1] = r1 + t11 + t15;
283     b4[((i__ * b4_dim2 + 1) << 1) + 2] = i1 + t12 - t16;
284     b5[((i__ * b5_dim2 + 1) << 1) + 1] = r1 + t9 - t13;
285     b5[((i__ * b5_dim2 + 1) << 1) + 2] = i1 + t10 + t14;
286   }
287   return 0;
288 } /* radix5f_ */
289 
radix7f_(double * a1,double * a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5,double * b6,double * b7,int * b,int * ldb)290 int radix7f_(double *a1,double * a2,double * a3, double *a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5, double *b6,double * b7,int * b,int * ldb)
291 {
292   /* Initialized data */
293 
294   double c1 = .623489801858733530525;
295   double s1 = .78183148246802980870844;
296   double c2 = .2225209339563144042889;
297   double s2 = .97492791218182360701813;
298   double c3 = .9009688679024191262361;
299   double s3 = .43388373911755812047577;
300 
301   /* System generated locals */
302   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
303     b4_dim2, b4_offset, b5_dim2, b5_offset, b6_dim2, b6_offset,
304     b7_dim2, b7_offset, i__1;
305 
306   /* Local variables */
307   int i__;
308   double d1, p1, p2, p3, p4, t1, t2, t3, t4, t5, t6, t7, p5, p6,
309     p7, r1, r2, d2, r3, d3, i1, j1, i2, j2, i3, j3, t11, t21, t31,
310     t12, t22, t32, t13, t23, t33, t14, t24, t34;
311 
312     /* Parameter adjustments */
313   a7 -= 3;
314   a6 -= 3;
315   a5 -= 3;
316   a4 -= 3;
317   a3 -= 3;
318   a2 -= 3;
319   a1 -= 3;
320   b7_dim2 = *ldb;
321   b7_offset = 1 + 2 * (1 + b7_dim2 * 1);
322   b7 -= b7_offset;
323   b6_dim2 = *ldb;
324   b6_offset = 1 + 2 * (1 + b6_dim2 * 1);
325   b6 -= b6_offset;
326   b5_dim2 = *ldb;
327   b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
328   b5 -= b5_offset;
329   b4_dim2 = *ldb;
330   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
331   b4 -= b4_offset;
332   b3_dim2 = *ldb;
333   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
334   b3 -= b3_offset;
335   b2_dim2 = *ldb;
336   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
337   b2 -= b2_offset;
338   b1_dim2 = *ldb;
339   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
340   b1 -= b1_offset;
341 
342     /* Function Body */
343   i__1 = *b;
344   for (i__ = 1; i__ <= i__1; ++i__) {
345     t1 = a2[(i__ << 1) + 1];
346     t2 = a7[(i__ << 1) + 1];
347     r1 = t1 + t2;
348     d1 = t1 - t2;
349     t11 = r1 * c1;
350     t21 = r1 * c2;
351     t31 = r1 * c3;
352     t12 = d1 * s1;
353     t22 = d1 * s2;
354     t32 = d1 * s3;
355     t3 = a3[(i__ << 1) + 1];
356     t4 = a6[(i__ << 1) + 1];
357     r2 = t3 + t4;
358     d2 = t3 - t4;
359     t11 -= r2 * c2;
360     t21 += r2 * c3;
361     t31 -= r2 * c1;
362     t12 += d2 * s2;
363     t22 -= d2 * s3;
364     t32 -= d2 * s1;
365     t5 = a4[(i__ << 1) + 1];
366     t6 = a5[(i__ << 1) + 1];
367     r3 = t5 + t6;
368     d3 = t5 - t6;
369     t7 = a1[(i__ << 1) + 1];
370     b1[((i__ * b1_dim2 + 1) << 1) + 1] = t7 + r1 + r2 + r3;
371     t11 -= r3 * c3;
372     t21 -= r3 * c1;
373     t31 += r3 * c2;
374     t12 += d3 * s3;
375     t22 -= d3 * s1;
376     t32 += d3 * s2;
377     p1 = a2[(i__ << 1) + 2];
378     p2 = a7[(i__ << 1) + 2];
379     i1 = p1 + p2;
380     j1 = p1 - p2;
381     t13 = i1 * c1;
382     t23 = i1 * c2;
383     t33 = i1 * c3;
384     t14 = j1 * s1;
385     t24 = j1 * s2;
386     t34 = j1 * s3;
387     p3 = a3[(i__ << 1) + 2];
388     p4 = a6[(i__ << 1) + 2];
389     i2 = p3 + p4;
390     j2 = p3 - p4;
391     t13 -= i2 * c2;
392     t23 += i2 * c3;
393     t33 -= i2 * c1;
394     t14 += j2 * s2;
395     t24 -= j2 * s3;
396     t34 -= j2 * s1;
397     p5 = a4[(i__ << 1) + 2];
398     p6 = a5[(i__ << 1) + 2];
399     i3 = p5 + p6;
400     j3 = p5 - p6;
401     p7 = a1[(i__ << 1) + 2];
402     b1[((i__ * b1_dim2 + 1) << 1) + 2] = p7 + i1 + i2 + i3;
403     t13 -= i3 * c3;
404     t23 -= i3 * c1;
405     t33 += i3 * c2;
406     t14 += j3 * s3;
407     t24 -= j3 * s1;
408     t34 += j3 * s2;
409     t11 = t7 + t11;
410     t21 = t7 - t21;
411     t31 = t7 - t31;
412     t13 = p7 + t13;
413     t23 = p7 - t23;
414     t33 = p7 - t33;
415     b2[((i__ * b2_dim2 + 1) << 1) + 1] = t11 - t14;
416     b2[((i__ * b2_dim2 + 1) << 1) + 2] = t13 + t12;
417     b7[((i__ * b7_dim2 + 1) << 1) + 1] = t11 + t14;
418     b7[((i__ * b7_dim2 + 1) << 1) + 2] = t13 - t12;
419     b3[((i__ * b3_dim2 + 1) << 1) + 1] = t21 - t24;
420     b3[((i__ * b3_dim2 + 1) << 1) + 2] = t23 + t22;
421     b6[((i__ * b6_dim2 + 1) << 1) + 1] = t21 + t24;
422     b6[((i__ * b6_dim2 + 1) << 1) + 2] = t23 - t22;
423     b4[((i__ * b4_dim2 + 1) << 1) + 1] = t31 - t34;
424     b4[((i__ * b4_dim2 + 1) << 1) + 2] = t33 + t32;
425     b5[((i__ * b5_dim2 + 1) << 1) + 1] = t31 + t34;
426     b5[((i__ * b5_dim2 + 1) << 1) + 2] = t33 - t32;
427   }
428   return 0;
429 } /* radix7f_ */
430 
radix2b_(double * a1,double * a2,double * b1,double * b2,int * b,int * ldb,double * c2,double * d2)431 int radix2b_(double *a1, double *a2, double *b1, double *b2,int * b, int *ldb, double *c2, double *d2)
432 {
433   /* System generated locals */
434   int b1_dim2, b1_offset, b2_dim2, b2_offset, i__1;
435 
436     /* Local variables */
437   int i__;
438   double i1, i2, j2, r1, r2, s2;
439   /* Parameter adjustments */
440   a2 -= 3;
441   a1 -= 3;
442   b2_dim2 = *ldb;
443   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
444   b2 -= b2_offset;
445   b1_dim2 = *ldb;
446   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
447   b1 -= b1_offset;
448 
449   /* Function Body */
450   i__1 = *b;
451   for (i__ = 1; i__ <= i__1; ++i__) {
452     r2 = a2[(i__ << 1) + 1];
453     i2 = a2[(i__ << 1) + 2];
454     s2 = r2 * *c2 + i2 * *d2;
455     j2 = i2 * *c2 - r2 * *d2;
456     r1 = a1[(i__ << 1) + 1];
457     i1 = a1[(i__ << 1) + 2];
458     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + s2;
459     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + j2;
460     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - s2;
461     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - j2;
462   }
463   return 0;
464 } /* radix2b_ */
465 
radix3b_(double * a1,double * a2,double * a3,double * b1,double * b2,double * b3,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3)466 int radix3b_(double *a1,double * a2, double *a3,double * b1,double * b2,double * b3, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3)
467 {
468   /* Initialized data */
469 
470   double sin60 = .86602540378443864676;
471   double half = .5;
472 
473   /* System generated locals */
474   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset, i__1;
475 
476   /* Local variables */
477   int i__;
478   double i1, i2, i3, j2, j3, r1, r2, r3, t1, t2, t3, t4, s2, s3;
479 
480     /* Parameter adjustments */
481   a3 -= 3;
482   a2 -= 3;
483   a1 -= 3;
484   b3_dim2 = *ldb;
485   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
486   b3 -= b3_offset;
487   b2_dim2 = *ldb;
488   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
489   b2 -= b2_offset;
490   b1_dim2 = *ldb;
491   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
492   b1 -= b1_offset;
493 
494     /* Function Body */
495   i__1 = *b;
496   for (i__ = 1; i__ <= i__1; ++i__) {
497     r2 = a2[(i__ << 1) + 1];
498     i2 = a2[(i__ << 1) + 2];
499     s2 = r2 * *c2 + i2 * *d2;
500     j2 = i2 * *c2 - r2 * *d2;
501     r3 = a3[(i__ << 1) + 1];
502     i3 = a3[(i__ << 1) + 2];
503     s3 = r3 * *c3 + i3 * *d3;
504     j3 = i3 * *c3 - r3 * *d3;
505     t1 = s2 + s3;
506     t2 = j2 + j3;
507     t3 = s2 - s3;
508     t4 = j2 - j3;
509     r1 = a1[(i__ << 1) + 1];
510     i1 = a1[(i__ << 1) + 2];
511     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1;
512     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t2;
513     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - half * t1 + sin60 * t4;
514     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - half * t2 - sin60 * t3;
515     b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 - half * t1 - sin60 * t4;
516     b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 - half * t2 + sin60 * t3;
517   }
518   return 0;
519 } /* radix3b_ */
520 
radix4b_(double * a1,double * a2,double * a3,double * a4,double * b1,double * b2,double * b3,double * b4,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4)521 int radix4b_(double *a1, double *a2, double *a3, double *a4, double *b1, double *b2, double *b3, double *b4, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3, double *c4, double *d4)
522 {
523   /* System generated locals */
524   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
525     b4_dim2, b4_offset, i__1;
526 
527   /* Local variables */
528   int i__;
529   double i0, i1, r0, r1, r2, r3, i2, i3, s1, j1, s2, j2, s3, j3,
530     t1, t2, t3, t4, t5, t6, t7, t8;
531 
532   /* Parameter adjustments */
533   a4 -= 3;
534   a3 -= 3;
535   a2 -= 3;
536   a1 -= 3;
537   b4_dim2 = *ldb;
538   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
539   b4 -= b4_offset;
540   b3_dim2 = *ldb;
541   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
542   b3 -= b3_offset;
543   b2_dim2 = *ldb;
544   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
545   b2 -= b2_offset;
546   b1_dim2 = *ldb;
547   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
548   b1 -= b1_offset;
549 
550     /* Function Body */
551   i__1 = *b;
552   for (i__ = 1; i__ <= i__1; ++i__) {
553     r3 = a2[(i__ << 1) + 1];
554     i3 = a2[(i__ << 1) + 2];
555     s3 = r3 * *c2 + i3 * *d2;
556     j3 = i3 * *c2 - r3 * *d2;
557     r2 = a3[(i__ << 1) + 1];
558     i2 = a3[(i__ << 1) + 2];
559     s2 = r2 * *c3 + i2 * *d3;
560     j2 = i2 * *c3 - r2 * *d3;
561     r1 = a4[(i__ << 1) + 1];
562     i1 = a4[(i__ << 1) + 2];
563     s1 = r1 * *c4 + i1 * *d4;
564     j1 = i1 * *c4 - r1 * *d4;
565     r0 = a1[(i__ << 1) + 1];
566     i0 = a1[(i__ << 1) + 2];
567     t1 = r0 + s2;
568     t2 = s1 + s3;
569     t3 = i0 + j2;
570     t4 = j1 + j3;
571     t5 = r0 - s2;
572     t6 = j1 - j3;
573     t7 = i0 - j2;
574     t8 = s1 - s3;
575     b1[((i__ * b1_dim2 + 1) << 1) + 1] = t1 + t2;
576     b1[((i__ * b1_dim2 + 1) << 1) + 2] = t3 + t4;
577     b2[((i__ * b2_dim2 + 1) << 1) + 1] = t5 - t6;
578     b2[((i__ * b2_dim2 + 1) << 1) + 2] = t7 + t8;
579     b3[((i__ * b3_dim2 + 1) << 1) + 1] = t1 - t2;
580     b3[((i__ * b3_dim2 + 1) << 1) + 2] = t3 - t4;
581     b4[((i__ * b4_dim2 + 1) << 1) + 1] = t5 + t6;
582     b4[((i__ * b4_dim2 + 1) << 1) + 2] = t7 - t8;
583   }
584   return 0;
585 } /* radix4b_ */
586 
radix5b_(double * a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)587 int radix5b_(double *a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int *ldb, double *c2,double * d2,double * c3,double * d3,double * c4,double * d4, double *c5, double *d5)
588 {
589   /* Initialized data */
590 
591   double cos36 = .8090169943749474241;
592   double sin36 = .58778525229247312917;
593   double cos72 = .3090169943749474241;
594   double sin72 = .95105651629515357212;
595 
596   /* System generated locals */
597   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
598     b4_dim2, b4_offset, b5_dim2, b5_offset, i__1;
599 
600     /* Local variables */
601   int i__;
602   double r1, r2, r3, r4, r5, i1, i2, i3, i4, i5, s2, s3, s4, s5,
603     j2, j3, j4, j5, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12,
604     t13, t14, t15, t16;
605 
606     /* Parameter adjustments */
607   a5 -= 3;
608   a4 -= 3;
609   a3 -= 3;
610   a2 -= 3;
611   a1 -= 3;
612   b5_dim2 = *ldb;
613   b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
614   b5 -= b5_offset;
615   b4_dim2 = *ldb;
616   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
617   b4 -= b4_offset;
618   b3_dim2 = *ldb;
619   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
620   b3 -= b3_offset;
621   b2_dim2 = *ldb;
622   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
623   b2 -= b2_offset;
624   b1_dim2 = *ldb;
625   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
626   b1 -= b1_offset;
627 
628     /* Function Body */
629   i__1 = *b;
630   for (i__ = 1; i__ <= i__1; ++i__) {
631     r2 = a2[(i__ << 1) + 1];
632     i2 = a2[(i__ << 1) + 2];
633     s2 = r2 * *c2 + i2 * *d2;
634     j2 = i2 * *c2 - r2 * *d2;
635     r3 = a3[(i__ << 1) + 1];
636     i3 = a3[(i__ << 1) + 2];
637     s3 = r3 * *c3 + i3 * *d3;
638     j3 = i3 * *c3 - r3 * *d3;
639     r4 = a4[(i__ << 1) + 1];
640     i4 = a4[(i__ << 1) + 2];
641     s4 = r4 * *c4 + i4 * *d4;
642     j4 = i4 * *c4 - r4 * *d4;
643     r5 = a5[(i__ << 1) + 1];
644     i5 = a5[(i__ << 1) + 2];
645     s5 = r5 * *c5 + i5 * *d5;
646     j5 = i5 * *c5 - r5 * *d5;
647     t1 = s2 + s5;
648     t2 = s3 + s4;
649     t3 = s2 - s5;
650     t4 = s3 - s4;
651     t5 = j2 + j5;
652     t6 = j3 + j4;
653     t7 = j2 - j5;
654     t8 = j3 - j4;
655     r1 = a1[(i__ << 1) + 1];
656     i1 = a1[(i__ << 1) + 2];
657     t9 = t1 * cos72 - t2 * cos36;
658     t10 = t5 * cos72 - t6 * cos36;
659     t11 = t2 * cos72 - t1 * cos36;
660     t12 = t6 * cos72 - t5 * cos36;
661     t13 = t7 * sin72 + t8 * sin36;
662     t14 = t3 * sin72 + t4 * sin36;
663     t15 = t8 * sin72 - t7 * sin36;
664     t16 = t4 * sin72 - t3 * sin36;
665     b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1 + t2;
666     b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t5 + t6;
667     b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 + t9 + t13;
668     b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 + t10 - t14;
669     b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 + t11 - t15;
670     b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 + t12 + t16;
671     b4[((i__ * b4_dim2 + 1) << 1) + 1] = r1 + t11 + t15;
672     b4[((i__ * b4_dim2 + 1) << 1) + 2] = i1 + t12 - t16;
673     b5[((i__ * b5_dim2 + 1) << 1) + 1] = r1 + t9 - t13;
674     b5[((i__ * b5_dim2 + 1) << 1) + 2] = i1 + t10 + t14;
675   }
676   return 0;
677 } /* radix5b_ */
678 
radix7b_(double * a1,double * a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5,double * b6,double * b7,int * b,int * ldb)679 int radix7b_(double *a1, double *a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5, double *b6, double *b7, int *b, int *ldb)
680 {
681   /* Initialized data */
682 
683   double c1 = .623489801858733530525;
684   double s1 = .78183148246802980870844;
685   double c2 = .2225209339563144042889;
686   double s2 = .97492791218182360701813;
687   double c3 = .9009688679024191262361;
688   double s3 = .43388373911755812047577;
689 
690   /* System generated locals */
691   int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
692     b4_dim2, b4_offset, b5_dim2, b5_offset, b6_dim2, b6_offset,
693     b7_dim2, b7_offset, i__1;
694 
695   /* Local variables */
696   int i__;
697   double d1, p1, p2, p3, p4, t1, t2, t3, t4, t5, t6, t7, p5, p6,
698     p7, r1, r2, d2, r3, d3, i1, j1, i2, j2, i3, j3, t11, t21, t31,
699     t12, t22, t32, t13, t23, t33, t14, t24, t34;
700 
701     /* Parameter adjustments */
702   a7 -= 3;
703   a6 -= 3;
704   a5 -= 3;
705   a4 -= 3;
706   a3 -= 3;
707   a2 -= 3;
708   a1 -= 3;
709   b7_dim2 = *ldb;
710   b7_offset = 1 + 2 * (1 + b7_dim2 * 1);
711   b7 -= b7_offset;
712   b6_dim2 = *ldb;
713   b6_offset = 1 + 2 * (1 + b6_dim2 * 1);
714   b6 -= b6_offset;
715   b5_dim2 = *ldb;
716   b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
717   b5 -= b5_offset;
718   b4_dim2 = *ldb;
719   b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
720   b4 -= b4_offset;
721   b3_dim2 = *ldb;
722   b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
723   b3 -= b3_offset;
724   b2_dim2 = *ldb;
725   b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
726   b2 -= b2_offset;
727   b1_dim2 = *ldb;
728   b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
729   b1 -= b1_offset;
730 
731     /* Function Body */
732   i__1 = *b;
733   for (i__ = 1; i__ <= i__1; ++i__) {
734     t1 = a7[(i__ << 1) + 1];
735     t2 = a2[(i__ << 1) + 1];
736     r1 = t1 + t2;
737     d1 = t1 - t2;
738     t11 = r1 * c1;
739     t21 = r1 * c2;
740     t31 = r1 * c3;
741     t12 = d1 * s1;
742     t22 = d1 * s2;
743     t32 = d1 * s3;
744     t3 = a6[(i__ << 1) + 1];
745     t4 = a3[(i__ << 1) + 1];
746     r2 = t3 + t4;
747     d2 = t3 - t4;
748     t11 -= r2 * c2;
749     t21 += r2 * c3;
750     t31 -= r2 * c1;
751     t12 += d2 * s2;
752     t22 -= d2 * s3;
753     t32 -= d2 * s1;
754     t5 = a5[(i__ << 1) + 1];
755     t6 = a4[(i__ << 1) + 1];
756     r3 = t5 + t6;
757     d3 = t5 - t6;
758     t7 = a1[(i__ << 1) + 1];
759     b1[((i__ * b1_dim2 + 1) << 1) + 1] = t7 + r1 + r2 + r3;
760     t11 -= r3 * c3;
761     t21 -= r3 * c1;
762     t31 += r3 * c2;
763     t12 += d3 * s3;
764     t22 -= d3 * s1;
765     t32 += d3 * s2;
766     p1 = a7[(i__ << 1) + 2];
767     p2 = a2[(i__ << 1) + 2];
768     i1 = p1 + p2;
769     j1 = p1 - p2;
770     t13 = i1 * c1;
771     t23 = i1 * c2;
772     t33 = i1 * c3;
773     t14 = j1 * s1;
774     t24 = j1 * s2;
775     t34 = j1 * s3;
776     p3 = a6[(i__ << 1) + 2];
777     p4 = a3[(i__ << 1) + 2];
778     i2 = p3 + p4;
779     j2 = p3 - p4;
780     t13 -= i2 * c2;
781     t23 += i2 * c3;
782     t33 -= i2 * c1;
783     t14 += j2 * s2;
784     t24 -= j2 * s3;
785     t34 -= j2 * s1;
786     p5 = a5[(i__ << 1) + 2];
787     p6 = a4[(i__ << 1) + 2];
788     i3 = p5 + p6;
789     j3 = p5 - p6;
790     p7 = a1[(i__ << 1) + 2];
791     b1[((i__ * b1_dim2 + 1) << 1) + 2] = p7 + i1 + i2 + i3;
792     t13 -= i3 * c3;
793     t23 -= i3 * c1;
794     t33 += i3 * c2;
795     t14 += j3 * s3;
796     t24 -= j3 * s1;
797     t34 += j3 * s2;
798     t11 = t7 + t11;
799     t21 = t7 - t21;
800     t31 = t7 - t31;
801     t13 = p7 + t13;
802     t23 = p7 - t23;
803     t33 = p7 - t33;
804     b2[((i__ * b2_dim2 + 1) << 1) + 1] = t11 - t14;
805     b2[((i__ * b2_dim2 + 1) << 1) + 2] = t13 + t12;
806     b7[((i__ * b7_dim2 + 1) << 1) + 1] = t11 + t14;
807     b7[((i__ * b7_dim2 + 1) << 1) + 2] = t13 - t12;
808     b3[((i__ * b3_dim2 + 1) << 1) + 1] = t21 - t24;
809     b3[((i__ * b3_dim2 + 1) << 1) + 2] = t23 + t22;
810     b6[((i__ * b6_dim2 + 1) << 1) + 1] = t21 + t24;
811     b6[((i__ * b6_dim2 + 1) << 1) + 2] = t23 - t22;
812     b4[((i__ * b4_dim2 + 1) << 1) + 1] = t31 - t34;
813     b4[((i__ * b4_dim2 + 1) << 1) + 2] = t33 + t32;
814     b5[((i__ * b5_dim2 + 1) << 1) + 1] = t31 + t34;
815     b5[((i__ * b5_dim2 + 1) << 1) + 2] = t33 - t32;
816   }
817   return 0;
818 } /* radix7b_ */
819 
gcd_(int * n1,int * n2)820 int gcd_(int* n1,int* n2)
821 {
822   /* System generated locals */
823   int ret_val;
824 
825     /* Local variables */
826   int i__, j, n;
827 
828   i__ = *n1;
829   j = *n2;
830  L10:
831   n = min(i__,j);
832   ret_val = max(i__,j);
833   if (n == 0) {
834     return ret_val;
835   }
836   i__ = n;
837   j = ret_val - n;
838   goto L10;
839 } /* gcd_ */
840 
fftfact_(int * n,int * nf,int * f)841 void fftfact_(int *n,int *nf,int *f)
842 {
843 
844   /* Local variables */
845   int i__, n7;
846 
847   /* Parameter adjustments */
848   --f;
849 
850   /* Function Body */
851   i__ = *n;
852   *nf = 0;
853   n7 = 0;
854  L10:
855   if (i__ == 1) {
856     if (n7 > 1) {
857       printf("Only one factor of 7 allowed in fft dimension.\n");
858       exit(-1);
859     }
860     return;
861   }
862   if (i__ % 7 == 0) {
863     ++(*nf);
864     f[*nf] = 7;
865     i__ /= 7;
866     ++n7;
867     goto L10;
868   }
869   if (i__ % 5 == 0) {
870     ++(*nf);
871     f[*nf] = 5;
872     i__ /= 5;
873     goto L10;
874   }
875   if (i__ % 4 == 0) {
876     ++(*nf);
877     f[*nf] = 4;
878     i__ /= 4;
879     goto L10;
880   }
881   if (i__ % 3 == 0) {
882     ++(*nf);
883     f[*nf] = 3;
884     i__ /= 3;
885     goto L10;
886   }
887   if (i__ % 2 == 0) {
888     ++(*nf);
889     f[*nf] = 2;
890     i__ /= 2;
891     goto L10;
892   }
893   printf("Illegal fft size %i\n",(*n));
894   exit(-1);
895 } /* fftfact_ */
896 
fftcoeff_(int * n,zomplex * coeff)897 void fftcoeff_(int *n,zomplex * coeff)
898 {
899   /* System generated locals */
900   int i__1, i__2;
901   double d__1, d__2;
902   zomplex z__1;
903 
904 
905     /* Local variables */
906   int i__;
907   double angle;
908 
909     /* Parameter adjustments */
910   --coeff;
911 
912   /* Function Body */
913   i__1 = *n;
914   for (i__ = 1; i__ <= i__1; ++i__) {
915     angle = (double) (i__ - 1) / (double) (*n) * 2. *
916       3.141592653589793238462643;
917     i__2 = i__;
918     d__1 = cos(angle);
919     d__2 = sin(angle);
920     z__1.re = d__1, z__1.im = d__2;
921     coeff[i__2].re = z__1.re, coeff[i__2].im = z__1.im;
922     /* L10: */
923   }
924 } /* fftcoeff_ */
925 
zfftcopyin_(int * n,int * b,int * bdim,zomplex * a,int * inc,int * lda,int * rindex,zomplex * w,int * lpref)926 void zfftcopyin_(int *n,int * b,int * bdim,zomplex * a,int *  inc ,int * lda,int * rindex,zomplex * w,int * lpref)
927 {
928   /* System generated locals */
929   int w_dim1, w_offset, i__1, i__2, i__3, i__4;
930 
931     /* Local variables */
932   int i__, j, ia;
933 
934   /*    if(lpref){ */
935   /*      if(*lda <= *inc) */
936   /*        prefetch(a,lda,inc,b,n); */
937   /*      else */
938   /*        prefetch(a,inc,lda,n,b); */
939   /*    } */
940   /* Copy data */
941   /* Parameter adjustments */
942   --rindex;
943   w_dim1 = *bdim;
944   w_offset = 1 + w_dim1 * 1;
945   w -= w_offset;
946   --a;
947 
948   /* Function Body */
949   i__1 = *n;
950   for (j = 1; j <= i__1; ++j) {
951     ia = rindex[j];
952     i__2 = *b;
953     for (i__ = 1; i__ <= i__2; ++i__) {
954       i__3 = i__ + j * w_dim1;
955       i__4 = ia;
956       w[i__3].re = a[i__4].re, w[i__3].im = a[i__4].im;
957       ia += *lda;
958     }
959   }
960 } /* zfftcopyin_ */
961 
zfftrev_(int * n,int * inc,int * nf,int * f,int * nmax,int * rindex,int * ipadok)962 int zfftrev_(int *n, int *inc, int *nf, int *f, int * nmax , int *rindex,int * ipadok)
963 {
964   int imax[10], irev, i__, s, fi, ii[10], stride[10];
965 
966   /* OK up to 100000 elements */
967   /* Parameter adjustments */
968   --f;
969   --rindex;
970 
971   /* Function Body */
972   s = 1;
973   for (fi = *nf; fi >= 1; --fi) {
974     ii[fi - 1] = 1;
975     stride[fi - 1] = s;
976     s *= f[fi];
977     imax[fi - 1] = s;
978   }
979   i__ = 1;
980   irev = 1;
981  L20:
982   rindex[i__] = (irev - 1) * *inc + 1;
983   if (i__ == *n) {
984     goto L100;
985   }
986   ++i__;
987   fi = 1;
988  L10:
989   irev += stride[fi - 1];
990   ii[fi - 1] += stride[fi - 1];
991   if (ii[fi - 1] > imax[fi - 1]) {
992     ii[fi - 1] = 1;
993     irev -= imax[fi - 1];
994     ++fi;
995     goto L10;
996   }
997   goto L20;
998  L100:
999   if (*n * *inc / f[*nf] % 1024 == 0) {
1000     *ipadok = 0;
1001   } else {
1002     *ipadok = 1;
1003   }
1004   return 0;
1005 } /* zfftrev_ */
1006 
1007 /* Row (inc=1) transform code on bitreversed vectors */
1008 /* n      length of transform (sub) vector */
1009 /* b      # of transforms in this block */
1010 /* bdim   # of rows in w, b.le.bdim */
1011 /* w      bdim*n matrix containing data elements */
1012 /* nf     # of factors in n */
1013 /* f      vector of factors */
1014 /* coeff  exp[2 pi i k/n], k=0..n-1] */
1015 /* a      pointer to output vector */
1016 /* inc    from caller, stride within transform */
1017 /* lda    from caller, stride between transforms measured in elements */
1018 /* ipadok if zero, don't copy data back to a here because doing so would */
1019 /*        trash the L1 cache */
1020 /* nrem   product of remaining factors (n unless blocking is used) */
1021 /* irow   row #, max(irow)*nrem = total length */
zfftm1rowf_(int * n,int * b,int * bdim,zomplex * w,int * nf,int * f,double * coeff,zomplex * a,int * inc,int * lda,int * ipadok,int * nprev,int * irow,int * nrem)1022 int zfftm1rowf_(int *n, int *b, int *bdim,zomplex * w, int *nf, int *f, double *coeff, zomplex *a, int *inc, int *lda,
1023 		int *ipadok, int *nprev, int *irow, int *nrem)
1024 {
1025   /* System generated locals */
1026   int i__1, i__2, i__3, i__4;
1027 
1028     /* Local variables */
1029   int aind, ntot, i__, j, p, index, ff, ic, nt, ninner, stride;
1030   int astride, bstride;
1031 
1032 
1033     /* Parameter adjustments */
1034   --coeff;
1035   --w;
1036   --f;
1037   --a;
1038 
1039     /* Function Body */
1040   stride = *bdim;
1041   ninner = 1;
1042   bstride = *bdim;
1043   ntot = *n * *bdim;
1044   nt = *n;
1045   i__1 = *nf - *ipadok;
1046   for (p = 1; p <= i__1; ++p) {
1047     ff = f[p];
1048     bstride *= ff;
1049     nt /= ff;
1050     i__2 = ntot;
1051     i__3 = bstride;
1052     for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) {
1053       index = i__;
1054       ic = (*irow - 1) * nt * *nrem;
1055       i__4 = ninner;
1056       for (j = 1; j <= i__4; ++j) {
1057 
1058 	if (ff == 2) {
1059 	  radix2f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index]), (double*)(&w[index + stride]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2]);
1060 	} else if (ff == 3) {
1061 	  radix3f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[((ic + ic) <<  1) + 1], &coeff[ic * 4 + 2]);
1062 	} else if (ff == 4) {
1063 	  radix4f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index]),  (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1064 	} else if (ff == 5) {
1065 	  radix5f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic *  6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1066 	} else if (ff == 7) {
1067 	  radix7f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]),  (double*)(&w[index + stride * 6]), b, &const1);
1068 	}
1069 	index += *bdim;
1070 	ic += nt * (*nprev * *nrem);
1071       }
1072     }
1073     stride *= ff;
1074     ninner *= ff;
1075     /* L10: */
1076   }
1077   if (*ipadok == 0) {
1078     return 0;
1079   }
1080   ff = f[p];
1081   astride = *inc * ninner;
1082   index = 1;
1083   aind = 1;
1084   ic = (*irow - 1) * *nrem;
1085   i__1 = ninner;
1086   for (j = 1; j <= i__1; ++j) {
1087     if (ff == 2) {
1088       radix2f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&a[aind]), (double*)(&a[aind + astride]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2]);
1089     } else if (ff == 3) {
1090       radix3f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], & coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1091     } else if (ff == 4) {
1092       radix4f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1093     } else if (ff == 5) {
1094       radix5f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1095     } else if (ff == 7) {
1096       radix7f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), (double*)(&a[aind + astride * 5]), (double*)(&a[aind + astride * 6]), b, lda);
1097     }
1098     index += *bdim;
1099     ic += *nrem;
1100     aind += *inc;
1101   }
1102   return 0;
1103 } /* zfftm1rowf_ */
1104 
zfftm1rowb_(int * n,int * b,int * bdim,zomplex * w,int * nf,int * f,double * coeff,zomplex * a,int * inc,int * lda,int * ipadok,int * nprev,int * irow,int * nrem)1105 int zfftm1rowb_(int *n, int *b, int *bdim,zomplex *w, int *nf, int *f, double *coeff, zomplex *a, int *inc, int *lda,
1106 		int *ipadok, int *nprev, int *irow, int *nrem)
1107 {
1108   /* System generated locals */
1109   int i__1, i__2, i__3, i__4;
1110 
1111     /* Local variables */
1112   int aind, ntot, i__, j, p, index, ff, ic, nt, ninner, stride;
1113   int astride, bstride;
1114 
1115     /* Parameter adjustments */
1116   --coeff;
1117   --w;
1118   --f;
1119   --a;
1120 
1121     /* Function Body */
1122   stride = *bdim;
1123   ninner = 1;
1124   bstride = *bdim;
1125   ntot = *n * *bdim;
1126   nt = *n;
1127   i__1 = *nf - *ipadok;
1128   for (p = 1; p <= i__1; ++p) {
1129     ff = f[p];
1130     bstride *= ff;
1131     nt /= ff;
1132     i__2 = ntot;
1133     i__3 = bstride;
1134     for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) {
1135       index = i__;
1136       ic = (*irow - 1) * nt * *nrem;
1137       i__4 = ninner;
1138       for (j = 1; j <= i__4; ++j) {
1139 	if (ff == 2) {
1140 	  radix2b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index]), (double*)(&w[index + stride]), b, &const1, &coeff[(ic << 1) + 1],  &coeff[(ic << 1) + 2]);
1141 	} else if (ff == 3) {
1142 	  radix3b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1143 	} else if (ff == 4) {
1144 	  radix4b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index]),  (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4  + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1145 	} else if (ff == 5) {
1146 	  radix5b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1147 	} else if (ff == 7) {
1148 	  radix7b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), b, &const1);
1149 	}
1150 	index += *bdim;
1151 	ic += nt * (*nprev * *nrem);
1152       }
1153     }
1154     stride *= ff;
1155     ninner *= ff;
1156     /* L10: */
1157   }
1158   if (*ipadok == 0) {
1159     return 0;
1160   }
1161   ff = f[p];
1162   astride = *inc * ninner;
1163   index = 1;
1164   aind = 1;
1165   ic = (*irow - 1) * *nrem;
1166   i__1 = ninner;
1167   for (j = 1; j <= i__1; ++j) {
1168     if (ff == 2) {
1169       radix2b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&a[aind]), (double*)(&a[aind + astride]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1)+ 2]);
1170     } else if (ff == 3) {
1171       radix3b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], & coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1172     } else if (ff == 4) {
1173       radix4b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1174     } else if (ff == 5) {
1175       radix5b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1176     } else if (ff == 7) {
1177       radix7b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), (double*)(&a[aind + astride * 5]), (double*)(&a[aind + astride * 6]), b, lda);
1178     }
1179     index += *bdim;
1180     ic += *nrem;
1181     aind += *inc;
1182   }
1183   return 0;
1184 } /* zfftm1rowb_ */
1185 
1186 
1187 /* Multiple fft routine for large vectors (length>=128) */
1188 /* Uses both L2 and L1 cache blocking. */
1189 /* The maximum supported transform length is 8192 */
zfftm1big_(int * sign,int * n,int * p,zomplex * a,int * inc,int * lda,int * lpref,int * nf,int * f,zomplex * coeff,int * rindex)1190 int zfftm1big_(int *sign,int * n,int * p,zomplex * a,int * inc,int * lda,int * lpref,int * nf,int * f,zomplex *coeff,int *rindex)
1191 {
1192   /* System generated locals */
1193   int a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
1194 
1195 
1196     /* Local variables */
1197   int bpad, bincache;
1198   zomplex wbig[65536];
1199   int b, i__, j, k, m, iwpad, b1, n1, n2, w1, ia, if__, iw;
1200   int nf1;
1201   int pad, iiw;
1202 
1203 
1204   /* Max length of col transform (size of L1 cache) */
1205   /* Work area size (size of L2 cache) */
1206   /* Parameter adjustments */
1207   --coeff;
1208   a_dim1 = *lda;
1209   a_offset = 1 + a_dim1 * 1;
1210   a -= a_offset;
1211   --f;
1212   --rindex;
1213 
1214     /* Function Body */
1215   n1 = 1;
1216   n2 = *n;
1217   if__ = 1;
1218  L10:
1219   if (n1 < n2) {
1220     n1 *= f[if__];
1221     n2 /= f[if__];
1222     ++if__;
1223     goto L10;
1224   }
1225   nf1 = if__ - 1;
1226   /* Determine the # of transforms in each block, b1. b1 is bounded from above */
1227   /* by the max work area size (the size of the L2 cache) and from below by the */
1228   /* desire to make efficient use of loop unrolling/pipelining. We want the work */
1229   /* matrix and a piece of the input matrix to reside in L2 cache at the same */
1230   /* time, so that we can do the copy back operation without additional memory */
1231   /* operations. Therefore we try to limit the work area size to half of the */
1232   /* L2 cache size. */
1233   b1 = 1024 / n1;
1234  L30:
1235   /* The work area contains n2 columns of n1*b1+bpad*b1+pad elements each. */
1236   /* The padding is chosen so that n1+bpad and 8192/16/b1 are relatively */
1237   /* prime (8192 is the page size.) */
1238   bincache = 512 / b1;
1239   bpad = 0;
1240  L20:
1241   i__1 = n1 + bpad;
1242   if (gcd_(&i__1, &bincache) > 1) {
1243     ++bpad;
1244     goto L20;
1245   }
1246   /* If 8192/16/b1 is not an int, we have to adjust with some extra padding pad */
1247   /* Because the columns are roughly 2 pages long, there is a "2" below. */
1248   pad = (512 - b1 * bincache) << 1;
1249   w1 = (n1 + bpad) * b1 + pad;
1250   if (w1 * n2 > 32768) {
1251     /* If the work area turned out to be too large, first reduce the block size */
1252     if (b1 > 8) {
1253       --b1;
1254       goto L30;
1255     }
1256     /* If the work area is still too large, give up. */
1257     /* Transform lenghts up to 8192 are safe with the current cache parameters. */
1258     if (w1 * n2 > 65536) {
1259       printf("Large FFT work area too small.\n");
1260       exit(-1);
1261     }
1262   }
1263   /* Loop over fft blocks */
1264   i__1 = *p;
1265   i__2 = b1;
1266   for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
1267     /* Computing MIN */
1268     i__3 = b1, i__4 = *p - i__ + 1;
1269     b = min(i__3,i__4);
1270     /* Loop over columns of the work matrix */
1271     iw = 1;
1272     iwpad = 1;
1273     i__3 = n2;
1274     for (j = 1; j <= i__3; ++j) {
1275       /* Fill column with data */
1276       zfftcopyin_(&n1, &b, &b1, &a[i__ * a_dim1 + 1], inc, lda, &rindex[iw], &wbig[iwpad - 1], lpref);
1277       /* Do a block of column transforms */
1278       if (*sign >= 1) {
1279 	zfftm1rowf_(&n1, &b, &b1, &wbig[iwpad - 1], &nf1, &f[1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &const1, & const1, &n2);
1280       } else {
1281 	zfftm1rowb_(&n1, &b, &b1, &wbig[iwpad - 1], &nf1, &f[1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &const1, & const1, &n2);
1282       }
1283       iw += n1;
1284       iwpad += w1;
1285     }
1286     /* Loop over block rows of the work matrix */
1287     iw = 1;
1288     i__3 = n1;
1289     for (j = 1; j <= i__3; ++j) {
1290       /* Do a block of row transforms */
1291       if (*sign >= 1) {
1292 	i__4 = *nf - nf1;
1293 	zfftm1rowf_(&n2, &b, &w1, &wbig[iw - 1], &i__4, &f[nf1 + 1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &n1, &j,   &const1);
1294       } else {
1295 	i__4 = *nf - nf1;
1296 	zfftm1rowb_(&n2, &b, &w1, &wbig[iw - 1], &i__4, &f[nf1 + 1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &n1, &j,   &const1);
1297       }
1298       iw += b1;
1299     }
1300     /* Copy back data */
1301     if (*lda <= *inc) {
1302       if (*lda == 1) {
1303 	ia = 1;
1304 	iiw = 1;
1305 	i__3 = n2;
1306 	for (j = 1; j <= i__3; ++j) {
1307 	  iw = iiw;
1308 	  i__4 = n1;
1309 	  for (k = 1; k <= i__4; ++k) {
1310 	    i__5 = b;
1311 	    for (m = 1; m <= i__5; ++m) {
1312 	      /* An index checker would not like this */
1313 	      i__6 = ia + m - 1 + i__ * a_dim1;
1314 	      i__7 = iw + m - 2;
1315 	      a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1316 		.im;
1317 	    }
1318 	    ia += *inc;
1319 	    iw += b1;
1320 	  }
1321 	  iiw += w1;
1322 	}
1323       } else {
1324 	ia = 1;
1325 	iiw = 1;
1326 	i__3 = n2;
1327 	for (j = 1; j <= i__3; ++j) {
1328 	  iw = iiw;
1329 	  i__4 = n1;
1330 	  for (k = 1; k <= i__4; ++k) {
1331 	    i__5 = b;
1332 	    for (m = 1; m <= i__5; ++m) {
1333 	      i__6 = ia + (m - 1 + i__) * a_dim1;
1334 	      i__7 = iw + m - 2;
1335 	      a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1336 		.im;
1337 	    }
1338 	    ia += *inc;
1339 	    iw += b1;
1340 	  }
1341 	  iiw += w1;
1342 	}
1343       }
1344     } else {
1345       /* lda.gt.inc case */
1346       if (*inc == 1) {
1347 	i__3 = b;
1348 	for (m = 1; m <= i__3; ++m) {
1349 	  ia = 1;
1350 	  iw = m;
1351 	  i__4 = n2;
1352 	  for (j = 1; j <= i__4; ++j) {
1353 	    i__5 = n1;
1354 	    for (k = 1; k <= i__5; ++k) {
1355 	      i__6 = ia + k - 1 + (m - 1 + i__) * a_dim1;
1356 	      i__7 = iw + (k - 1) * b1 - 1;
1357 	      a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1358 		.im;
1359 	    }
1360 	    ia += n1;
1361 	    iw += w1;
1362 	  }
1363 	}
1364       } else {
1365 	i__3 = b;
1366 	for (m = 1; m <= i__3; ++m) {
1367 	  ia = 1;
1368 	  iw = m;
1369 	  i__4 = n2;
1370 	  for (j = 1; j <= i__4; ++j) {
1371 	    i__5 = n1;
1372 	    for (k = 1; k <= i__5; ++k) {
1373 	      i__6 = ia + (k - 1) * *inc + (m - 1 + i__) *
1374 		a_dim1;
1375 	      i__7 = iw + (k - 1) * b1 - 1;
1376 	      a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1377 		.im;
1378 	    }
1379 	    ia += n1 * *inc;
1380 	    iw += w1;
1381 	  }
1382 	}
1383       }
1384     }
1385     /* next fft block */
1386   }
1387   return 0;
1388 } /* zfftm1big_ */
1389 
1390 /* copy out routine for the large inc case */
zfftcopyout_(int * n,int * b,zomplex * a,int * inc,int * lda,zomplex * w)1391 void zfftcopyout_(int *n, int *b, zomplex *a, int *inc, int *lda,zomplex  *w)
1392 {
1393   /* System generated locals */
1394   int w_dim1, w_offset, i__1, i__2, i__3, i__4;
1395 
1396     /* Local variables */
1397   int i__, j, index, ia;
1398 
1399   /* Parameter adjustments */
1400   w_dim1 = *b;
1401   w_offset = 1 + w_dim1 * 1;
1402   w -= w_offset;
1403   --a;
1404 
1405     /* Function Body */
1406   ia = 1;
1407   i__1 = *n;
1408   for (j = 1; j <= i__1; ++j) {
1409     index = ia;
1410     i__2 = *b;
1411     for (i__ = 1; i__ <= i__2; ++i__) {
1412       i__3 = index;
1413       i__4 = i__ + j * w_dim1;
1414       a[i__3].re = w[i__4].re, a[i__3].im = w[i__4].im;
1415       index += *lda;
1416     }
1417     ia += *inc;
1418   }
1419 } /* zfftcopyout_ */
1420 
zfftm1di_(int * n,zomplex * coeff)1421 int zfftm1di_(int *n,zomplex * coeff)
1422 {
1423   /* Local variables */
1424   int nf;
1425 
1426 
1427     /* Parameter adjustments */
1428   --coeff;
1429 
1430   /* Function Body */
1431   if (*n < 1) {
1432     printf("zfftm1di: illegal FFT dimension %i\n",(*n));
1433     exit(-1);
1434   }
1435   coeff[1].re = 12345678., coeff[1].im = 0.;
1436   coeff[2].re = (double) (*n), coeff[2].im = 0.;
1437   fftfact_(n, &nf, (int*)(&coeff[4]));
1438   coeff[3].re = (double) nf, coeff[3].im = 0.;
1439   fftcoeff_(n, &coeff[15]);
1440   return 0;
1441 } /* zfftm1di_ */
1442 
zfftm1d_(int * sign,int * n,int * p,zomplex * a,int * inc,int * lda,zomplex * coeff,int * lpref)1443 int zfftm1d_(int *sign,int * n,int * p, zomplex* a, int *inc,int * lda, zomplex *coeff, int *lpref)
1444 {
1445   /* System generated locals */
1446   int a_dim1, a_offset, i__1, i__2, i__3, i__4;
1447 
1448 
1449     /* Local variables */
1450   int bdim;
1451   int b;
1452   int i__;
1453   zomplex w[1024];
1454   int nf, ipadok;
1455   int rindex[16384];
1456 
1457     /* Parameter adjustments */
1458   --coeff;
1459   a_dim1 = *lda;
1460   a_offset = 1 + a_dim1 * 1;
1461   a -= a_offset;
1462 
1463     /* Function Body */
1464   if (coeff[1].re != 12345678. || coeff[1].im != 0.) {
1465     printf("zfftm1d: coeff not initialized.\n");
1466     exit(-1);
1467   }
1468   if ((double) (*n) != coeff[2].re || 0. != coeff[2].im) {
1469     printf("zfftm1d: coeff initialized with wrong dimension.\n");
1470     exit(-1);
1471   }
1472   nf = (int) coeff[3].re;
1473   zfftrev_(n, inc, &nf, (int*)(&coeff[4]), &const16384, rindex, &ipadok);
1474   if (*n <= 128) {
1475     /* Use L1 blocking only */
1476     bdim = 1024 / *n;
1477     i__1 = *p;
1478     i__2 = bdim;
1479     for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
1480       /* Computing MIN */
1481       i__3 = bdim, i__4 = *p - i__ + 1;
1482       b = min(i__3,i__4);
1483       zfftcopyin_(n, &b, &b, &a[i__ * a_dim1 + 1], inc, lda, rindex, w, lpref);
1484       if (*sign >= 1) {
1485 	zfftm1rowf_(n, &b, &b, w, &nf, (int*)(&coeff[4]), (double*)(&coeff[15]), &a[i__ * a_dim1 + 1], inc, lda, &ipadok, &const1, &const1, &const1);
1486       } else {
1487 	zfftm1rowb_(n, &b, &b, w, &nf, (int*)(&coeff[4]), (double*)(&coeff[15]), &a[i__ * a_dim1 + 1], inc, lda, &ipadok, &const1, &const1, &const1);
1488       }
1489       if (ipadok == 0) {
1490 	zfftcopyout_(n, &b, &a[i__ * a_dim1 + 1], inc, lda, w);
1491       }
1492     }
1493   } else {
1494     /* Use both L1 and L2 blocking */
1495     zfftm1big_(sign, n, p, &a[a_offset], inc, lda, lpref, &nf, (int*)(&coeff[4]),
1496 	       &coeff[15], rindex);
1497   }
1498   return 0;
1499 } /* zfftm1d_ */
1500 
1501 
1502 
1503 
1504 /* Multidimensional transforms */
zfft2di_(int * x,int * y,zomplex * coeff)1505 int zfft2di_(int *x, int *y,zomplex * coeff)
1506 {
1507 
1508   /* Parameter adjustments */
1509   --coeff;
1510 
1511   /* Function Body */
1512   zfftm1di_(x, &coeff[1]);
1513   zfftm1di_(y, &coeff[*x + 16]);
1514   return 0;
1515 } /* zfft2di_ */
1516 
zfft2d_(int * sign,int * x,int * y,zomplex * a,int * lda,zomplex * coeff)1517 int zfft2d_(int *sign, int *x, int *y, zomplex *a, int *lda, zomplex *coeff)
1518 {
1519   /* System generated locals */
1520   int a_dim1, a_offset;
1521   int L__1;
1522 
1523   /* Local variables */
1524 
1525     /* Parameter adjustments */
1526   --coeff;
1527   a_dim1 = *lda;
1528   a_offset = 1 + a_dim1 * 1;
1529   a -= a_offset;
1530 
1531     /* Function Body */
1532   zfftm1d_(sign, y, x, &a[a_offset], lda, &const1, &coeff[*x + 16], &constTrue);
1533   L__1 = *x * *y > 32768;
1534   zfftm1d_(sign, x, y, &a[a_offset], &const1, lda, &coeff[1], &L__1);
1535   return 0;
1536 } /* zfft2d_ */
1537 
zfft3di_(int * x,int * y,int * z__,zomplex * coeff)1538 int zfft3di_(int *x, int *y, int *z__,zomplex * coeff)
1539 {
1540 
1541   /* Parameter adjustments */
1542   --coeff;
1543 
1544   /* Function Body */
1545   zfftm1di_(x, &coeff[1]);
1546   zfftm1di_(y, &coeff[*x + 16]);
1547   zfftm1di_(z__, &coeff[*x + *y + 31]);
1548   return 0;
1549 } /* zfft3di_ */
1550 
zfft3d_(int * sign,int * x,int * y,int * z__,zomplex * a,int * la1,int * la2,zomplex * coeff)1551 int zfft3d_(int *sign,int * x, int *y, int *z__,zomplex * a, int *la1, int *la2,zomplex * coeff)
1552 {
1553   /* System generated locals */
1554   int a_dim1, a_dim2, a_offset, i__1, i__2;
1555 
1556     /* Local variables */
1557   int k;
1558   /* Parameter adjustments */
1559   --coeff;
1560   a_dim1 = *la1;
1561   a_dim2 = *la2;
1562   a_offset = 1 + a_dim1 * (1 + a_dim2 * 1);
1563   a -= a_offset;
1564 
1565   /* Function Body */
1566   i__1 = *z__;
1567   for (k = 1; k <= i__1; ++k) {
1568     zfftm1d_(sign, y, x, &a[(k * a_dim2 + 1) * a_dim1 + 1], la1, &const1, &
1569 	     coeff[*x + 16], &constTrue);
1570     zfftm1d_(sign, x, y, &a[(k * a_dim2 + 1) * a_dim1 + 1], &const1, la1, &
1571 	     coeff[1], &constFalse);
1572   }
1573   i__1 = *y;
1574   for (k = 1; k <= i__1; ++k) {
1575     i__2 = *la1 * *la2;
1576     zfftm1d_(sign, z__, x, &a[(k + a_dim2) * a_dim1 + 1], &i__2, &const1, &
1577 	     coeff[*x + *y + 31], &constTrue);
1578   }
1579   return 0;
1580 } /* zfft3d_ */
1581 
1582 
1583 /* SGI-ZFFT wrapper */
zfftm1di(int m,zomplex * save)1584 zomplex *zfftm1di( int m, zomplex *save)
1585 {
1586   zomplex *c = 0;
1587   int i=0;
1588   c = (zomplex*)malloc((m+15)*sizeof(zomplex));
1589   zfftm1di_(&m,c);
1590   if(save != 0)
1591     for(i=0;i<m+15;i++)
1592       save[i] = c[i];
1593   return c;
1594 }
1595 
zfftm1d(int sign,int m,int n,zomplex * array,int incI,int incJ,zomplex * save)1596 int zfftm1d( int sign, int m, int n, zomplex *array, int incI, int incJ, zomplex *save)
1597 {
1598   static int lpref = 0;
1599   zfftm1d_(&sign,&m,&n,array,&incI,&incJ,save,&lpref);
1600   return 0;
1601 }
1602 
zfft2di(int n1,int n2,zomplex * save)1603 zomplex *zfft2di( int n1, int n2, zomplex *save)
1604 {
1605   zomplex *c = 0;
1606   int i=0;
1607   c = (zomplex*)malloc((n1+n2+30)*sizeof(zomplex));
1608   zfft2di_(&n1,&n2,c);
1609   if(save != 0)
1610     for(i=0;i<n1+n2+30;i++)
1611       save[i] = c[i];
1612   return c;
1613 }
1614 
zfft2d(int sign,int n1,int n2,zomplex * array,int ld,zomplex * save)1615 int zfft2d( int sign, int n1, int n2, zomplex *array, int ld, zomplex *save)
1616 {
1617   zfft2d_(&sign,&n1,&n2,array,&ld,save);
1618   return 0;
1619 }
1620 
zfft3di(int n1,int n2,int n3,zomplex * save)1621 zomplex *zfft3di( int n1, int n2, int n3, zomplex *save)
1622 {
1623   zomplex *c = 0;
1624   int i=0;
1625   c = (zomplex*)malloc((n1+n2+n3+45)*sizeof(zomplex));
1626   zfft3di_(&n1,&n2,&n3,c);
1627   if(save != 0)
1628     for(i=0;i<n1+n2+n3+45;i++)
1629       save[i] = c[i];
1630   return c;
1631 }
1632 
zfft3d(int sign,int n1,int n2,int n3,zomplex * array,int ld1,int ld2,zomplex * save)1633 int zfft3d( int sign, int n1, int n2, int n3, zomplex *array, int ld1, int ld2, zomplex *save)
1634 {
1635   zfft3d_(&sign,&n1,&n2,&n3,array,&ld1,&ld2,save);
1636   return 0;
1637 }
1638 
1639 #ifdef __cplusplus
1640 } /* extern "C" */
1641 #endif
1642 
1643 #endif /* HAVE_FFT*/
1644