1 /* ZFFT http://fy.chalmers.se/~tfylb/ZFFT.html*/
2 /* Copyright (c) 2000 by Lennart Bengtsson */
3 /* Permission to use, copy, modify, distribute, and sell this software and its */
4 /* documentation for any purpose is hereby granted without fee, provided that */
5 /* the above copyright notice appear in all copies and that both the copyright */
6 /* notice and this permission notice appear in supporting documentation. */
7 /* No representations are made about the suitability of this software for */
8 /* any purpose. It is provided "as is" without express or implied warranty. */
9 /* Note also that this kind of optimization violates the fortran standard */
10 /* so the compiled code should be tested carefully. So far ZFFT has been */
11 /* tested on SUN, SGI and IBM servers. */
12 #ifndef HAVE_FFT
13
14 #ifdef __cplusplus
15 extern "C" {
16 #endif
17
18 #include <math.h>
19 #include <stdio.h>
20 #include <stdlib.h>
21
22 typedef struct {
23 double re;
24 double im;
25 } zomplex;
26
27 #ifndef min
28 #define min(a,b) ( ((a) < (b)) ? (a) : (b) )
29 #endif
30 #ifndef max
31 #define max(a,b) ( ((a) > (b)) ? (a) : (b) )
32 #endif
33
34 /* Table of constant values */
35 static int const1 = 1;
36 static int const16384 = 16384;
37 static int const0 = 0;
38 static int constTrue = 1;
39 static int constFalse = 0;
40
radix2f_(double * a1,double * a2,double * b1,double * b2,int * b,int * ldb,double * c2,double * d2)41 int radix2f_(double *a1,double * a2, double *b1,double * b2,int * b,int *ldb, double *c2,double * d2)
42 {
43 /* System generated locals */
44 int b1_dim2, b1_offset, b2_dim2, b2_offset, i__1;
45
46 /* Local variables */
47 int i__;
48 double i1, i2, j2, r1, r2, s2;
49
50 /* Parameter adjustments */
51 a2 -= 3;
52 a1 -= 3;
53 b2_dim2 = *ldb;
54 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
55 b2 -= b2_offset;
56 b1_dim2 = *ldb;
57 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
58 b1 -= b1_offset;
59
60 /* Function Body */
61 i__1 = *b;
62 for (i__ = 1; i__ <= i__1; ++i__) {
63 r2 = a2[(i__ << 1) + 1];
64 i2 = a2[(i__ << 1) + 2];
65 s2 = r2 * *c2 - i2 * *d2;
66 j2 = i2 * *c2 + r2 * *d2;
67 r1 = a1[(i__ << 1) + 1];
68 i1 = a1[(i__ << 1) + 2];
69 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + s2;
70 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + j2;
71 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - s2;
72 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - j2;
73 }
74 return 0;
75 } /* radix2f_ */
76
radix3f_(double * a1,double * a2,double * a3,double * b1,double * b2,double * b3,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3)77 int radix3f_(double *a1, double *a2, double *a3, double *b1, double *b2, double *b3, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3)
78 {
79 /* Initialized data */
80
81 double sin60 = .86602540378443864676;
82 double half = .5;
83
84 /* System generated locals */
85 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset, i__1;
86
87 /* Local variables */
88 int i__;
89 double i1, i2, i3, j2, j3, r1, r2, r3, t1, t2, t3, t4, s2, s3;
90
91 /* Parameter adjustments */
92 a3 -= 3;
93 a2 -= 3;
94 a1 -= 3;
95 b3_dim2 = *ldb;
96 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
97 b3 -= b3_offset;
98 b2_dim2 = *ldb;
99 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
100 b2 -= b2_offset;
101 b1_dim2 = *ldb;
102 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
103 b1 -= b1_offset;
104
105 /* Function Body */
106 i__1 = *b;
107 for (i__ = 1; i__ <= i__1; ++i__) {
108 r3 = a2[(i__ << 1) + 1];
109 i3 = a2[(i__ << 1) + 2];
110 s3 = r3 * *c2 - i3 * *d2;
111 j3 = i3 * *c2 + r3 * *d2;
112 r2 = a3[(i__ << 1) + 1];
113 i2 = a3[(i__ << 1) + 2];
114 s2 = r2 * *c3 - i2 * *d3;
115 j2 = i2 * *c3 + r2 * *d3;
116 t1 = s2 + s3;
117 t2 = j2 + j3;
118 t3 = s2 - s3;
119 t4 = j2 - j3;
120 r1 = a1[(i__ << 1) + 1];
121 i1 = a1[(i__ << 1) + 2];
122 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1;
123 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t2;
124 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - half * t1 + sin60 * t4;
125 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - half * t2 - sin60 * t3;
126 b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 - half * t1 - sin60 * t4;
127 b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 - half * t2 + sin60 * t3;
128 }
129 return 0;
130 } /* radix3f_ */
131
radix4f_(double * a1,double * a2,double * a3,double * a4,double * b1,double * b2,double * b3,double * b4,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4)132 int radix4f_(double *a1, double *a2, double *a3, double *a4, double *b1, double *b2, double *b3, double *b4, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3, double *c4, double *d4)
133 {
134 /* System generated locals */
135 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
136 b4_dim2, b4_offset, i__1;
137
138 /* Local variables */
139 int i__;
140 double i0, i1, r0, r1, r2, r3, i2, i3, s1, j1, s2, j2, s3, j3,
141 t1, t2, t3, t4, t5, t6, t7, t8;
142
143 /* Parameter adjustments */
144 a4 -= 3;
145 a3 -= 3;
146 a2 -= 3;
147 a1 -= 3;
148 b4_dim2 = *ldb;
149 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
150 b4 -= b4_offset;
151 b3_dim2 = *ldb;
152 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
153 b3 -= b3_offset;
154 b2_dim2 = *ldb;
155 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
156 b2 -= b2_offset;
157 b1_dim2 = *ldb;
158 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
159 b1 -= b1_offset;
160
161 /* Function Body */
162 i__1 = *b;
163 for (i__ = 1; i__ <= i__1; ++i__) {
164 r1 = a2[(i__ << 1) + 1];
165 i1 = a2[(i__ << 1) + 2];
166 s1 = r1 * *c2 - i1 * *d2;
167 j1 = i1 * *c2 + r1 * *d2;
168 r2 = a3[(i__ << 1) + 1];
169 i2 = a3[(i__ << 1) + 2];
170 s2 = r2 * *c3 - i2 * *d3;
171 j2 = i2 * *c3 + r2 * *d3;
172 r3 = a4[(i__ << 1) + 1];
173 i3 = a4[(i__ << 1) + 2];
174 s3 = r3 * *c4 - i3 * *d4;
175 j3 = i3 * *c4 + r3 * *d4;
176 r0 = a1[(i__ << 1) + 1];
177 i0 = a1[(i__ << 1) + 2];
178 t1 = r0 + s2;
179 t2 = s1 + s3;
180 t3 = i0 + j2;
181 t4 = j1 + j3;
182 t5 = r0 - s2;
183 t6 = j1 - j3;
184 t7 = i0 - j2;
185 t8 = s1 - s3;
186 b1[((i__ * b1_dim2 + 1) << 1) + 1] = t1 + t2;
187 b1[((i__ * b1_dim2 + 1) << 1) + 2] = t3 + t4;
188 b2[((i__ * b2_dim2 + 1) << 1) + 1] = t5 - t6;
189 b2[((i__ * b2_dim2 + 1) << 1) + 2] = t7 + t8;
190 b3[((i__ * b3_dim2 + 1) << 1) + 1] = t1 - t2;
191 b3[((i__ * b3_dim2 + 1) << 1) + 2] = t3 - t4;
192 b4[((i__ * b4_dim2 + 1) << 1) + 1] = t5 + t6;
193 b4[((i__ * b4_dim2 + 1) << 1) + 2] = t7 - t8;
194 }
195 return 0;
196 } /* radix4f_ */
197
radix5f_(double * a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)198 int radix5f_(double *a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int *b, int *ldb, double *c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)
199 {
200 /* Initialized data */
201
202 double cos36 = .8090169943749474241;
203 double sin36 = .58778525229247312917;
204 double cos72 = .3090169943749474241;
205 double sin72 = .95105651629515357212;
206
207 /* System generated locals */
208 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
209 b4_dim2, b4_offset, b5_dim2, b5_offset, i__1;
210
211 /* Local variables */
212 int i__;
213 double r1, r2, r3, r4, r5, i1, i2, i3, i4, i5, s2, s3, s4, s5,
214 j2, j3, j4, j5, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12,
215 t13, t14, t15, t16;
216
217 /* Parameter adjustments */
218 a5 -= 3;
219 a4 -= 3;
220 a3 -= 3;
221 a2 -= 3;
222 a1 -= 3;
223 b5_dim2 = *ldb;
224 b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
225 b5 -= b5_offset;
226 b4_dim2 = *ldb;
227 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
228 b4 -= b4_offset;
229 b3_dim2 = *ldb;
230 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
231 b3 -= b3_offset;
232 b2_dim2 = *ldb;
233 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
234 b2 -= b2_offset;
235 b1_dim2 = *ldb;
236 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
237 b1 -= b1_offset;
238
239 /* Function Body */
240 i__1 = *b;
241 for (i__ = 1; i__ <= i__1; ++i__) {
242 r5 = a2[(i__ << 1) + 1];
243 i5 = a2[(i__ << 1) + 2];
244 s5 = r5 * *c2 - i5 * *d2;
245 j5 = i5 * *c2 + r5 * *d2;
246 r4 = a3[(i__ << 1) + 1];
247 i4 = a3[(i__ << 1) + 2];
248 s4 = r4 * *c3 - i4 * *d3;
249 j4 = i4 * *c3 + r4 * *d3;
250 r3 = a4[(i__ << 1) + 1];
251 i3 = a4[(i__ << 1) + 2];
252 s3 = r3 * *c4 - i3 * *d4;
253 j3 = i3 * *c4 + r3 * *d4;
254 r2 = a5[(i__ << 1) + 1];
255 i2 = a5[(i__ << 1) + 2];
256 s2 = r2 * *c5 - i2 * *d5;
257 j2 = i2 * *c5 + r2 * *d5;
258 t1 = s2 + s5;
259 t2 = s3 + s4;
260 t3 = s2 - s5;
261 t4 = s3 - s4;
262 t5 = j2 + j5;
263 t6 = j3 + j4;
264 t7 = j2 - j5;
265 t8 = j3 - j4;
266 r1 = a1[(i__ << 1) + 1];
267 i1 = a1[(i__ << 1) + 2];
268 t9 = t1 * cos72 - t2 * cos36;
269 t10 = t5 * cos72 - t6 * cos36;
270 t11 = t2 * cos72 - t1 * cos36;
271 t12 = t6 * cos72 - t5 * cos36;
272 t13 = t7 * sin72 + t8 * sin36;
273 t14 = t3 * sin72 + t4 * sin36;
274 t15 = t8 * sin72 - t7 * sin36;
275 t16 = t4 * sin72 - t3 * sin36;
276 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1 + t2;
277 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t5 + t6;
278 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 + t9 + t13;
279 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 + t10 - t14;
280 b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 + t11 - t15;
281 b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 + t12 + t16;
282 b4[((i__ * b4_dim2 + 1) << 1) + 1] = r1 + t11 + t15;
283 b4[((i__ * b4_dim2 + 1) << 1) + 2] = i1 + t12 - t16;
284 b5[((i__ * b5_dim2 + 1) << 1) + 1] = r1 + t9 - t13;
285 b5[((i__ * b5_dim2 + 1) << 1) + 2] = i1 + t10 + t14;
286 }
287 return 0;
288 } /* radix5f_ */
289
radix7f_(double * a1,double * a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5,double * b6,double * b7,int * b,int * ldb)290 int radix7f_(double *a1,double * a2,double * a3, double *a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5, double *b6,double * b7,int * b,int * ldb)
291 {
292 /* Initialized data */
293
294 double c1 = .623489801858733530525;
295 double s1 = .78183148246802980870844;
296 double c2 = .2225209339563144042889;
297 double s2 = .97492791218182360701813;
298 double c3 = .9009688679024191262361;
299 double s3 = .43388373911755812047577;
300
301 /* System generated locals */
302 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
303 b4_dim2, b4_offset, b5_dim2, b5_offset, b6_dim2, b6_offset,
304 b7_dim2, b7_offset, i__1;
305
306 /* Local variables */
307 int i__;
308 double d1, p1, p2, p3, p4, t1, t2, t3, t4, t5, t6, t7, p5, p6,
309 p7, r1, r2, d2, r3, d3, i1, j1, i2, j2, i3, j3, t11, t21, t31,
310 t12, t22, t32, t13, t23, t33, t14, t24, t34;
311
312 /* Parameter adjustments */
313 a7 -= 3;
314 a6 -= 3;
315 a5 -= 3;
316 a4 -= 3;
317 a3 -= 3;
318 a2 -= 3;
319 a1 -= 3;
320 b7_dim2 = *ldb;
321 b7_offset = 1 + 2 * (1 + b7_dim2 * 1);
322 b7 -= b7_offset;
323 b6_dim2 = *ldb;
324 b6_offset = 1 + 2 * (1 + b6_dim2 * 1);
325 b6 -= b6_offset;
326 b5_dim2 = *ldb;
327 b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
328 b5 -= b5_offset;
329 b4_dim2 = *ldb;
330 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
331 b4 -= b4_offset;
332 b3_dim2 = *ldb;
333 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
334 b3 -= b3_offset;
335 b2_dim2 = *ldb;
336 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
337 b2 -= b2_offset;
338 b1_dim2 = *ldb;
339 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
340 b1 -= b1_offset;
341
342 /* Function Body */
343 i__1 = *b;
344 for (i__ = 1; i__ <= i__1; ++i__) {
345 t1 = a2[(i__ << 1) + 1];
346 t2 = a7[(i__ << 1) + 1];
347 r1 = t1 + t2;
348 d1 = t1 - t2;
349 t11 = r1 * c1;
350 t21 = r1 * c2;
351 t31 = r1 * c3;
352 t12 = d1 * s1;
353 t22 = d1 * s2;
354 t32 = d1 * s3;
355 t3 = a3[(i__ << 1) + 1];
356 t4 = a6[(i__ << 1) + 1];
357 r2 = t3 + t4;
358 d2 = t3 - t4;
359 t11 -= r2 * c2;
360 t21 += r2 * c3;
361 t31 -= r2 * c1;
362 t12 += d2 * s2;
363 t22 -= d2 * s3;
364 t32 -= d2 * s1;
365 t5 = a4[(i__ << 1) + 1];
366 t6 = a5[(i__ << 1) + 1];
367 r3 = t5 + t6;
368 d3 = t5 - t6;
369 t7 = a1[(i__ << 1) + 1];
370 b1[((i__ * b1_dim2 + 1) << 1) + 1] = t7 + r1 + r2 + r3;
371 t11 -= r3 * c3;
372 t21 -= r3 * c1;
373 t31 += r3 * c2;
374 t12 += d3 * s3;
375 t22 -= d3 * s1;
376 t32 += d3 * s2;
377 p1 = a2[(i__ << 1) + 2];
378 p2 = a7[(i__ << 1) + 2];
379 i1 = p1 + p2;
380 j1 = p1 - p2;
381 t13 = i1 * c1;
382 t23 = i1 * c2;
383 t33 = i1 * c3;
384 t14 = j1 * s1;
385 t24 = j1 * s2;
386 t34 = j1 * s3;
387 p3 = a3[(i__ << 1) + 2];
388 p4 = a6[(i__ << 1) + 2];
389 i2 = p3 + p4;
390 j2 = p3 - p4;
391 t13 -= i2 * c2;
392 t23 += i2 * c3;
393 t33 -= i2 * c1;
394 t14 += j2 * s2;
395 t24 -= j2 * s3;
396 t34 -= j2 * s1;
397 p5 = a4[(i__ << 1) + 2];
398 p6 = a5[(i__ << 1) + 2];
399 i3 = p5 + p6;
400 j3 = p5 - p6;
401 p7 = a1[(i__ << 1) + 2];
402 b1[((i__ * b1_dim2 + 1) << 1) + 2] = p7 + i1 + i2 + i3;
403 t13 -= i3 * c3;
404 t23 -= i3 * c1;
405 t33 += i3 * c2;
406 t14 += j3 * s3;
407 t24 -= j3 * s1;
408 t34 += j3 * s2;
409 t11 = t7 + t11;
410 t21 = t7 - t21;
411 t31 = t7 - t31;
412 t13 = p7 + t13;
413 t23 = p7 - t23;
414 t33 = p7 - t33;
415 b2[((i__ * b2_dim2 + 1) << 1) + 1] = t11 - t14;
416 b2[((i__ * b2_dim2 + 1) << 1) + 2] = t13 + t12;
417 b7[((i__ * b7_dim2 + 1) << 1) + 1] = t11 + t14;
418 b7[((i__ * b7_dim2 + 1) << 1) + 2] = t13 - t12;
419 b3[((i__ * b3_dim2 + 1) << 1) + 1] = t21 - t24;
420 b3[((i__ * b3_dim2 + 1) << 1) + 2] = t23 + t22;
421 b6[((i__ * b6_dim2 + 1) << 1) + 1] = t21 + t24;
422 b6[((i__ * b6_dim2 + 1) << 1) + 2] = t23 - t22;
423 b4[((i__ * b4_dim2 + 1) << 1) + 1] = t31 - t34;
424 b4[((i__ * b4_dim2 + 1) << 1) + 2] = t33 + t32;
425 b5[((i__ * b5_dim2 + 1) << 1) + 1] = t31 + t34;
426 b5[((i__ * b5_dim2 + 1) << 1) + 2] = t33 - t32;
427 }
428 return 0;
429 } /* radix7f_ */
430
radix2b_(double * a1,double * a2,double * b1,double * b2,int * b,int * ldb,double * c2,double * d2)431 int radix2b_(double *a1, double *a2, double *b1, double *b2,int * b, int *ldb, double *c2, double *d2)
432 {
433 /* System generated locals */
434 int b1_dim2, b1_offset, b2_dim2, b2_offset, i__1;
435
436 /* Local variables */
437 int i__;
438 double i1, i2, j2, r1, r2, s2;
439 /* Parameter adjustments */
440 a2 -= 3;
441 a1 -= 3;
442 b2_dim2 = *ldb;
443 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
444 b2 -= b2_offset;
445 b1_dim2 = *ldb;
446 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
447 b1 -= b1_offset;
448
449 /* Function Body */
450 i__1 = *b;
451 for (i__ = 1; i__ <= i__1; ++i__) {
452 r2 = a2[(i__ << 1) + 1];
453 i2 = a2[(i__ << 1) + 2];
454 s2 = r2 * *c2 + i2 * *d2;
455 j2 = i2 * *c2 - r2 * *d2;
456 r1 = a1[(i__ << 1) + 1];
457 i1 = a1[(i__ << 1) + 2];
458 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + s2;
459 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + j2;
460 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - s2;
461 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - j2;
462 }
463 return 0;
464 } /* radix2b_ */
465
radix3b_(double * a1,double * a2,double * a3,double * b1,double * b2,double * b3,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3)466 int radix3b_(double *a1,double * a2, double *a3,double * b1,double * b2,double * b3, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3)
467 {
468 /* Initialized data */
469
470 double sin60 = .86602540378443864676;
471 double half = .5;
472
473 /* System generated locals */
474 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset, i__1;
475
476 /* Local variables */
477 int i__;
478 double i1, i2, i3, j2, j3, r1, r2, r3, t1, t2, t3, t4, s2, s3;
479
480 /* Parameter adjustments */
481 a3 -= 3;
482 a2 -= 3;
483 a1 -= 3;
484 b3_dim2 = *ldb;
485 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
486 b3 -= b3_offset;
487 b2_dim2 = *ldb;
488 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
489 b2 -= b2_offset;
490 b1_dim2 = *ldb;
491 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
492 b1 -= b1_offset;
493
494 /* Function Body */
495 i__1 = *b;
496 for (i__ = 1; i__ <= i__1; ++i__) {
497 r2 = a2[(i__ << 1) + 1];
498 i2 = a2[(i__ << 1) + 2];
499 s2 = r2 * *c2 + i2 * *d2;
500 j2 = i2 * *c2 - r2 * *d2;
501 r3 = a3[(i__ << 1) + 1];
502 i3 = a3[(i__ << 1) + 2];
503 s3 = r3 * *c3 + i3 * *d3;
504 j3 = i3 * *c3 - r3 * *d3;
505 t1 = s2 + s3;
506 t2 = j2 + j3;
507 t3 = s2 - s3;
508 t4 = j2 - j3;
509 r1 = a1[(i__ << 1) + 1];
510 i1 = a1[(i__ << 1) + 2];
511 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1;
512 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t2;
513 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 - half * t1 + sin60 * t4;
514 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 - half * t2 - sin60 * t3;
515 b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 - half * t1 - sin60 * t4;
516 b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 - half * t2 + sin60 * t3;
517 }
518 return 0;
519 } /* radix3b_ */
520
radix4b_(double * a1,double * a2,double * a3,double * a4,double * b1,double * b2,double * b3,double * b4,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4)521 int radix4b_(double *a1, double *a2, double *a3, double *a4, double *b1, double *b2, double *b3, double *b4, int *b, int *ldb, double *c2, double *d2, double *c3, double *d3, double *c4, double *d4)
522 {
523 /* System generated locals */
524 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
525 b4_dim2, b4_offset, i__1;
526
527 /* Local variables */
528 int i__;
529 double i0, i1, r0, r1, r2, r3, i2, i3, s1, j1, s2, j2, s3, j3,
530 t1, t2, t3, t4, t5, t6, t7, t8;
531
532 /* Parameter adjustments */
533 a4 -= 3;
534 a3 -= 3;
535 a2 -= 3;
536 a1 -= 3;
537 b4_dim2 = *ldb;
538 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
539 b4 -= b4_offset;
540 b3_dim2 = *ldb;
541 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
542 b3 -= b3_offset;
543 b2_dim2 = *ldb;
544 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
545 b2 -= b2_offset;
546 b1_dim2 = *ldb;
547 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
548 b1 -= b1_offset;
549
550 /* Function Body */
551 i__1 = *b;
552 for (i__ = 1; i__ <= i__1; ++i__) {
553 r3 = a2[(i__ << 1) + 1];
554 i3 = a2[(i__ << 1) + 2];
555 s3 = r3 * *c2 + i3 * *d2;
556 j3 = i3 * *c2 - r3 * *d2;
557 r2 = a3[(i__ << 1) + 1];
558 i2 = a3[(i__ << 1) + 2];
559 s2 = r2 * *c3 + i2 * *d3;
560 j2 = i2 * *c3 - r2 * *d3;
561 r1 = a4[(i__ << 1) + 1];
562 i1 = a4[(i__ << 1) + 2];
563 s1 = r1 * *c4 + i1 * *d4;
564 j1 = i1 * *c4 - r1 * *d4;
565 r0 = a1[(i__ << 1) + 1];
566 i0 = a1[(i__ << 1) + 2];
567 t1 = r0 + s2;
568 t2 = s1 + s3;
569 t3 = i0 + j2;
570 t4 = j1 + j3;
571 t5 = r0 - s2;
572 t6 = j1 - j3;
573 t7 = i0 - j2;
574 t8 = s1 - s3;
575 b1[((i__ * b1_dim2 + 1) << 1) + 1] = t1 + t2;
576 b1[((i__ * b1_dim2 + 1) << 1) + 2] = t3 + t4;
577 b2[((i__ * b2_dim2 + 1) << 1) + 1] = t5 - t6;
578 b2[((i__ * b2_dim2 + 1) << 1) + 2] = t7 + t8;
579 b3[((i__ * b3_dim2 + 1) << 1) + 1] = t1 - t2;
580 b3[((i__ * b3_dim2 + 1) << 1) + 2] = t3 - t4;
581 b4[((i__ * b4_dim2 + 1) << 1) + 1] = t5 + t6;
582 b4[((i__ * b4_dim2 + 1) << 1) + 2] = t7 - t8;
583 }
584 return 0;
585 } /* radix4b_ */
586
radix5b_(double * a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int * ldb,double * c2,double * d2,double * c3,double * d3,double * c4,double * d4,double * c5,double * d5)587 int radix5b_(double *a1,double * a2,double * a3,double * a4,double * a5,double * b1,double * b2,double * b3,double * b4,double * b5,int * b,int *ldb, double *c2,double * d2,double * c3,double * d3,double * c4,double * d4, double *c5, double *d5)
588 {
589 /* Initialized data */
590
591 double cos36 = .8090169943749474241;
592 double sin36 = .58778525229247312917;
593 double cos72 = .3090169943749474241;
594 double sin72 = .95105651629515357212;
595
596 /* System generated locals */
597 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
598 b4_dim2, b4_offset, b5_dim2, b5_offset, i__1;
599
600 /* Local variables */
601 int i__;
602 double r1, r2, r3, r4, r5, i1, i2, i3, i4, i5, s2, s3, s4, s5,
603 j2, j3, j4, j5, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12,
604 t13, t14, t15, t16;
605
606 /* Parameter adjustments */
607 a5 -= 3;
608 a4 -= 3;
609 a3 -= 3;
610 a2 -= 3;
611 a1 -= 3;
612 b5_dim2 = *ldb;
613 b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
614 b5 -= b5_offset;
615 b4_dim2 = *ldb;
616 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
617 b4 -= b4_offset;
618 b3_dim2 = *ldb;
619 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
620 b3 -= b3_offset;
621 b2_dim2 = *ldb;
622 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
623 b2 -= b2_offset;
624 b1_dim2 = *ldb;
625 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
626 b1 -= b1_offset;
627
628 /* Function Body */
629 i__1 = *b;
630 for (i__ = 1; i__ <= i__1; ++i__) {
631 r2 = a2[(i__ << 1) + 1];
632 i2 = a2[(i__ << 1) + 2];
633 s2 = r2 * *c2 + i2 * *d2;
634 j2 = i2 * *c2 - r2 * *d2;
635 r3 = a3[(i__ << 1) + 1];
636 i3 = a3[(i__ << 1) + 2];
637 s3 = r3 * *c3 + i3 * *d3;
638 j3 = i3 * *c3 - r3 * *d3;
639 r4 = a4[(i__ << 1) + 1];
640 i4 = a4[(i__ << 1) + 2];
641 s4 = r4 * *c4 + i4 * *d4;
642 j4 = i4 * *c4 - r4 * *d4;
643 r5 = a5[(i__ << 1) + 1];
644 i5 = a5[(i__ << 1) + 2];
645 s5 = r5 * *c5 + i5 * *d5;
646 j5 = i5 * *c5 - r5 * *d5;
647 t1 = s2 + s5;
648 t2 = s3 + s4;
649 t3 = s2 - s5;
650 t4 = s3 - s4;
651 t5 = j2 + j5;
652 t6 = j3 + j4;
653 t7 = j2 - j5;
654 t8 = j3 - j4;
655 r1 = a1[(i__ << 1) + 1];
656 i1 = a1[(i__ << 1) + 2];
657 t9 = t1 * cos72 - t2 * cos36;
658 t10 = t5 * cos72 - t6 * cos36;
659 t11 = t2 * cos72 - t1 * cos36;
660 t12 = t6 * cos72 - t5 * cos36;
661 t13 = t7 * sin72 + t8 * sin36;
662 t14 = t3 * sin72 + t4 * sin36;
663 t15 = t8 * sin72 - t7 * sin36;
664 t16 = t4 * sin72 - t3 * sin36;
665 b1[((i__ * b1_dim2 + 1) << 1) + 1] = r1 + t1 + t2;
666 b1[((i__ * b1_dim2 + 1) << 1) + 2] = i1 + t5 + t6;
667 b2[((i__ * b2_dim2 + 1) << 1) + 1] = r1 + t9 + t13;
668 b2[((i__ * b2_dim2 + 1) << 1) + 2] = i1 + t10 - t14;
669 b3[((i__ * b3_dim2 + 1) << 1) + 1] = r1 + t11 - t15;
670 b3[((i__ * b3_dim2 + 1) << 1) + 2] = i1 + t12 + t16;
671 b4[((i__ * b4_dim2 + 1) << 1) + 1] = r1 + t11 + t15;
672 b4[((i__ * b4_dim2 + 1) << 1) + 2] = i1 + t12 - t16;
673 b5[((i__ * b5_dim2 + 1) << 1) + 1] = r1 + t9 - t13;
674 b5[((i__ * b5_dim2 + 1) << 1) + 2] = i1 + t10 + t14;
675 }
676 return 0;
677 } /* radix5b_ */
678
radix7b_(double * a1,double * a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5,double * b6,double * b7,int * b,int * ldb)679 int radix7b_(double *a1, double *a2,double * a3,double * a4,double * a5,double * a6,double * a7,double * b1,double * b2,double * b3,double * b4,double * b5, double *b6, double *b7, int *b, int *ldb)
680 {
681 /* Initialized data */
682
683 double c1 = .623489801858733530525;
684 double s1 = .78183148246802980870844;
685 double c2 = .2225209339563144042889;
686 double s2 = .97492791218182360701813;
687 double c3 = .9009688679024191262361;
688 double s3 = .43388373911755812047577;
689
690 /* System generated locals */
691 int b1_dim2, b1_offset, b2_dim2, b2_offset, b3_dim2, b3_offset,
692 b4_dim2, b4_offset, b5_dim2, b5_offset, b6_dim2, b6_offset,
693 b7_dim2, b7_offset, i__1;
694
695 /* Local variables */
696 int i__;
697 double d1, p1, p2, p3, p4, t1, t2, t3, t4, t5, t6, t7, p5, p6,
698 p7, r1, r2, d2, r3, d3, i1, j1, i2, j2, i3, j3, t11, t21, t31,
699 t12, t22, t32, t13, t23, t33, t14, t24, t34;
700
701 /* Parameter adjustments */
702 a7 -= 3;
703 a6 -= 3;
704 a5 -= 3;
705 a4 -= 3;
706 a3 -= 3;
707 a2 -= 3;
708 a1 -= 3;
709 b7_dim2 = *ldb;
710 b7_offset = 1 + 2 * (1 + b7_dim2 * 1);
711 b7 -= b7_offset;
712 b6_dim2 = *ldb;
713 b6_offset = 1 + 2 * (1 + b6_dim2 * 1);
714 b6 -= b6_offset;
715 b5_dim2 = *ldb;
716 b5_offset = 1 + 2 * (1 + b5_dim2 * 1);
717 b5 -= b5_offset;
718 b4_dim2 = *ldb;
719 b4_offset = 1 + 2 * (1 + b4_dim2 * 1);
720 b4 -= b4_offset;
721 b3_dim2 = *ldb;
722 b3_offset = 1 + 2 * (1 + b3_dim2 * 1);
723 b3 -= b3_offset;
724 b2_dim2 = *ldb;
725 b2_offset = 1 + 2 * (1 + b2_dim2 * 1);
726 b2 -= b2_offset;
727 b1_dim2 = *ldb;
728 b1_offset = 1 + 2 * (1 + b1_dim2 * 1);
729 b1 -= b1_offset;
730
731 /* Function Body */
732 i__1 = *b;
733 for (i__ = 1; i__ <= i__1; ++i__) {
734 t1 = a7[(i__ << 1) + 1];
735 t2 = a2[(i__ << 1) + 1];
736 r1 = t1 + t2;
737 d1 = t1 - t2;
738 t11 = r1 * c1;
739 t21 = r1 * c2;
740 t31 = r1 * c3;
741 t12 = d1 * s1;
742 t22 = d1 * s2;
743 t32 = d1 * s3;
744 t3 = a6[(i__ << 1) + 1];
745 t4 = a3[(i__ << 1) + 1];
746 r2 = t3 + t4;
747 d2 = t3 - t4;
748 t11 -= r2 * c2;
749 t21 += r2 * c3;
750 t31 -= r2 * c1;
751 t12 += d2 * s2;
752 t22 -= d2 * s3;
753 t32 -= d2 * s1;
754 t5 = a5[(i__ << 1) + 1];
755 t6 = a4[(i__ << 1) + 1];
756 r3 = t5 + t6;
757 d3 = t5 - t6;
758 t7 = a1[(i__ << 1) + 1];
759 b1[((i__ * b1_dim2 + 1) << 1) + 1] = t7 + r1 + r2 + r3;
760 t11 -= r3 * c3;
761 t21 -= r3 * c1;
762 t31 += r3 * c2;
763 t12 += d3 * s3;
764 t22 -= d3 * s1;
765 t32 += d3 * s2;
766 p1 = a7[(i__ << 1) + 2];
767 p2 = a2[(i__ << 1) + 2];
768 i1 = p1 + p2;
769 j1 = p1 - p2;
770 t13 = i1 * c1;
771 t23 = i1 * c2;
772 t33 = i1 * c3;
773 t14 = j1 * s1;
774 t24 = j1 * s2;
775 t34 = j1 * s3;
776 p3 = a6[(i__ << 1) + 2];
777 p4 = a3[(i__ << 1) + 2];
778 i2 = p3 + p4;
779 j2 = p3 - p4;
780 t13 -= i2 * c2;
781 t23 += i2 * c3;
782 t33 -= i2 * c1;
783 t14 += j2 * s2;
784 t24 -= j2 * s3;
785 t34 -= j2 * s1;
786 p5 = a5[(i__ << 1) + 2];
787 p6 = a4[(i__ << 1) + 2];
788 i3 = p5 + p6;
789 j3 = p5 - p6;
790 p7 = a1[(i__ << 1) + 2];
791 b1[((i__ * b1_dim2 + 1) << 1) + 2] = p7 + i1 + i2 + i3;
792 t13 -= i3 * c3;
793 t23 -= i3 * c1;
794 t33 += i3 * c2;
795 t14 += j3 * s3;
796 t24 -= j3 * s1;
797 t34 += j3 * s2;
798 t11 = t7 + t11;
799 t21 = t7 - t21;
800 t31 = t7 - t31;
801 t13 = p7 + t13;
802 t23 = p7 - t23;
803 t33 = p7 - t33;
804 b2[((i__ * b2_dim2 + 1) << 1) + 1] = t11 - t14;
805 b2[((i__ * b2_dim2 + 1) << 1) + 2] = t13 + t12;
806 b7[((i__ * b7_dim2 + 1) << 1) + 1] = t11 + t14;
807 b7[((i__ * b7_dim2 + 1) << 1) + 2] = t13 - t12;
808 b3[((i__ * b3_dim2 + 1) << 1) + 1] = t21 - t24;
809 b3[((i__ * b3_dim2 + 1) << 1) + 2] = t23 + t22;
810 b6[((i__ * b6_dim2 + 1) << 1) + 1] = t21 + t24;
811 b6[((i__ * b6_dim2 + 1) << 1) + 2] = t23 - t22;
812 b4[((i__ * b4_dim2 + 1) << 1) + 1] = t31 - t34;
813 b4[((i__ * b4_dim2 + 1) << 1) + 2] = t33 + t32;
814 b5[((i__ * b5_dim2 + 1) << 1) + 1] = t31 + t34;
815 b5[((i__ * b5_dim2 + 1) << 1) + 2] = t33 - t32;
816 }
817 return 0;
818 } /* radix7b_ */
819
gcd_(int * n1,int * n2)820 int gcd_(int* n1,int* n2)
821 {
822 /* System generated locals */
823 int ret_val;
824
825 /* Local variables */
826 int i__, j, n;
827
828 i__ = *n1;
829 j = *n2;
830 L10:
831 n = min(i__,j);
832 ret_val = max(i__,j);
833 if (n == 0) {
834 return ret_val;
835 }
836 i__ = n;
837 j = ret_val - n;
838 goto L10;
839 } /* gcd_ */
840
fftfact_(int * n,int * nf,int * f)841 void fftfact_(int *n,int *nf,int *f)
842 {
843
844 /* Local variables */
845 int i__, n7;
846
847 /* Parameter adjustments */
848 --f;
849
850 /* Function Body */
851 i__ = *n;
852 *nf = 0;
853 n7 = 0;
854 L10:
855 if (i__ == 1) {
856 if (n7 > 1) {
857 printf("Only one factor of 7 allowed in fft dimension.\n");
858 exit(-1);
859 }
860 return;
861 }
862 if (i__ % 7 == 0) {
863 ++(*nf);
864 f[*nf] = 7;
865 i__ /= 7;
866 ++n7;
867 goto L10;
868 }
869 if (i__ % 5 == 0) {
870 ++(*nf);
871 f[*nf] = 5;
872 i__ /= 5;
873 goto L10;
874 }
875 if (i__ % 4 == 0) {
876 ++(*nf);
877 f[*nf] = 4;
878 i__ /= 4;
879 goto L10;
880 }
881 if (i__ % 3 == 0) {
882 ++(*nf);
883 f[*nf] = 3;
884 i__ /= 3;
885 goto L10;
886 }
887 if (i__ % 2 == 0) {
888 ++(*nf);
889 f[*nf] = 2;
890 i__ /= 2;
891 goto L10;
892 }
893 printf("Illegal fft size %i\n",(*n));
894 exit(-1);
895 } /* fftfact_ */
896
fftcoeff_(int * n,zomplex * coeff)897 void fftcoeff_(int *n,zomplex * coeff)
898 {
899 /* System generated locals */
900 int i__1, i__2;
901 double d__1, d__2;
902 zomplex z__1;
903
904
905 /* Local variables */
906 int i__;
907 double angle;
908
909 /* Parameter adjustments */
910 --coeff;
911
912 /* Function Body */
913 i__1 = *n;
914 for (i__ = 1; i__ <= i__1; ++i__) {
915 angle = (double) (i__ - 1) / (double) (*n) * 2. *
916 3.141592653589793238462643;
917 i__2 = i__;
918 d__1 = cos(angle);
919 d__2 = sin(angle);
920 z__1.re = d__1, z__1.im = d__2;
921 coeff[i__2].re = z__1.re, coeff[i__2].im = z__1.im;
922 /* L10: */
923 }
924 } /* fftcoeff_ */
925
zfftcopyin_(int * n,int * b,int * bdim,zomplex * a,int * inc,int * lda,int * rindex,zomplex * w,int * lpref)926 void zfftcopyin_(int *n,int * b,int * bdim,zomplex * a,int * inc ,int * lda,int * rindex,zomplex * w,int * lpref)
927 {
928 /* System generated locals */
929 int w_dim1, w_offset, i__1, i__2, i__3, i__4;
930
931 /* Local variables */
932 int i__, j, ia;
933
934 /* if(lpref){ */
935 /* if(*lda <= *inc) */
936 /* prefetch(a,lda,inc,b,n); */
937 /* else */
938 /* prefetch(a,inc,lda,n,b); */
939 /* } */
940 /* Copy data */
941 /* Parameter adjustments */
942 --rindex;
943 w_dim1 = *bdim;
944 w_offset = 1 + w_dim1 * 1;
945 w -= w_offset;
946 --a;
947
948 /* Function Body */
949 i__1 = *n;
950 for (j = 1; j <= i__1; ++j) {
951 ia = rindex[j];
952 i__2 = *b;
953 for (i__ = 1; i__ <= i__2; ++i__) {
954 i__3 = i__ + j * w_dim1;
955 i__4 = ia;
956 w[i__3].re = a[i__4].re, w[i__3].im = a[i__4].im;
957 ia += *lda;
958 }
959 }
960 } /* zfftcopyin_ */
961
zfftrev_(int * n,int * inc,int * nf,int * f,int * nmax,int * rindex,int * ipadok)962 int zfftrev_(int *n, int *inc, int *nf, int *f, int * nmax , int *rindex,int * ipadok)
963 {
964 int imax[10], irev, i__, s, fi, ii[10], stride[10];
965
966 /* OK up to 100000 elements */
967 /* Parameter adjustments */
968 --f;
969 --rindex;
970
971 /* Function Body */
972 s = 1;
973 for (fi = *nf; fi >= 1; --fi) {
974 ii[fi - 1] = 1;
975 stride[fi - 1] = s;
976 s *= f[fi];
977 imax[fi - 1] = s;
978 }
979 i__ = 1;
980 irev = 1;
981 L20:
982 rindex[i__] = (irev - 1) * *inc + 1;
983 if (i__ == *n) {
984 goto L100;
985 }
986 ++i__;
987 fi = 1;
988 L10:
989 irev += stride[fi - 1];
990 ii[fi - 1] += stride[fi - 1];
991 if (ii[fi - 1] > imax[fi - 1]) {
992 ii[fi - 1] = 1;
993 irev -= imax[fi - 1];
994 ++fi;
995 goto L10;
996 }
997 goto L20;
998 L100:
999 if (*n * *inc / f[*nf] % 1024 == 0) {
1000 *ipadok = 0;
1001 } else {
1002 *ipadok = 1;
1003 }
1004 return 0;
1005 } /* zfftrev_ */
1006
1007 /* Row (inc=1) transform code on bitreversed vectors */
1008 /* n length of transform (sub) vector */
1009 /* b # of transforms in this block */
1010 /* bdim # of rows in w, b.le.bdim */
1011 /* w bdim*n matrix containing data elements */
1012 /* nf # of factors in n */
1013 /* f vector of factors */
1014 /* coeff exp[2 pi i k/n], k=0..n-1] */
1015 /* a pointer to output vector */
1016 /* inc from caller, stride within transform */
1017 /* lda from caller, stride between transforms measured in elements */
1018 /* ipadok if zero, don't copy data back to a here because doing so would */
1019 /* trash the L1 cache */
1020 /* nrem product of remaining factors (n unless blocking is used) */
1021 /* irow row #, max(irow)*nrem = total length */
zfftm1rowf_(int * n,int * b,int * bdim,zomplex * w,int * nf,int * f,double * coeff,zomplex * a,int * inc,int * lda,int * ipadok,int * nprev,int * irow,int * nrem)1022 int zfftm1rowf_(int *n, int *b, int *bdim,zomplex * w, int *nf, int *f, double *coeff, zomplex *a, int *inc, int *lda,
1023 int *ipadok, int *nprev, int *irow, int *nrem)
1024 {
1025 /* System generated locals */
1026 int i__1, i__2, i__3, i__4;
1027
1028 /* Local variables */
1029 int aind, ntot, i__, j, p, index, ff, ic, nt, ninner, stride;
1030 int astride, bstride;
1031
1032
1033 /* Parameter adjustments */
1034 --coeff;
1035 --w;
1036 --f;
1037 --a;
1038
1039 /* Function Body */
1040 stride = *bdim;
1041 ninner = 1;
1042 bstride = *bdim;
1043 ntot = *n * *bdim;
1044 nt = *n;
1045 i__1 = *nf - *ipadok;
1046 for (p = 1; p <= i__1; ++p) {
1047 ff = f[p];
1048 bstride *= ff;
1049 nt /= ff;
1050 i__2 = ntot;
1051 i__3 = bstride;
1052 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) {
1053 index = i__;
1054 ic = (*irow - 1) * nt * *nrem;
1055 i__4 = ninner;
1056 for (j = 1; j <= i__4; ++j) {
1057
1058 if (ff == 2) {
1059 radix2f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index]), (double*)(&w[index + stride]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2]);
1060 } else if (ff == 3) {
1061 radix3f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[((ic + ic) << 1) + 1], &coeff[ic * 4 + 2]);
1062 } else if (ff == 4) {
1063 radix4f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1064 } else if (ff == 5) {
1065 radix5f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1066 } else if (ff == 7) {
1067 radix7f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), b, &const1);
1068 }
1069 index += *bdim;
1070 ic += nt * (*nprev * *nrem);
1071 }
1072 }
1073 stride *= ff;
1074 ninner *= ff;
1075 /* L10: */
1076 }
1077 if (*ipadok == 0) {
1078 return 0;
1079 }
1080 ff = f[p];
1081 astride = *inc * ninner;
1082 index = 1;
1083 aind = 1;
1084 ic = (*irow - 1) * *nrem;
1085 i__1 = ninner;
1086 for (j = 1; j <= i__1; ++j) {
1087 if (ff == 2) {
1088 radix2f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&a[aind]), (double*)(&a[aind + astride]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2]);
1089 } else if (ff == 3) {
1090 radix3f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], & coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1091 } else if (ff == 4) {
1092 radix4f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1093 } else if (ff == 5) {
1094 radix5f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1095 } else if (ff == 7) {
1096 radix7f_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), (double*)(&a[aind + astride * 5]), (double*)(&a[aind + astride * 6]), b, lda);
1097 }
1098 index += *bdim;
1099 ic += *nrem;
1100 aind += *inc;
1101 }
1102 return 0;
1103 } /* zfftm1rowf_ */
1104
zfftm1rowb_(int * n,int * b,int * bdim,zomplex * w,int * nf,int * f,double * coeff,zomplex * a,int * inc,int * lda,int * ipadok,int * nprev,int * irow,int * nrem)1105 int zfftm1rowb_(int *n, int *b, int *bdim,zomplex *w, int *nf, int *f, double *coeff, zomplex *a, int *inc, int *lda,
1106 int *ipadok, int *nprev, int *irow, int *nrem)
1107 {
1108 /* System generated locals */
1109 int i__1, i__2, i__3, i__4;
1110
1111 /* Local variables */
1112 int aind, ntot, i__, j, p, index, ff, ic, nt, ninner, stride;
1113 int astride, bstride;
1114
1115 /* Parameter adjustments */
1116 --coeff;
1117 --w;
1118 --f;
1119 --a;
1120
1121 /* Function Body */
1122 stride = *bdim;
1123 ninner = 1;
1124 bstride = *bdim;
1125 ntot = *n * *bdim;
1126 nt = *n;
1127 i__1 = *nf - *ipadok;
1128 for (p = 1; p <= i__1; ++p) {
1129 ff = f[p];
1130 bstride *= ff;
1131 nt /= ff;
1132 i__2 = ntot;
1133 i__3 = bstride;
1134 for (i__ = 1; i__3 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__3) {
1135 index = i__;
1136 ic = (*irow - 1) * nt * *nrem;
1137 i__4 = ninner;
1138 for (j = 1; j <= i__4; ++j) {
1139 if (ff == 2) {
1140 radix2b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index]), (double*)(&w[index + stride]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2]);
1141 } else if (ff == 3) {
1142 radix3b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1143 } else if (ff == 4) {
1144 radix4b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1145 } else if (ff == 5) {
1146 radix5b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), b, &const1, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1147 } else if (ff == 7) {
1148 radix7b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), b, &const1);
1149 }
1150 index += *bdim;
1151 ic += nt * (*nprev * *nrem);
1152 }
1153 }
1154 stride *= ff;
1155 ninner *= ff;
1156 /* L10: */
1157 }
1158 if (*ipadok == 0) {
1159 return 0;
1160 }
1161 ff = f[p];
1162 astride = *inc * ninner;
1163 index = 1;
1164 aind = 1;
1165 ic = (*irow - 1) * *nrem;
1166 i__1 = ninner;
1167 for (j = 1; j <= i__1; ++j) {
1168 if (ff == 2) {
1169 radix2b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&a[aind]), (double*)(&a[aind + astride]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1)+ 2]);
1170 } else if (ff == 3) {
1171 radix3b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]), b, lda, &coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], & coeff[ic * 4 + 1], &coeff[ic * 4 + 2]);
1172 } else if (ff == 4) {
1173 radix4b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2]);
1174 } else if (ff == 5) {
1175 radix5b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), b, lda, & coeff[(ic << 1) + 1], &coeff[(ic << 1) + 2], &coeff[ic * 4 + 1], &coeff[ic * 4 + 2], &coeff[ic * 6 + 1], &coeff[ic * 6 + 2], &coeff[(ic << 3) + 1], &coeff[(ic << 3) + 2]);
1176 } else if (ff == 7) {
1177 radix7b_((double*)(&w[index]), (double*)(&w[index + stride]), (double*)(&w[index + (stride << 1)]), (double*)(&w[index + stride * 3]), (double*)(&w[index + (stride << 2)]), (double*)(&w[index + stride * 5]), (double*)(&w[index + stride * 6]), (double*)(&a[aind]), (double*)(&a[aind + astride]), (double*)(&a[aind + (astride << 1)]),(double*)(&a[aind + astride * 3]), (double*)(&a[aind + (astride << 2)]), (double*)(&a[aind + astride * 5]), (double*)(&a[aind + astride * 6]), b, lda);
1178 }
1179 index += *bdim;
1180 ic += *nrem;
1181 aind += *inc;
1182 }
1183 return 0;
1184 } /* zfftm1rowb_ */
1185
1186
1187 /* Multiple fft routine for large vectors (length>=128) */
1188 /* Uses both L2 and L1 cache blocking. */
1189 /* The maximum supported transform length is 8192 */
zfftm1big_(int * sign,int * n,int * p,zomplex * a,int * inc,int * lda,int * lpref,int * nf,int * f,zomplex * coeff,int * rindex)1190 int zfftm1big_(int *sign,int * n,int * p,zomplex * a,int * inc,int * lda,int * lpref,int * nf,int * f,zomplex *coeff,int *rindex)
1191 {
1192 /* System generated locals */
1193 int a_dim1, a_offset, i__1, i__2, i__3, i__4, i__5, i__6, i__7;
1194
1195
1196 /* Local variables */
1197 int bpad, bincache;
1198 zomplex wbig[65536];
1199 int b, i__, j, k, m, iwpad, b1, n1, n2, w1, ia, if__, iw;
1200 int nf1;
1201 int pad, iiw;
1202
1203
1204 /* Max length of col transform (size of L1 cache) */
1205 /* Work area size (size of L2 cache) */
1206 /* Parameter adjustments */
1207 --coeff;
1208 a_dim1 = *lda;
1209 a_offset = 1 + a_dim1 * 1;
1210 a -= a_offset;
1211 --f;
1212 --rindex;
1213
1214 /* Function Body */
1215 n1 = 1;
1216 n2 = *n;
1217 if__ = 1;
1218 L10:
1219 if (n1 < n2) {
1220 n1 *= f[if__];
1221 n2 /= f[if__];
1222 ++if__;
1223 goto L10;
1224 }
1225 nf1 = if__ - 1;
1226 /* Determine the # of transforms in each block, b1. b1 is bounded from above */
1227 /* by the max work area size (the size of the L2 cache) and from below by the */
1228 /* desire to make efficient use of loop unrolling/pipelining. We want the work */
1229 /* matrix and a piece of the input matrix to reside in L2 cache at the same */
1230 /* time, so that we can do the copy back operation without additional memory */
1231 /* operations. Therefore we try to limit the work area size to half of the */
1232 /* L2 cache size. */
1233 b1 = 1024 / n1;
1234 L30:
1235 /* The work area contains n2 columns of n1*b1+bpad*b1+pad elements each. */
1236 /* The padding is chosen so that n1+bpad and 8192/16/b1 are relatively */
1237 /* prime (8192 is the page size.) */
1238 bincache = 512 / b1;
1239 bpad = 0;
1240 L20:
1241 i__1 = n1 + bpad;
1242 if (gcd_(&i__1, &bincache) > 1) {
1243 ++bpad;
1244 goto L20;
1245 }
1246 /* If 8192/16/b1 is not an int, we have to adjust with some extra padding pad */
1247 /* Because the columns are roughly 2 pages long, there is a "2" below. */
1248 pad = (512 - b1 * bincache) << 1;
1249 w1 = (n1 + bpad) * b1 + pad;
1250 if (w1 * n2 > 32768) {
1251 /* If the work area turned out to be too large, first reduce the block size */
1252 if (b1 > 8) {
1253 --b1;
1254 goto L30;
1255 }
1256 /* If the work area is still too large, give up. */
1257 /* Transform lenghts up to 8192 are safe with the current cache parameters. */
1258 if (w1 * n2 > 65536) {
1259 printf("Large FFT work area too small.\n");
1260 exit(-1);
1261 }
1262 }
1263 /* Loop over fft blocks */
1264 i__1 = *p;
1265 i__2 = b1;
1266 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
1267 /* Computing MIN */
1268 i__3 = b1, i__4 = *p - i__ + 1;
1269 b = min(i__3,i__4);
1270 /* Loop over columns of the work matrix */
1271 iw = 1;
1272 iwpad = 1;
1273 i__3 = n2;
1274 for (j = 1; j <= i__3; ++j) {
1275 /* Fill column with data */
1276 zfftcopyin_(&n1, &b, &b1, &a[i__ * a_dim1 + 1], inc, lda, &rindex[iw], &wbig[iwpad - 1], lpref);
1277 /* Do a block of column transforms */
1278 if (*sign >= 1) {
1279 zfftm1rowf_(&n1, &b, &b1, &wbig[iwpad - 1], &nf1, &f[1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &const1, & const1, &n2);
1280 } else {
1281 zfftm1rowb_(&n1, &b, &b1, &wbig[iwpad - 1], &nf1, &f[1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &const1, & const1, &n2);
1282 }
1283 iw += n1;
1284 iwpad += w1;
1285 }
1286 /* Loop over block rows of the work matrix */
1287 iw = 1;
1288 i__3 = n1;
1289 for (j = 1; j <= i__3; ++j) {
1290 /* Do a block of row transforms */
1291 if (*sign >= 1) {
1292 i__4 = *nf - nf1;
1293 zfftm1rowf_(&n2, &b, &w1, &wbig[iw - 1], &i__4, &f[nf1 + 1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &n1, &j, &const1);
1294 } else {
1295 i__4 = *nf - nf1;
1296 zfftm1rowb_(&n2, &b, &w1, &wbig[iw - 1], &i__4, &f[nf1 + 1], (double*)(&coeff[1]), &a[a_offset], &const0, &const0, &const0, &n1, &j, &const1);
1297 }
1298 iw += b1;
1299 }
1300 /* Copy back data */
1301 if (*lda <= *inc) {
1302 if (*lda == 1) {
1303 ia = 1;
1304 iiw = 1;
1305 i__3 = n2;
1306 for (j = 1; j <= i__3; ++j) {
1307 iw = iiw;
1308 i__4 = n1;
1309 for (k = 1; k <= i__4; ++k) {
1310 i__5 = b;
1311 for (m = 1; m <= i__5; ++m) {
1312 /* An index checker would not like this */
1313 i__6 = ia + m - 1 + i__ * a_dim1;
1314 i__7 = iw + m - 2;
1315 a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1316 .im;
1317 }
1318 ia += *inc;
1319 iw += b1;
1320 }
1321 iiw += w1;
1322 }
1323 } else {
1324 ia = 1;
1325 iiw = 1;
1326 i__3 = n2;
1327 for (j = 1; j <= i__3; ++j) {
1328 iw = iiw;
1329 i__4 = n1;
1330 for (k = 1; k <= i__4; ++k) {
1331 i__5 = b;
1332 for (m = 1; m <= i__5; ++m) {
1333 i__6 = ia + (m - 1 + i__) * a_dim1;
1334 i__7 = iw + m - 2;
1335 a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1336 .im;
1337 }
1338 ia += *inc;
1339 iw += b1;
1340 }
1341 iiw += w1;
1342 }
1343 }
1344 } else {
1345 /* lda.gt.inc case */
1346 if (*inc == 1) {
1347 i__3 = b;
1348 for (m = 1; m <= i__3; ++m) {
1349 ia = 1;
1350 iw = m;
1351 i__4 = n2;
1352 for (j = 1; j <= i__4; ++j) {
1353 i__5 = n1;
1354 for (k = 1; k <= i__5; ++k) {
1355 i__6 = ia + k - 1 + (m - 1 + i__) * a_dim1;
1356 i__7 = iw + (k - 1) * b1 - 1;
1357 a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1358 .im;
1359 }
1360 ia += n1;
1361 iw += w1;
1362 }
1363 }
1364 } else {
1365 i__3 = b;
1366 for (m = 1; m <= i__3; ++m) {
1367 ia = 1;
1368 iw = m;
1369 i__4 = n2;
1370 for (j = 1; j <= i__4; ++j) {
1371 i__5 = n1;
1372 for (k = 1; k <= i__5; ++k) {
1373 i__6 = ia + (k - 1) * *inc + (m - 1 + i__) *
1374 a_dim1;
1375 i__7 = iw + (k - 1) * b1 - 1;
1376 a[i__6].re = wbig[i__7].re, a[i__6].im = wbig[i__7]
1377 .im;
1378 }
1379 ia += n1 * *inc;
1380 iw += w1;
1381 }
1382 }
1383 }
1384 }
1385 /* next fft block */
1386 }
1387 return 0;
1388 } /* zfftm1big_ */
1389
1390 /* copy out routine for the large inc case */
zfftcopyout_(int * n,int * b,zomplex * a,int * inc,int * lda,zomplex * w)1391 void zfftcopyout_(int *n, int *b, zomplex *a, int *inc, int *lda,zomplex *w)
1392 {
1393 /* System generated locals */
1394 int w_dim1, w_offset, i__1, i__2, i__3, i__4;
1395
1396 /* Local variables */
1397 int i__, j, index, ia;
1398
1399 /* Parameter adjustments */
1400 w_dim1 = *b;
1401 w_offset = 1 + w_dim1 * 1;
1402 w -= w_offset;
1403 --a;
1404
1405 /* Function Body */
1406 ia = 1;
1407 i__1 = *n;
1408 for (j = 1; j <= i__1; ++j) {
1409 index = ia;
1410 i__2 = *b;
1411 for (i__ = 1; i__ <= i__2; ++i__) {
1412 i__3 = index;
1413 i__4 = i__ + j * w_dim1;
1414 a[i__3].re = w[i__4].re, a[i__3].im = w[i__4].im;
1415 index += *lda;
1416 }
1417 ia += *inc;
1418 }
1419 } /* zfftcopyout_ */
1420
zfftm1di_(int * n,zomplex * coeff)1421 int zfftm1di_(int *n,zomplex * coeff)
1422 {
1423 /* Local variables */
1424 int nf;
1425
1426
1427 /* Parameter adjustments */
1428 --coeff;
1429
1430 /* Function Body */
1431 if (*n < 1) {
1432 printf("zfftm1di: illegal FFT dimension %i\n",(*n));
1433 exit(-1);
1434 }
1435 coeff[1].re = 12345678., coeff[1].im = 0.;
1436 coeff[2].re = (double) (*n), coeff[2].im = 0.;
1437 fftfact_(n, &nf, (int*)(&coeff[4]));
1438 coeff[3].re = (double) nf, coeff[3].im = 0.;
1439 fftcoeff_(n, &coeff[15]);
1440 return 0;
1441 } /* zfftm1di_ */
1442
zfftm1d_(int * sign,int * n,int * p,zomplex * a,int * inc,int * lda,zomplex * coeff,int * lpref)1443 int zfftm1d_(int *sign,int * n,int * p, zomplex* a, int *inc,int * lda, zomplex *coeff, int *lpref)
1444 {
1445 /* System generated locals */
1446 int a_dim1, a_offset, i__1, i__2, i__3, i__4;
1447
1448
1449 /* Local variables */
1450 int bdim;
1451 int b;
1452 int i__;
1453 zomplex w[1024];
1454 int nf, ipadok;
1455 int rindex[16384];
1456
1457 /* Parameter adjustments */
1458 --coeff;
1459 a_dim1 = *lda;
1460 a_offset = 1 + a_dim1 * 1;
1461 a -= a_offset;
1462
1463 /* Function Body */
1464 if (coeff[1].re != 12345678. || coeff[1].im != 0.) {
1465 printf("zfftm1d: coeff not initialized.\n");
1466 exit(-1);
1467 }
1468 if ((double) (*n) != coeff[2].re || 0. != coeff[2].im) {
1469 printf("zfftm1d: coeff initialized with wrong dimension.\n");
1470 exit(-1);
1471 }
1472 nf = (int) coeff[3].re;
1473 zfftrev_(n, inc, &nf, (int*)(&coeff[4]), &const16384, rindex, &ipadok);
1474 if (*n <= 128) {
1475 /* Use L1 blocking only */
1476 bdim = 1024 / *n;
1477 i__1 = *p;
1478 i__2 = bdim;
1479 for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) {
1480 /* Computing MIN */
1481 i__3 = bdim, i__4 = *p - i__ + 1;
1482 b = min(i__3,i__4);
1483 zfftcopyin_(n, &b, &b, &a[i__ * a_dim1 + 1], inc, lda, rindex, w, lpref);
1484 if (*sign >= 1) {
1485 zfftm1rowf_(n, &b, &b, w, &nf, (int*)(&coeff[4]), (double*)(&coeff[15]), &a[i__ * a_dim1 + 1], inc, lda, &ipadok, &const1, &const1, &const1);
1486 } else {
1487 zfftm1rowb_(n, &b, &b, w, &nf, (int*)(&coeff[4]), (double*)(&coeff[15]), &a[i__ * a_dim1 + 1], inc, lda, &ipadok, &const1, &const1, &const1);
1488 }
1489 if (ipadok == 0) {
1490 zfftcopyout_(n, &b, &a[i__ * a_dim1 + 1], inc, lda, w);
1491 }
1492 }
1493 } else {
1494 /* Use both L1 and L2 blocking */
1495 zfftm1big_(sign, n, p, &a[a_offset], inc, lda, lpref, &nf, (int*)(&coeff[4]),
1496 &coeff[15], rindex);
1497 }
1498 return 0;
1499 } /* zfftm1d_ */
1500
1501
1502
1503
1504 /* Multidimensional transforms */
zfft2di_(int * x,int * y,zomplex * coeff)1505 int zfft2di_(int *x, int *y,zomplex * coeff)
1506 {
1507
1508 /* Parameter adjustments */
1509 --coeff;
1510
1511 /* Function Body */
1512 zfftm1di_(x, &coeff[1]);
1513 zfftm1di_(y, &coeff[*x + 16]);
1514 return 0;
1515 } /* zfft2di_ */
1516
zfft2d_(int * sign,int * x,int * y,zomplex * a,int * lda,zomplex * coeff)1517 int zfft2d_(int *sign, int *x, int *y, zomplex *a, int *lda, zomplex *coeff)
1518 {
1519 /* System generated locals */
1520 int a_dim1, a_offset;
1521 int L__1;
1522
1523 /* Local variables */
1524
1525 /* Parameter adjustments */
1526 --coeff;
1527 a_dim1 = *lda;
1528 a_offset = 1 + a_dim1 * 1;
1529 a -= a_offset;
1530
1531 /* Function Body */
1532 zfftm1d_(sign, y, x, &a[a_offset], lda, &const1, &coeff[*x + 16], &constTrue);
1533 L__1 = *x * *y > 32768;
1534 zfftm1d_(sign, x, y, &a[a_offset], &const1, lda, &coeff[1], &L__1);
1535 return 0;
1536 } /* zfft2d_ */
1537
zfft3di_(int * x,int * y,int * z__,zomplex * coeff)1538 int zfft3di_(int *x, int *y, int *z__,zomplex * coeff)
1539 {
1540
1541 /* Parameter adjustments */
1542 --coeff;
1543
1544 /* Function Body */
1545 zfftm1di_(x, &coeff[1]);
1546 zfftm1di_(y, &coeff[*x + 16]);
1547 zfftm1di_(z__, &coeff[*x + *y + 31]);
1548 return 0;
1549 } /* zfft3di_ */
1550
zfft3d_(int * sign,int * x,int * y,int * z__,zomplex * a,int * la1,int * la2,zomplex * coeff)1551 int zfft3d_(int *sign,int * x, int *y, int *z__,zomplex * a, int *la1, int *la2,zomplex * coeff)
1552 {
1553 /* System generated locals */
1554 int a_dim1, a_dim2, a_offset, i__1, i__2;
1555
1556 /* Local variables */
1557 int k;
1558 /* Parameter adjustments */
1559 --coeff;
1560 a_dim1 = *la1;
1561 a_dim2 = *la2;
1562 a_offset = 1 + a_dim1 * (1 + a_dim2 * 1);
1563 a -= a_offset;
1564
1565 /* Function Body */
1566 i__1 = *z__;
1567 for (k = 1; k <= i__1; ++k) {
1568 zfftm1d_(sign, y, x, &a[(k * a_dim2 + 1) * a_dim1 + 1], la1, &const1, &
1569 coeff[*x + 16], &constTrue);
1570 zfftm1d_(sign, x, y, &a[(k * a_dim2 + 1) * a_dim1 + 1], &const1, la1, &
1571 coeff[1], &constFalse);
1572 }
1573 i__1 = *y;
1574 for (k = 1; k <= i__1; ++k) {
1575 i__2 = *la1 * *la2;
1576 zfftm1d_(sign, z__, x, &a[(k + a_dim2) * a_dim1 + 1], &i__2, &const1, &
1577 coeff[*x + *y + 31], &constTrue);
1578 }
1579 return 0;
1580 } /* zfft3d_ */
1581
1582
1583 /* SGI-ZFFT wrapper */
zfftm1di(int m,zomplex * save)1584 zomplex *zfftm1di( int m, zomplex *save)
1585 {
1586 zomplex *c = 0;
1587 int i=0;
1588 c = (zomplex*)malloc((m+15)*sizeof(zomplex));
1589 zfftm1di_(&m,c);
1590 if(save != 0)
1591 for(i=0;i<m+15;i++)
1592 save[i] = c[i];
1593 return c;
1594 }
1595
zfftm1d(int sign,int m,int n,zomplex * array,int incI,int incJ,zomplex * save)1596 int zfftm1d( int sign, int m, int n, zomplex *array, int incI, int incJ, zomplex *save)
1597 {
1598 static int lpref = 0;
1599 zfftm1d_(&sign,&m,&n,array,&incI,&incJ,save,&lpref);
1600 return 0;
1601 }
1602
zfft2di(int n1,int n2,zomplex * save)1603 zomplex *zfft2di( int n1, int n2, zomplex *save)
1604 {
1605 zomplex *c = 0;
1606 int i=0;
1607 c = (zomplex*)malloc((n1+n2+30)*sizeof(zomplex));
1608 zfft2di_(&n1,&n2,c);
1609 if(save != 0)
1610 for(i=0;i<n1+n2+30;i++)
1611 save[i] = c[i];
1612 return c;
1613 }
1614
zfft2d(int sign,int n1,int n2,zomplex * array,int ld,zomplex * save)1615 int zfft2d( int sign, int n1, int n2, zomplex *array, int ld, zomplex *save)
1616 {
1617 zfft2d_(&sign,&n1,&n2,array,&ld,save);
1618 return 0;
1619 }
1620
zfft3di(int n1,int n2,int n3,zomplex * save)1621 zomplex *zfft3di( int n1, int n2, int n3, zomplex *save)
1622 {
1623 zomplex *c = 0;
1624 int i=0;
1625 c = (zomplex*)malloc((n1+n2+n3+45)*sizeof(zomplex));
1626 zfft3di_(&n1,&n2,&n3,c);
1627 if(save != 0)
1628 for(i=0;i<n1+n2+n3+45;i++)
1629 save[i] = c[i];
1630 return c;
1631 }
1632
zfft3d(int sign,int n1,int n2,int n3,zomplex * array,int ld1,int ld2,zomplex * save)1633 int zfft3d( int sign, int n1, int n2, int n3, zomplex *array, int ld1, int ld2, zomplex *save)
1634 {
1635 zfft3d_(&sign,&n1,&n2,&n3,array,&ld1,&ld2,save);
1636 return 0;
1637 }
1638
1639 #ifdef __cplusplus
1640 } /* extern "C" */
1641 #endif
1642
1643 #endif /* HAVE_FFT*/
1644