1 /*
2  * Copyright (c) 1997 Massachusetts Institute of Technology
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining
5  * a copy of this software and associated documentation files (the
6  * "Software"), to use, copy, modify, and distribute the Software without
7  * restriction, provided the Software, including any modified copies made
8  * under this license, is not distributed for a fee, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be
12  * included in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
18  * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
19  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Except as contained in this notice, the name of the Massachusetts
23  * Institute of Technology shall not be used in advertising or otherwise
24  * to promote the sale, use or other dealings in this Software without
25  * prior written authorization from the Massachusetts Institute of
26  * Technology.
27  *
28  */
29 
30 #include <math.h>
31 #include "fftw_dp.h"
32 #include "konst.h"
33 
34 #define FFTW_K2PI 6.2831853071795864769252867665590057683943387987502
35 
36 static fftw_twiddle* fftw_create_twiddle(int n, int r, int m);
37 static void fftw_destroy_twiddle(fftw_twiddle* tw);
38 static void* fftw_malloc(size_t n);
39 static void fftw_free(void* p);
40 static void fftw_strided_copy(int, FFTW_COMPLEX*, int, FFTW_COMPLEX*);
41 static void fftw_executor_simple(int, const FFTW_COMPLEX*, FFTW_COMPLEX*, fftw_plan_node*, int, int);
42 
43 static int fftw_node_cnt = 0;
44 static int fftw_plan_cnt = 0;
45 
46 static fftw_twiddle* twlist = (fftw_twiddle*)0;
47 static int fftw_twiddle_size = 0;	/* total allocated size, for debugging */
48 
49 #define NOTW_OPTIMAL_SIZE 32
50 #define TWIDDLE_OPTIMAL_SIZE 12
51 
52 /* the signature is the same as the size, for now */
53 #define NOTW_CODELET(x)  { x, x, fftw_no_twiddle_##x }
54 #define NOTWI_CODELET(x)  { x, x, fftwi_no_twiddle_##x }
55 
56 static notw_codelet fftw_no_twiddle_1;
57 static notw_codelet fftw_no_twiddle_2;
58 static notw_codelet fftw_no_twiddle_3;
59 static notw_codelet fftw_no_twiddle_4;
60 static notw_codelet fftw_no_twiddle_5;
61 static notw_codelet fftw_no_twiddle_6;
62 static notw_codelet fftw_no_twiddle_7;
63 static notw_codelet fftw_no_twiddle_8;
64 static notw_codelet fftw_no_twiddle_9;
65 static notw_codelet fftw_no_twiddle_10;
66 static notw_codelet fftw_no_twiddle_11;
67 static notw_codelet fftw_no_twiddle_12;
68 static notw_codelet fftw_no_twiddle_13;
69 static notw_codelet fftw_no_twiddle_14;
70 static notw_codelet fftw_no_twiddle_15;
71 static notw_codelet fftw_no_twiddle_16;
72 static notw_codelet fftw_no_twiddle_32;
73 static notw_codelet fftw_no_twiddle_64;
74 
75 static notw_codelet fftwi_no_twiddle_1;
76 static notw_codelet fftwi_no_twiddle_2;
77 static notw_codelet fftwi_no_twiddle_3;
78 static notw_codelet fftwi_no_twiddle_4;
79 static notw_codelet fftwi_no_twiddle_5;
80 static notw_codelet fftwi_no_twiddle_6;
81 static notw_codelet fftwi_no_twiddle_7;
82 static notw_codelet fftwi_no_twiddle_8;
83 static notw_codelet fftwi_no_twiddle_9;
84 static notw_codelet fftwi_no_twiddle_10;
85 static notw_codelet fftwi_no_twiddle_11;
86 static notw_codelet fftwi_no_twiddle_12;
87 static notw_codelet fftwi_no_twiddle_13;
88 static notw_codelet fftwi_no_twiddle_14;
89 static notw_codelet fftwi_no_twiddle_15;
90 static notw_codelet fftwi_no_twiddle_16;
91 static notw_codelet fftwi_no_twiddle_32;
92 static notw_codelet fftwi_no_twiddle_64;
93 
94 config_notw fftw_config_notw[] =
95 {
96      NOTW_CODELET(1),
97      NOTW_CODELET(2),
98      NOTW_CODELET(3),
99      NOTW_CODELET(4),
100      NOTW_CODELET(5),
101      NOTW_CODELET(6),
102      NOTW_CODELET(7),
103      NOTW_CODELET(8),
104      NOTW_CODELET(9),
105      NOTW_CODELET(10),
106      NOTW_CODELET(11),
107      NOTW_CODELET(12),
108      NOTW_CODELET(13),
109      NOTW_CODELET(14),
110      NOTW_CODELET(15),
111      NOTW_CODELET(16),
112      NOTW_CODELET(32),
113      NOTW_CODELET(64),
114      {0, 0, (notw_codelet *) 0}
115 };
116 
117 config_notw fftwi_config_notw[] =
118 {
119      NOTWI_CODELET(1),
120      NOTWI_CODELET(2),
121      NOTWI_CODELET(3),
122      NOTWI_CODELET(4),
123      NOTWI_CODELET(5),
124      NOTWI_CODELET(6),
125      NOTWI_CODELET(7),
126      NOTWI_CODELET(8),
127      NOTWI_CODELET(9),
128      NOTWI_CODELET(10),
129      NOTWI_CODELET(11),
130      NOTWI_CODELET(12),
131      NOTWI_CODELET(13),
132      NOTWI_CODELET(14),
133      NOTWI_CODELET(15),
134      NOTWI_CODELET(16),
135      NOTWI_CODELET(32),
136      NOTWI_CODELET(64),
137      {0, 0, (notw_codelet *) 0}
138 };
139 
140 /* the signature is the same as the size, for now */
141 #define TWIDDLE_CODELET(x)  { x, x, fftw_twiddle_##x }
142 #define TWIDDLEI_CODELET(x)  { x, x, fftwi_twiddle_##x }
143 
144 static twiddle_codelet fftw_twiddle_2;
145 static twiddle_codelet fftw_twiddle_3;
146 static twiddle_codelet fftw_twiddle_4;
147 static twiddle_codelet fftw_twiddle_5;
148 static twiddle_codelet fftw_twiddle_6;
149 static twiddle_codelet fftw_twiddle_7;
150 static twiddle_codelet fftw_twiddle_8;
151 static twiddle_codelet fftw_twiddle_9;
152 static twiddle_codelet fftw_twiddle_10;
153 static twiddle_codelet fftw_twiddle_16;
154 static twiddle_codelet fftw_twiddle_32;
155 static twiddle_codelet fftw_twiddle_64;
156 
157 static twiddle_codelet fftwi_twiddle_2;
158 static twiddle_codelet fftwi_twiddle_3;
159 static twiddle_codelet fftwi_twiddle_4;
160 static twiddle_codelet fftwi_twiddle_5;
161 static twiddle_codelet fftwi_twiddle_6;
162 static twiddle_codelet fftwi_twiddle_7;
163 static twiddle_codelet fftwi_twiddle_8;
164 static twiddle_codelet fftwi_twiddle_9;
165 static twiddle_codelet fftwi_twiddle_10;
166 static twiddle_codelet fftwi_twiddle_16;
167 static twiddle_codelet fftwi_twiddle_32;
168 static twiddle_codelet fftwi_twiddle_64;
169 
170 config_twiddle fftw_config_twiddle[] =
171 {
172      TWIDDLE_CODELET(2),
173      TWIDDLE_CODELET(3),
174      TWIDDLE_CODELET(4),
175      TWIDDLE_CODELET(5),
176      TWIDDLE_CODELET(6),
177      TWIDDLE_CODELET(7),
178      TWIDDLE_CODELET(8),
179      TWIDDLE_CODELET(9),
180      TWIDDLE_CODELET(10),
181      TWIDDLE_CODELET(16),
182      TWIDDLE_CODELET(32),
183      TWIDDLE_CODELET(64),
184      {0, 0, (twiddle_codelet *) 0}
185 };
186 
187 config_twiddle fftwi_config_twiddle[] =
188 {
189      TWIDDLEI_CODELET(2),
190      TWIDDLEI_CODELET(3),
191      TWIDDLEI_CODELET(4),
192      TWIDDLEI_CODELET(5),
193      TWIDDLEI_CODELET(6),
194      TWIDDLEI_CODELET(7),
195      TWIDDLEI_CODELET(8),
196      TWIDDLEI_CODELET(9),
197      TWIDDLEI_CODELET(10),
198      TWIDDLEI_CODELET(16),
199      TWIDDLEI_CODELET(32),
200      TWIDDLEI_CODELET(64),
201      {0, 0, (twiddle_codelet *) 0}
202 };
203 /*
204  * Copyright (c) 1997 Massachusetts Institute of Technology
205  *
206  * Permission is hereby granted, free of charge, to any person obtaining
207  * a copy of this software and associated documentation files (the
208  * "Software"), to use, copy, modify, and distribute the Software without
209  * restriction, provided the Software, including any modified copies made
210  * under this license, is not distributed for a fee, subject to
211  * the following conditions:
212  *
213  * The above copyright notice and this permission notice shall be
214  * included in all copies or substantial portions of the Software.
215  *
216  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
217  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
218  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
219  * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
220  * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
221  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
222  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
223  *
224  * Except as contained in this notice, the name of the Massachusetts
225  * Institute of Technology shall not be used in advertising or otherwise
226  * to promote the sale, use or other dealings in this Software without
227  * prior written authorization from the Massachusetts Institute of
228  * Technology.
229  *
230  */
231 
232 /*
233  * executor.c -- execute the fft
234  */
235 
236 /*
237  * This function is called in other files, so we cannot declare
238  * it as static.
239  */
240 
fftw_strided_copy(int n,FFTW_COMPLEX * in,int ostride,FFTW_COMPLEX * out)241 static void fftw_strided_copy(int n, FFTW_COMPLEX *in, int ostride, FFTW_COMPLEX *out)
242 {
243      int i;
244      FFTW_REAL r0, r1, i0, i1;
245      FFTW_REAL r2, r3, i2, i3;
246 
247      i = 0;
248      if (n & 3)
249 	  for (; i < (n & 3); ++i) {
250 	       out[i * ostride] = in[i];
251 	  }
252      for (; i < n; i += 4) {
253 	  r0 = c_re(in[i]);
254 	  i0 = c_im(in[i]);
255 	  r1 = c_re(in[i + 1]);
256 	  i1 = c_im(in[i + 1]);
257 	  r2 = c_re(in[i + 2]);
258 	  i2 = c_im(in[i + 2]);
259 	  r3 = c_re(in[i + 3]);
260 	  i3 = c_im(in[i + 3]);
261 	  c_re(out[i * ostride]) = r0;
262 	  c_im(out[i * ostride]) = i0;
263 	  c_re(out[(i + 1) * ostride]) = r1;
264 	  c_im(out[(i + 1) * ostride]) = i1;
265 	  c_re(out[(i + 2) * ostride]) = r2;
266 	  c_im(out[(i + 2) * ostride]) = i2;
267 	  c_re(out[(i + 3) * ostride]) = r3;
268 	  c_im(out[(i + 3) * ostride]) = i3;
269      }
270 }
271 
272 /*
273  * Do *not* declare simple executor as static--we need to call it
274  * from executor_cilk.cilk...also, preface its name with "fftw_"
275  * to avoid any possible name collisions.
276  */
fftw_executor_simple(int n,const FFTW_COMPLEX * in,FFTW_COMPLEX * out,fftw_plan_node * p,int istride,int ostride)277 static void fftw_executor_simple(int n, const FFTW_COMPLEX *in,
278 			  FFTW_COMPLEX *out,
279 			  fftw_plan_node *p,
280 			  int istride,
281 			  int ostride)
282 {
283      switch (p->type) {
284 	 case FFTW_NOTW:
285 	      (p->nodeu.notw.codelet) (in, out, istride, ostride);
286 	      break;
287 
288 	 case FFTW_TWIDDLE:
289 	      {
290 		   int r = p->nodeu.twiddle.size;
291 		   int m = n / r;
292 		   int i;
293 		   twiddle_codelet *codelet;
294 		   FFTW_COMPLEX *W;
295 
296 		   for (i = 0; i < r; ++i) {
297 			fftw_executor_simple(m, in + i * istride,
298 					     out + i * (m * ostride),
299 					     p->nodeu.twiddle.recurse,
300 					     istride * r, ostride);
301 		   }
302 
303 		   codelet = p->nodeu.twiddle.codelet;
304 		   W = p->nodeu.twiddle.tw->twarray;
305 		   codelet(out, W, m * ostride, m, ostride);
306 
307 		   break;
308 	      }
309 
310 	 case FFTW_GENERIC:
311 	      {
312 		   int r = p->nodeu.generic.size;
313 		   int m = n / r;
314 		   int i;
315 		   generic_codelet *codelet;
316 		   FFTW_COMPLEX *W;
317 
318 		   for (i = 0; i < r; ++i) {
319 			fftw_executor_simple(m, in + i * istride,
320 					     out + i * (m * ostride),
321 					     p->nodeu.generic.recurse,
322 					     istride * r, ostride);
323 		   }
324 
325 		   codelet = p->nodeu.generic.codelet;
326 		   W = p->nodeu.generic.tw->twarray;
327 		   codelet(out, W, m, r, n, ostride);
328 
329 		   break;
330 	      }
331 
332 	 default:
333 	      fftw_die("BUG in executor: illegal plan\n");
334 	      break;
335      }
336 }
337 
/*
 * Transform n points in place in `in` (stride istride).
 *
 * A FFTW_NOTW node is run directly with `in` as both source and
 * destination.  Any other node is executed into a unit-stride scratch
 * buffer and the result is copied back into `in`.  The scratch buffer
 * is `out` when the caller supplies one; otherwise it is obtained from
 * fftw_malloc() and released with fftw_free() before returning.
 */
static void executor_simple_inplace(int n, FFTW_COMPLEX *in,
                                    FFTW_COMPLEX *out,
                                    fftw_plan_node *p,
                                    int istride)
{
     FFTW_COMPLEX *scratch;

     if (p->type == FFTW_NOTW) {
          (p->nodeu.notw.codelet) (in, in, istride, istride);
          return;
     }

     scratch = out ? out
                   : (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));

     fftw_executor_simple(n, in, scratch, p, istride, 1);
     fftw_strided_copy(n, scratch, istride, in);

     /* only release what was allocated here */
     if (!out)
          fftw_free(scratch);
}
366 
/*
 * Run `howmany` out-of-place n-point transforms with plan node p.
 * Transform s reads from in + s * idist and writes to out + s * odist,
 * using strides istride and ostride.  A FFTW_NOTW node is dispatched
 * straight to its codelet; every other node type goes through
 * fftw_executor_simple().
 */
static void executor_many(int n, const FFTW_COMPLEX *in,
                          FFTW_COMPLEX *out,
                          fftw_plan_node *p,
                          int istride,
                          int ostride,
                          int howmany, int idist, int odist)
{
     int t;

     if (p->type == FFTW_NOTW) {
          notw_codelet *leaf = p->nodeu.notw.codelet;

          for (t = 0; t < howmany; ++t)
               leaf(in + t * idist, out + t * odist, istride, ostride);
          return;
     }

     for (t = 0; t < howmany; ++t)
          fftw_executor_simple(n, in + t * idist, out + t * odist,
                               p, istride, ostride);
}
397 
/*
 * Run `howmany` in-place n-point transforms with plan node p; transform
 * s operates on in + s * idist with stride istride.
 *
 * A FFTW_NOTW node is run directly with the data as both source and
 * destination.  Any other node is executed into a unit-stride scratch
 * buffer of n elements, which is then copied back over the input.  One
 * scratch buffer is shared by all transforms: `out` when non-null,
 * otherwise a block from fftw_malloc() that is freed before returning.
 */
static void executor_many_inplace(int n, FFTW_COMPLEX *in,
                                  FFTW_COMPLEX *out,
                                  fftw_plan_node *p,
                                  int istride,
                                  int howmany, int idist)
{
     FFTW_COMPLEX *scratch;
     int t;

     if (p->type == FFTW_NOTW) {
          notw_codelet *leaf = p->nodeu.notw.codelet;

          for (t = 0; t < howmany; ++t) {
               FFTW_COMPLEX *data = in + t * idist;

               leaf(data, data, istride, istride);
          }
          return;
     }

     scratch = out ? out
                   : (FFTW_COMPLEX *) fftw_malloc(n * sizeof(FFTW_COMPLEX));

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *data = in + t * idist;

          fftw_executor_simple(n, data, scratch, p, istride, 1);
          fftw_strided_copy(n, scratch, istride, data);
     }

     /* only release what was allocated here */
     if (!out)
          fftw_free(scratch);
}
439 
440 
441 
442 /*
443  * Copyright (c) 1997 Massachusetts Institute of Technology
444  *
445  * Permission is hereby granted, free of charge, to any person obtaining
446  * a copy of this software and associated documentation files (the
447  * "Software"), to use, copy, modify, and distribute the Software without
448  * restriction, provided the Software, including any modified copies made
449  * under this license, is not distributed for a fee, subject to
450  * the following conditions:
451  *
452  * The above copyright notice and this permission notice shall be
453  * included in all copies or substantial portions of the Software.
454  *
455  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
456  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
457  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
458  * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
459  * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
460  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
461  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
462  *
463  * Except as contained in this notice, the name of the Massachusetts
464  * Institute of Technology shall not be used in advertising or otherwise
465  * to promote the sale, use or other dealings in this Software without
466  * prior written authorization from the Massachusetts Institute of
467  * Technology.
468  *
469  */
470 
471 
472 /* Prototypes for functions used internally in this file: */
473 
474 static void fftw2d_out_of_place_aux(fftwnd_plan p, int howmany,
475 				FFTW_COMPLEX *in, int istride, int idist,
476 			      FFTW_COMPLEX *out, int ostride, int odist);
477 static void fftw3d_out_of_place_aux(fftwnd_plan p, int howmany,
478 				FFTW_COMPLEX *in, int istride, int idist,
479 			      FFTW_COMPLEX *out, int ostride, int odist);
480 static void fftwnd_out_of_place_aux(fftwnd_plan p, int howmany,
481 				FFTW_COMPLEX *in, int istride, int idist,
482 			      FFTW_COMPLEX *out, int ostride, int odist);
483 
484 static void fftw2d_in_place_aux(fftwnd_plan p, int howmany,
485 			   FFTW_COMPLEX *in_out, int istride, int idist);
486 static void fftw3d_in_place_aux(fftwnd_plan p, int howmany,
487 			   FFTW_COMPLEX *in_out, int istride, int idist);
488 static void fftwnd_in_place_aux(fftwnd_plan p, int howmany,
489 			   FFTW_COMPLEX *in_out, int istride, int idist);
490 
491 
492 
/*
 * Out-of-place 2d transform, repeated `howmany` times.  Repetition t
 * reads from in + t * idist and writes to out + t * odist.
 *
 * Each repetition makes two fftw() calls: the y dimension is
 * transformed from `in` into `out`, then the x dimension is transformed
 * on `out` itself, passing p->work as the output argument.
 *
 * NOTE(review): the second call passes (1, 1) as its trailing
 * stride/dist pair, whereas the other helpers in this file pass (1, 0).
 * Presumably these are ignored for in-place plans -- confirm against
 * fftw().
 */
static void fftw2d_out_of_place_aux(fftwnd_plan p, int howmany,
                                    FFTW_COMPLEX *in, int istride, int idist,
                                    FFTW_COMPLEX *out, int ostride, int odist)
{
     fftw_plan plan_x = p->plans[0];
     fftw_plan plan_y = p->plans[1];
     int nx = p->n[0];
     int ny = p->n[1];
     int t;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *src = in + t * idist;
          FFTW_COMPLEX *dst = out + t * odist;

          /* y dimension, in -> out */
          fftw(plan_y, nx,
               src, istride, ny * istride,
               dst, ostride, ny * ostride);

          /* x dimension, on out */
          fftw(plan_x, ny,
               dst, ny * ostride, ostride,
               p->work, 1, 1);
     }
}
517 
/*
 * Out-of-place 3d transform, repeated `howmany` times.  Repetition t
 * reads from in + t * idist and writes to out + t * odist.
 *
 * Per repetition: the z dimension is transformed from `in` into `out`;
 * the y dimension is then transformed on `out`, one call per index of
 * the first dimension; finally the x dimension is transformed on `out`.
 * The calls that work on `out` pass p->work as their output argument.
 */
static void fftw3d_out_of_place_aux(fftwnd_plan p, int howmany,
                                    FFTW_COMPLEX *in, int istride, int idist,
                                    FFTW_COMPLEX *out, int ostride, int odist)
{
     fftw_plan plan_x = p->plans[0];
     fftw_plan plan_y = p->plans[1];
     fftw_plan plan_z = p->plans[2];
     int nx = p->n[0];
     int ny = p->n[1];
     int nz = p->n[2];
     int t, slab;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *src = in + t * idist;
          FFTW_COMPLEX *dst = out + t * odist;

          /* z dimension, in -> out */
          fftw(plan_z, nx * ny,
               src, istride, nz * istride,
               dst, ostride, nz * ostride);

          /* y dimension, on out, one slab of ny * nz elements at a time */
          for (slab = 0; slab < nx; ++slab)
               fftw(plan_y, nz,
                    dst + slab * ny * nz * ostride,
                    nz * ostride, ostride, p->work, 1, 0);

          /* x dimension, on out */
          fftw(plan_x, ny * nz,
               dst, ny * nz * ostride, ostride,
               p->work, 1, 0);
     }
}
550 
/*
 * Out-of-place transform for rank > 3, repeated `howmany` times.
 * Repetition t reads from in + t * idist and writes to out + t * odist.
 *
 * Per repetition: the last dimension is transformed from `in` into
 * `out`; the first dimension is then transformed on `out`; finally each
 * remaining dimension j (1 .. rank - 2) is transformed on `out` with
 * p->n_before[j] separate calls.  The calls that work on `out` pass
 * p->work as their output argument.
 */
static void fftwnd_out_of_place_aux(fftwnd_plan p, int howmany,
                                    FFTW_COMPLEX *in, int istride, int idist,
                                    FFTW_COMPLEX *out, int ostride, int odist)
{
     int t;
     int dim, blk;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *src = in + t * idist;
          FFTW_COMPLEX *dst = out + t * odist;

          /* last dimension, in -> out */
          fftw(p->plans[p->rank - 1], p->n_before[p->rank - 1],
               src, istride, p->n[p->rank - 1] * istride,
               dst, ostride, p->n[p->rank - 1] * ostride);

          /* first dimension, on out */
          fftw(p->plans[0], p->n_after[0],
               dst, p->n_after[0] * ostride, ostride,
               p->work, 1, 0);

          /* middle dimensions, on out */
          for (dim = 1; dim < p->rank - 1; ++dim) {
               for (blk = 0; blk < p->n_before[dim]; ++blk) {
                    fftw(p->plans[dim], p->n_after[dim],
                         dst + blk * ostride * p->n[dim] * p->n_after[dim],
                         p->n_after[dim] * ostride, ostride,
                         p->work, 1, 0);
               }
          }
     }
}
580 
/*
 * In-place 2d transform, repeated `howmany` times.  Repetition t works
 * on in_out + t * idist with base stride istride.
 *
 * Each repetition transforms the y dimension and then the x dimension;
 * both fftw() calls pass p->work as their output argument.
 */
static void fftw2d_in_place_aux(fftwnd_plan p, int howmany,
                                FFTW_COMPLEX *in_out, int istride, int idist)
{
     fftw_plan plan_x = p->plans[0];
     fftw_plan plan_y = p->plans[1];
     int nx = p->n[0];
     int ny = p->n[1];
     int t;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *data = in_out + t * idist;

          /* y dimension */
          fftw(plan_y, nx,
               data, istride, istride * ny,
               p->work, 1, 0);

          /* x dimension */
          fftw(plan_x, ny,
               data, istride * ny, istride,
               p->work, 1, 0);
     }
}
604 
/*
 * In-place 3d transform, repeated `howmany` times.  Repetition t works
 * on in_out + t * idist with base stride istride.
 *
 * Per repetition: the z dimension is transformed, then the y dimension
 * (one call per index of the first dimension), then the x dimension.
 * Every fftw() call passes p->work as its output argument.
 */
static void fftw3d_in_place_aux(fftwnd_plan p, int howmany,
                                FFTW_COMPLEX *in_out, int istride, int idist)
{
     fftw_plan plan_x = p->plans[0];
     fftw_plan plan_y = p->plans[1];
     fftw_plan plan_z = p->plans[2];
     int nx = p->n[0];
     int ny = p->n[1];
     int nz = p->n[2];
     int t, slab;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *data = in_out + t * idist;

          /* z dimension */
          fftw(plan_z, nx * ny,
               data, istride, nz * istride,
               p->work, 1, 0);

          /* y dimension, one slab of ny * nz elements at a time */
          for (slab = 0; slab < nx; ++slab)
               fftw(plan_y, nz,
                    data + slab * ny * nz * istride,
                    nz * istride, istride, p->work, 1, 0);

          /* x dimension */
          fftw(plan_x, ny * nz,
               data, ny * nz * istride, istride,
               p->work, 1, 0);
     }
}
636 
/*
 * In-place transform for rank > 3, repeated `howmany` times.
 * Repetition t works on in_out + t * idist with base stride istride.
 *
 * Per repetition: the last dimension is transformed first, then the
 * first dimension, then each remaining dimension j (1 .. rank - 2) with
 * p->n_before[j] separate calls.  Every fftw() call passes p->work as
 * its output argument.
 */
static void fftwnd_in_place_aux(fftwnd_plan p, int howmany,
                                FFTW_COMPLEX *in_out, int istride, int idist)
{
     int t;
     int dim, blk;

     for (t = 0; t < howmany; ++t) {
          FFTW_COMPLEX *data = in_out + t * idist;

          /* last dimension */
          fftw(p->plans[p->rank - 1], p->n_before[p->rank - 1],
               data, istride, p->n[p->rank - 1] * istride,
               p->work, 1, 0);

          /* first dimension */
          fftw(p->plans[0], p->n_after[0],
               data, p->n_after[0] * istride, istride,
               p->work, 1, 0);

          /* middle dimensions */
          for (dim = 1; dim < p->rank - 1; ++dim) {
               for (blk = 0; blk < p->n_before[dim]; ++blk) {
                    fftw(p->plans[dim], p->n_after[dim],
                         data + blk * istride * p->n[dim] * p->n_after[dim],
                         p->n_after[dim] * istride, istride,
                         p->work, 1, 0);
               }
          }
     }
}
664 /*
665  * Copyright (c) 1997 Massachusetts Institute of Technology
666  *
667  * Permission is hereby granted, free of charge, to any person obtaining
668  * a copy of this software and associated documentation files (the
669  * "Software"), to use, copy, modify, and distribute the Software without
670  * restriction, provided the Software, including any modified copies made
671  * under this license, is not distributed for a fee, subject to
672  * the following conditions:
673  *
674  * The above copyright notice and this permission notice shall be
675  * included in all copies or substantial portions of the Software.
676  *
677  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
678  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
679  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
680  * IN NO EVENT SHALL THE MASSACHUSETTS INSTITUTE OF TECHNOLOGY BE LIABLE
681  * FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
682  * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
683  * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
684  *
685  * Except as contained in this notice, the name of the Massachusetts
686  * Institute of Technology shall not be used in advertising or otherwise
687  * to promote the sale, use or other dealings in this Software without
688  * prior written authorization from the Massachusetts Institute of
689  * Technology.
690  *
691  */
692 
693 /* This function contains 0 FP additions and 0 FP multiplications */
694 
fftw_no_twiddle_1(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)695 static void fftw_no_twiddle_1(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
696 {
697      FFTW_REAL tre0_0_0;
698      FFTW_REAL tim0_0_0;
699      tre0_0_0 = c_re(in[0]);
700      tim0_0_0 = c_im(in[0]);
701      c_re(out[0]) = tre0_0_0;
702      c_im(out[0]) = tim0_0_0;
703 }
704 
705 /* This function contains 108 FP additions and 32 FP multiplications */
706 
fftw_no_twiddle_10(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)707 static void fftw_no_twiddle_10(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
708 {
709      FFTW_REAL tre0_0_0;
710      FFTW_REAL tim0_0_0;
711      FFTW_REAL tre0_0_1;
712      FFTW_REAL tim0_0_1;
713      FFTW_REAL tre0_0_2;
714      FFTW_REAL tim0_0_2;
715      FFTW_REAL tre0_0_3;
716      FFTW_REAL tim0_0_3;
717      FFTW_REAL tre0_0_4;
718      FFTW_REAL tim0_0_4;
719      FFTW_REAL tre0_1_0;
720      FFTW_REAL tim0_1_0;
721      FFTW_REAL tre0_1_1;
722      FFTW_REAL tim0_1_1;
723      FFTW_REAL tre0_1_2;
724      FFTW_REAL tim0_1_2;
725      FFTW_REAL tre0_1_3;
726      FFTW_REAL tim0_1_3;
727      FFTW_REAL tre0_1_4;
728      FFTW_REAL tim0_1_4;
729      {
730 	  FFTW_REAL tre1_0_0;
731 	  FFTW_REAL tim1_0_0;
732 	  FFTW_REAL tre1_1_0;
733 	  FFTW_REAL tim1_1_0;
734 	  tre1_0_0 = c_re(in[0]);
735 	  tim1_0_0 = c_im(in[0]);
736 	  tre1_1_0 = c_re(in[5 * istride]);
737 	  tim1_1_0 = c_im(in[5 * istride]);
738 	  tre0_0_0 = tre1_0_0 + tre1_1_0;
739 	  tim0_0_0 = tim1_0_0 + tim1_1_0;
740 	  tre0_1_0 = tre1_0_0 - tre1_1_0;
741 	  tim0_1_0 = tim1_0_0 - tim1_1_0;
742      }
743      {
744 	  FFTW_REAL tre1_0_0;
745 	  FFTW_REAL tim1_0_0;
746 	  FFTW_REAL tre1_1_0;
747 	  FFTW_REAL tim1_1_0;
748 	  tre1_0_0 = c_re(in[2 * istride]);
749 	  tim1_0_0 = c_im(in[2 * istride]);
750 	  tre1_1_0 = c_re(in[7 * istride]);
751 	  tim1_1_0 = c_im(in[7 * istride]);
752 	  tre0_0_1 = tre1_0_0 + tre1_1_0;
753 	  tim0_0_1 = tim1_0_0 + tim1_1_0;
754 	  tre0_1_1 = tre1_0_0 - tre1_1_0;
755 	  tim0_1_1 = tim1_0_0 - tim1_1_0;
756      }
757      {
758 	  FFTW_REAL tre1_0_0;
759 	  FFTW_REAL tim1_0_0;
760 	  FFTW_REAL tre1_1_0;
761 	  FFTW_REAL tim1_1_0;
762 	  tre1_0_0 = c_re(in[4 * istride]);
763 	  tim1_0_0 = c_im(in[4 * istride]);
764 	  tre1_1_0 = c_re(in[9 * istride]);
765 	  tim1_1_0 = c_im(in[9 * istride]);
766 	  tre0_0_2 = tre1_0_0 + tre1_1_0;
767 	  tim0_0_2 = tim1_0_0 + tim1_1_0;
768 	  tre0_1_2 = tre1_0_0 - tre1_1_0;
769 	  tim0_1_2 = tim1_0_0 - tim1_1_0;
770      }
771      {
772 	  FFTW_REAL tre1_0_0;
773 	  FFTW_REAL tim1_0_0;
774 	  FFTW_REAL tre1_1_0;
775 	  FFTW_REAL tim1_1_0;
776 	  tre1_0_0 = c_re(in[6 * istride]);
777 	  tim1_0_0 = c_im(in[6 * istride]);
778 	  tre1_1_0 = c_re(in[istride]);
779 	  tim1_1_0 = c_im(in[istride]);
780 	  tre0_0_3 = tre1_0_0 + tre1_1_0;
781 	  tim0_0_3 = tim1_0_0 + tim1_1_0;
782 	  tre0_1_3 = tre1_0_0 - tre1_1_0;
783 	  tim0_1_3 = tim1_0_0 - tim1_1_0;
784      }
785      {
786 	  FFTW_REAL tre1_0_0;
787 	  FFTW_REAL tim1_0_0;
788 	  FFTW_REAL tre1_1_0;
789 	  FFTW_REAL tim1_1_0;
790 	  tre1_0_0 = c_re(in[8 * istride]);
791 	  tim1_0_0 = c_im(in[8 * istride]);
792 	  tre1_1_0 = c_re(in[3 * istride]);
793 	  tim1_1_0 = c_im(in[3 * istride]);
794 	  tre0_0_4 = tre1_0_0 + tre1_1_0;
795 	  tim0_0_4 = tim1_0_0 + tim1_1_0;
796 	  tre0_1_4 = tre1_0_0 - tre1_1_0;
797 	  tim0_1_4 = tim1_0_0 - tim1_1_0;
798      }
799      c_re(out[0]) = tre0_0_0 + tre0_0_1 + tre0_0_2 + tre0_0_3 + tre0_0_4;
800      c_im(out[0]) = tim0_0_0 + tim0_0_1 + tim0_0_2 + tim0_0_3 + tim0_0_4;
801      {
802 	  FFTW_REAL tre2_0_0;
803 	  FFTW_REAL tre2_1_0;
804 	  tre2_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_0_1 + tre0_0_4)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_0_2 + tre0_0_3));
805 	  tre2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_0_1 - tim0_0_4)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_0_2 - tim0_0_3));
806 	  c_re(out[6 * ostride]) = tre2_0_0 + tre2_1_0;
807 	  c_re(out[4 * ostride]) = tre2_0_0 - tre2_1_0;
808      }
809      {
810 	  FFTW_REAL tim2_0_0;
811 	  FFTW_REAL tim2_1_0;
812 	  tim2_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_0_1 + tim0_0_4)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_0_2 + tim0_0_3));
813 	  tim2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_0_4 - tre0_0_1)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_0_3 - tre0_0_2));
814 	  c_im(out[6 * ostride]) = tim2_0_0 + tim2_1_0;
815 	  c_im(out[4 * ostride]) = tim2_0_0 - tim2_1_0;
816      }
817      {
818 	  FFTW_REAL tre2_0_0;
819 	  FFTW_REAL tre2_1_0;
820 	  tre2_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_0_2 + tre0_0_3)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_0_1 + tre0_0_4));
821 	  tre2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_0_1 - tim0_0_4)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_0_3 - tim0_0_2));
822 	  c_re(out[2 * ostride]) = tre2_0_0 + tre2_1_0;
823 	  c_re(out[8 * ostride]) = tre2_0_0 - tre2_1_0;
824      }
825      {
826 	  FFTW_REAL tim2_0_0;
827 	  FFTW_REAL tim2_1_0;
828 	  tim2_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_0_2 + tim0_0_3)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_0_1 + tim0_0_4));
829 	  tim2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_0_4 - tre0_0_1)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_0_2 - tre0_0_3));
830 	  c_im(out[2 * ostride]) = tim2_0_0 + tim2_1_0;
831 	  c_im(out[8 * ostride]) = tim2_0_0 - tim2_1_0;
832      }
833      c_re(out[5 * ostride]) = tre0_1_0 + tre0_1_1 + tre0_1_2 + tre0_1_3 + tre0_1_4;
834      c_im(out[5 * ostride]) = tim0_1_0 + tim0_1_1 + tim0_1_2 + tim0_1_3 + tim0_1_4;
835      {
836 	  FFTW_REAL tre2_0_0;
837 	  FFTW_REAL tre2_1_0;
838 	  tre2_0_0 = tre0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_1_1 + tre0_1_4)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_1_2 + tre0_1_3));
839 	  tre2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_1_1 - tim0_1_4)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_1_2 - tim0_1_3));
840 	  c_re(out[ostride]) = tre2_0_0 + tre2_1_0;
841 	  c_re(out[9 * ostride]) = tre2_0_0 - tre2_1_0;
842      }
843      {
844 	  FFTW_REAL tim2_0_0;
845 	  FFTW_REAL tim2_1_0;
846 	  tim2_0_0 = tim0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_1_1 + tim0_1_4)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_1_2 + tim0_1_3));
847 	  tim2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_1_4 - tre0_1_1)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_1_3 - tre0_1_2));
848 	  c_im(out[ostride]) = tim2_0_0 + tim2_1_0;
849 	  c_im(out[9 * ostride]) = tim2_0_0 - tim2_1_0;
850      }
851      {
852 	  FFTW_REAL tre2_0_0;
853 	  FFTW_REAL tre2_1_0;
854 	  tre2_0_0 = tre0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_1_2 + tre0_1_3)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_1_1 + tre0_1_4));
855 	  tre2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_1_1 - tim0_1_4)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_1_3 - tim0_1_2));
856 	  c_re(out[7 * ostride]) = tre2_0_0 + tre2_1_0;
857 	  c_re(out[3 * ostride]) = tre2_0_0 - tre2_1_0;
858      }
859      {
860 	  FFTW_REAL tim2_0_0;
861 	  FFTW_REAL tim2_1_0;
862 	  tim2_0_0 = tim0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_1_2 + tim0_1_3)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_1_1 + tim0_1_4));
863 	  tim2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_1_4 - tre0_1_1)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_1_2 - tre0_1_3));
864 	  c_im(out[7 * ostride]) = tim2_0_0 + tim2_1_0;
865 	  c_im(out[3 * ostride]) = tim2_0_0 - tim2_1_0;
866      }
867 }
868 
869 /* This function contains 230 FP additions and 100 FP multiplications */
870 
/*
 * fftw_no_twiddle_11 -- straight-line 11-point complex transform.
 *
 * in      : source array; the samples in[0], in[istride], ...,
 *           in[10 * istride] are read.
 * out     : destination array; results are stored to out[0],
 *           out[ostride], ..., out[10 * ostride].
 * istride : distance, in elements, between consecutive inputs.
 * ostride : distance, in elements, between consecutive outputs.
 *
 * All 11 inputs are copied into locals before the first store to out.
 *
 * out[0] is the plain sum of the inputs.  For k = 1..5 the results at
 * indices k and 11 - k are produced together:
 *   - the real parts combine weighted sums re[j] + re[11 - j] with
 *     weighted differences of the imaginary parts;
 *   - the imaginary parts combine weighted sums im[j] + im[11 - j] with
 *     weighted differences of the real parts;
 * index k receives (first term + second term), index 11 - k receives
 * (first term - second term).
 *
 * NOTE(review): FFTW_REAL, c_re, c_im and the FFTW_K* weights are
 * declared outside this chunk.  If FFTW_Kddddddddd stands for
 * 0.ddddddddd, the weights are cos(2*pi*j/11) and sin(2*pi*j/11) and the
 * routine is a size-11 DFT with a negative exponent -- confirm against
 * konst.h.  The order of the floating-point operations is kept exactly
 * as generated; do not re-associate the sums.
 */
static void fftw_no_twiddle_11(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     FFTW_REAL tre0_0_0;
     FFTW_REAL tim0_0_0;
     FFTW_REAL tre0_1_0;
     FFTW_REAL tim0_1_0;
     FFTW_REAL tre0_2_0;
     FFTW_REAL tim0_2_0;
     FFTW_REAL tre0_3_0;
     FFTW_REAL tim0_3_0;
     FFTW_REAL tre0_4_0;
     FFTW_REAL tim0_4_0;
     FFTW_REAL tre0_5_0;
     FFTW_REAL tim0_5_0;
     FFTW_REAL tre0_6_0;
     FFTW_REAL tim0_6_0;
     FFTW_REAL tre0_7_0;
     FFTW_REAL tim0_7_0;
     FFTW_REAL tre0_8_0;
     FFTW_REAL tim0_8_0;
     FFTW_REAL tre0_9_0;
     FFTW_REAL tim0_9_0;
     FFTW_REAL tre0_10_0;
     FFTW_REAL tim0_10_0;
     /* Load every input (treJ / timJ = real / imaginary part of sample J). */
     tre0_0_0 = c_re(in[0]);
     tim0_0_0 = c_im(in[0]);
     tre0_1_0 = c_re(in[istride]);
     tim0_1_0 = c_im(in[istride]);
     tre0_2_0 = c_re(in[2 * istride]);
     tim0_2_0 = c_im(in[2 * istride]);
     tre0_3_0 = c_re(in[3 * istride]);
     tim0_3_0 = c_im(in[3 * istride]);
     tre0_4_0 = c_re(in[4 * istride]);
     tim0_4_0 = c_im(in[4 * istride]);
     tre0_5_0 = c_re(in[5 * istride]);
     tim0_5_0 = c_im(in[5 * istride]);
     tre0_6_0 = c_re(in[6 * istride]);
     tim0_6_0 = c_im(in[6 * istride]);
     tre0_7_0 = c_re(in[7 * istride]);
     tim0_7_0 = c_im(in[7 * istride]);
     tre0_8_0 = c_re(in[8 * istride]);
     tim0_8_0 = c_im(in[8 * istride]);
     tre0_9_0 = c_re(in[9 * istride]);
     tim0_9_0 = c_im(in[9 * istride]);
     tre0_10_0 = c_re(in[10 * istride]);
     tim0_10_0 = c_im(in[10 * istride]);
     /* Output 0: sum of all samples. */
     c_re(out[0]) = tre0_0_0 + tre0_1_0 + tre0_2_0 + tre0_3_0 + tre0_4_0 + tre0_5_0 + tre0_6_0 + tre0_7_0 + tre0_8_0 + tre0_9_0 + tre0_10_0;
     c_im(out[0]) = tim0_0_0 + tim0_1_0 + tim0_2_0 + tim0_3_0 + tim0_4_0 + tim0_5_0 + tim0_6_0 + tim0_7_0 + tim0_8_0 + tim0_9_0 + tim0_10_0;
     /* Outputs 1 and 10, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tre0_1_0 + tre0_10_0)) + (((FFTW_REAL) FFTW_K415415013) * (tre0_2_0 + tre0_9_0)) - (((FFTW_REAL) FFTW_K959492973) * (tre0_5_0 + tre0_6_0)) - (((FFTW_REAL) FFTW_K654860733) * (tre0_4_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tre0_3_0 + tre0_8_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K540640817) * (tim0_1_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K909631995) * (tim0_2_0 - tim0_9_0)) + (((FFTW_REAL) FFTW_K989821441) * (tim0_3_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K755749574) * (tim0_4_0 - tim0_7_0)) + (((FFTW_REAL) FFTW_K281732556) * (tim0_5_0 - tim0_6_0));
	  c_re(out[ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[10 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 1 and 10, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tim0_1_0 + tim0_10_0)) + (((FFTW_REAL) FFTW_K415415013) * (tim0_2_0 + tim0_9_0)) - (((FFTW_REAL) FFTW_K959492973) * (tim0_5_0 + tim0_6_0)) - (((FFTW_REAL) FFTW_K654860733) * (tim0_4_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tim0_3_0 + tim0_8_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K540640817) * (tre0_10_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K909631995) * (tre0_9_0 - tre0_2_0)) + (((FFTW_REAL) FFTW_K989821441) * (tre0_8_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K755749574) * (tre0_7_0 - tre0_4_0)) + (((FFTW_REAL) FFTW_K281732556) * (tre0_6_0 - tre0_5_0));
	  c_im(out[ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[10 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 2 and 9, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K415415013) * (tre0_1_0 + tre0_10_0)) + (((FFTW_REAL) FFTW_K841253532) * (tre0_5_0 + tre0_6_0)) - (((FFTW_REAL) FFTW_K142314838) * (tre0_4_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K959492973) * (tre0_3_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K654860733) * (tre0_2_0 + tre0_9_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K909631995) * (tim0_1_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K755749574) * (tim0_2_0 - tim0_9_0)) + (((FFTW_REAL) FFTW_K281732556) * (tim0_8_0 - tim0_3_0)) + (((FFTW_REAL) FFTW_K989821441) * (tim0_7_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K540640817) * (tim0_6_0 - tim0_5_0));
	  c_re(out[2 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[9 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 2 and 9, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K415415013) * (tim0_1_0 + tim0_10_0)) + (((FFTW_REAL) FFTW_K841253532) * (tim0_5_0 + tim0_6_0)) - (((FFTW_REAL) FFTW_K142314838) * (tim0_4_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K959492973) * (tim0_3_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K654860733) * (tim0_2_0 + tim0_9_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K909631995) * (tre0_10_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K755749574) * (tre0_9_0 - tre0_2_0)) + (((FFTW_REAL) FFTW_K281732556) * (tre0_3_0 - tre0_8_0)) + (((FFTW_REAL) FFTW_K989821441) * (tre0_4_0 - tre0_7_0)) + (((FFTW_REAL) FFTW_K540640817) * (tre0_5_0 - tre0_6_0));
	  c_im(out[2 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[9 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 3 and 8, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K415415013) * (tre0_3_0 + tre0_8_0)) + (((FFTW_REAL) FFTW_K841253532) * (tre0_4_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K654860733) * (tre0_5_0 + tre0_6_0)) - (((FFTW_REAL) FFTW_K959492973) * (tre0_2_0 + tre0_9_0)) - (((FFTW_REAL) FFTW_K142314838) * (tre0_1_0 + tre0_10_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K989821441) * (tim0_1_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K281732556) * (tim0_9_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K909631995) * (tim0_8_0 - tim0_3_0)) + (((FFTW_REAL) FFTW_K540640817) * (tim0_4_0 - tim0_7_0)) + (((FFTW_REAL) FFTW_K755749574) * (tim0_5_0 - tim0_6_0));
	  c_re(out[3 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[8 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 3 and 8, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K415415013) * (tim0_3_0 + tim0_8_0)) + (((FFTW_REAL) FFTW_K841253532) * (tim0_4_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K654860733) * (tim0_5_0 + tim0_6_0)) - (((FFTW_REAL) FFTW_K959492973) * (tim0_2_0 + tim0_9_0)) - (((FFTW_REAL) FFTW_K142314838) * (tim0_1_0 + tim0_10_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K989821441) * (tre0_10_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K281732556) * (tre0_2_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K909631995) * (tre0_3_0 - tre0_8_0)) + (((FFTW_REAL) FFTW_K540640817) * (tre0_7_0 - tre0_4_0)) + (((FFTW_REAL) FFTW_K755749574) * (tre0_6_0 - tre0_5_0));
	  c_im(out[3 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[8 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 4 and 7, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tre0_3_0 + tre0_8_0)) + (((FFTW_REAL) FFTW_K415415013) * (tre0_5_0 + tre0_6_0)) - (((FFTW_REAL) FFTW_K959492973) * (tre0_4_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tre0_2_0 + tre0_9_0)) - (((FFTW_REAL) FFTW_K654860733) * (tre0_1_0 + tre0_10_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K755749574) * (tim0_1_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K989821441) * (tim0_9_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K540640817) * (tim0_3_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K281732556) * (tim0_4_0 - tim0_7_0)) + (((FFTW_REAL) FFTW_K909631995) * (tim0_6_0 - tim0_5_0));
	  c_re(out[4 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[7 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 4 and 7, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tim0_3_0 + tim0_8_0)) + (((FFTW_REAL) FFTW_K415415013) * (tim0_5_0 + tim0_6_0)) - (((FFTW_REAL) FFTW_K959492973) * (tim0_4_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tim0_2_0 + tim0_9_0)) - (((FFTW_REAL) FFTW_K654860733) * (tim0_1_0 + tim0_10_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K755749574) * (tre0_10_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K989821441) * (tre0_2_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K540640817) * (tre0_8_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K281732556) * (tre0_7_0 - tre0_4_0)) + (((FFTW_REAL) FFTW_K909631995) * (tre0_5_0 - tre0_6_0));
	  c_im(out[4 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[7 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 5 and 6, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tre0_2_0 + tre0_9_0)) + (((FFTW_REAL) FFTW_K415415013) * (tre0_4_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tre0_5_0 + tre0_6_0)) - (((FFTW_REAL) FFTW_K654860733) * (tre0_3_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K959492973) * (tre0_1_0 + tre0_10_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K281732556) * (tim0_1_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K540640817) * (tim0_9_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K755749574) * (tim0_3_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K909631995) * (tim0_7_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K989821441) * (tim0_5_0 - tim0_6_0));
	  c_re(out[5 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[6 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 5 and 6, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K841253532) * (tim0_2_0 + tim0_9_0)) + (((FFTW_REAL) FFTW_K415415013) * (tim0_4_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K142314838) * (tim0_5_0 + tim0_6_0)) - (((FFTW_REAL) FFTW_K654860733) * (tim0_3_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K959492973) * (tim0_1_0 + tim0_10_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K281732556) * (tre0_10_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K540640817) * (tre0_2_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K755749574) * (tre0_8_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K909631995) * (tre0_4_0 - tre0_7_0)) + (((FFTW_REAL) FFTW_K989821441) * (tre0_6_0 - tre0_5_0));
	  c_im(out[5 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[6 * ostride]) = tim1_0_0 - tim1_1_0;
     }
}
1000 
1001 /* This function contains 104 FP additions and 16 FP multiplications */
1002 
/*
 * fftw_no_twiddle_12 -- straight-line 12-point complex transform built
 * from a 3-point stage followed by a 4-point stage.
 *
 * in      : source array; the samples in[0], in[istride], ...,
 *           in[11 * istride] are read.
 * out     : destination array; results are stored to out[0],
 *           out[ostride], ..., out[11 * ostride].
 * istride : distance, in elements, between consecutive inputs.
 * ostride : distance, in elements, between consecutive outputs.
 *
 * Stage 1 runs four 3-point butterflies on the input triples
 * {0,4,8}, {3,7,11}, {6,10,2} and {9,1,5}; the results are kept in
 * tre0_R_C / tim0_R_C (R = butterfly output 0..2, C = triple 0..3).
 * Stage 2 runs three 4-point butterflies, one per R, and stores to the
 * output quadruples {0,9,6,3}, {4,1,10,7} and {8,5,2,11}.  No
 * multiplication is applied between the two stages, and every input is
 * read before the first store to out.
 *
 * NOTE(review): the input/output index permutation is consistent with a
 * prime-factor (Good-Thomas) 3 x 4 split -- confirm against the
 * generator.  FFTW_REAL, c_re, c_im, FFTW_K499999999 and
 * FFTW_K866025403 are declared outside this chunk; the two constants
 * are presumably ~1/2 and ~sqrt(3)/2 -- confirm against konst.h.
 */
static void fftw_no_twiddle_12(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     FFTW_REAL tre0_0_0;
     FFTW_REAL tim0_0_0;
     FFTW_REAL tre0_0_1;
     FFTW_REAL tim0_0_1;
     FFTW_REAL tre0_0_2;
     FFTW_REAL tim0_0_2;
     FFTW_REAL tre0_0_3;
     FFTW_REAL tim0_0_3;
     FFTW_REAL tre0_1_0;
     FFTW_REAL tim0_1_0;
     FFTW_REAL tre0_1_1;
     FFTW_REAL tim0_1_1;
     FFTW_REAL tre0_1_2;
     FFTW_REAL tim0_1_2;
     FFTW_REAL tre0_1_3;
     FFTW_REAL tim0_1_3;
     FFTW_REAL tre0_2_0;
     FFTW_REAL tim0_2_0;
     FFTW_REAL tre0_2_1;
     FFTW_REAL tim0_2_1;
     FFTW_REAL tre0_2_2;
     FFTW_REAL tim0_2_2;
     FFTW_REAL tre0_2_3;
     FFTW_REAL tim0_2_3;
     /* Stage 1, triple 0: inputs 0, 4, 8. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_2_0;
	  FFTW_REAL tim1_2_0;
	  tre1_0_0 = c_re(in[0]);
	  tim1_0_0 = c_im(in[0]);
	  tre1_1_0 = c_re(in[4 * istride]);
	  tim1_1_0 = c_im(in[4 * istride]);
	  tre1_2_0 = c_re(in[8 * istride]);
	  tim1_2_0 = c_im(in[8 * istride]);
	  tre0_0_0 = tre1_0_0 + tre1_1_0 + tre1_2_0;
	  tim0_0_0 = tim1_0_0 + tim1_1_0 + tim1_2_0;
	  {
	       FFTW_REAL tre2_0_0;
	       FFTW_REAL tre2_1_0;
	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
	       tre0_1_0 = tre2_0_0 + tre2_1_0;
	       tre0_2_0 = tre2_0_0 - tre2_1_0;
	  }
	  {
	       FFTW_REAL tim2_0_0;
	       FFTW_REAL tim2_1_0;
	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
	       tim0_1_0 = tim2_0_0 + tim2_1_0;
	       tim0_2_0 = tim2_0_0 - tim2_1_0;
	  }
     }
     /* Stage 1, triple 1: inputs 3, 7, 11. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_2_0;
	  FFTW_REAL tim1_2_0;
	  tre1_0_0 = c_re(in[3 * istride]);
	  tim1_0_0 = c_im(in[3 * istride]);
	  tre1_1_0 = c_re(in[7 * istride]);
	  tim1_1_0 = c_im(in[7 * istride]);
	  tre1_2_0 = c_re(in[11 * istride]);
	  tim1_2_0 = c_im(in[11 * istride]);
	  tre0_0_1 = tre1_0_0 + tre1_1_0 + tre1_2_0;
	  tim0_0_1 = tim1_0_0 + tim1_1_0 + tim1_2_0;
	  {
	       FFTW_REAL tre2_0_0;
	       FFTW_REAL tre2_1_0;
	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
	       tre0_1_1 = tre2_0_0 + tre2_1_0;
	       tre0_2_1 = tre2_0_0 - tre2_1_0;
	  }
	  {
	       FFTW_REAL tim2_0_0;
	       FFTW_REAL tim2_1_0;
	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
	       tim0_1_1 = tim2_0_0 + tim2_1_0;
	       tim0_2_1 = tim2_0_0 - tim2_1_0;
	  }
     }
     /* Stage 1, triple 2: inputs 6, 10, 2. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_2_0;
	  FFTW_REAL tim1_2_0;
	  tre1_0_0 = c_re(in[6 * istride]);
	  tim1_0_0 = c_im(in[6 * istride]);
	  tre1_1_0 = c_re(in[10 * istride]);
	  tim1_1_0 = c_im(in[10 * istride]);
	  tre1_2_0 = c_re(in[2 * istride]);
	  tim1_2_0 = c_im(in[2 * istride]);
	  tre0_0_2 = tre1_0_0 + tre1_1_0 + tre1_2_0;
	  tim0_0_2 = tim1_0_0 + tim1_1_0 + tim1_2_0;
	  {
	       FFTW_REAL tre2_0_0;
	       FFTW_REAL tre2_1_0;
	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
	       tre0_1_2 = tre2_0_0 + tre2_1_0;
	       tre0_2_2 = tre2_0_0 - tre2_1_0;
	  }
	  {
	       FFTW_REAL tim2_0_0;
	       FFTW_REAL tim2_1_0;
	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
	       tim0_1_2 = tim2_0_0 + tim2_1_0;
	       tim0_2_2 = tim2_0_0 - tim2_1_0;
	  }
     }
     /* Stage 1, triple 3: inputs 9, 1, 5. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_2_0;
	  FFTW_REAL tim1_2_0;
	  tre1_0_0 = c_re(in[9 * istride]);
	  tim1_0_0 = c_im(in[9 * istride]);
	  tre1_1_0 = c_re(in[istride]);
	  tim1_1_0 = c_im(in[istride]);
	  tre1_2_0 = c_re(in[5 * istride]);
	  tim1_2_0 = c_im(in[5 * istride]);
	  tre0_0_3 = tre1_0_0 + tre1_1_0 + tre1_2_0;
	  tim0_0_3 = tim1_0_0 + tim1_1_0 + tim1_2_0;
	  {
	       FFTW_REAL tre2_0_0;
	       FFTW_REAL tre2_1_0;
	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
	       tre0_1_3 = tre2_0_0 + tre2_1_0;
	       tre0_2_3 = tre2_0_0 - tre2_1_0;
	  }
	  {
	       FFTW_REAL tim2_0_0;
	       FFTW_REAL tim2_1_0;
	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
	       tim0_1_3 = tim2_0_0 + tim2_1_0;
	       tim0_2_3 = tim2_0_0 - tim2_1_0;
	  }
     }
     /* Stage 2, row 0: 4-point butterfly -> outputs 0, 6, 9, 3. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_0_1;
	  FFTW_REAL tim1_0_1;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_1_1;
	  FFTW_REAL tim1_1_1;
	  tre1_0_0 = tre0_0_0 + tre0_0_2;
	  tim1_0_0 = tim0_0_0 + tim0_0_2;
	  tre1_1_0 = tre0_0_0 - tre0_0_2;
	  tim1_1_0 = tim0_0_0 - tim0_0_2;
	  tre1_0_1 = tre0_0_1 + tre0_0_3;
	  tim1_0_1 = tim0_0_1 + tim0_0_3;
	  tre1_1_1 = tre0_0_1 - tre0_0_3;
	  tim1_1_1 = tim0_0_1 - tim0_0_3;
	  c_re(out[0]) = tre1_0_0 + tre1_0_1;
	  c_im(out[0]) = tim1_0_0 + tim1_0_1;
	  c_re(out[6 * ostride]) = tre1_0_0 - tre1_0_1;
	  c_im(out[6 * ostride]) = tim1_0_0 - tim1_0_1;
	  c_re(out[9 * ostride]) = tre1_1_0 + tim1_1_1;
	  c_im(out[9 * ostride]) = tim1_1_0 - tre1_1_1;
	  c_re(out[3 * ostride]) = tre1_1_0 - tim1_1_1;
	  c_im(out[3 * ostride]) = tim1_1_0 + tre1_1_1;
     }
     /* Stage 2, row 1: 4-point butterfly -> outputs 4, 10, 1, 7. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_0_1;
	  FFTW_REAL tim1_0_1;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_1_1;
	  FFTW_REAL tim1_1_1;
	  tre1_0_0 = tre0_1_0 + tre0_1_2;
	  tim1_0_0 = tim0_1_0 + tim0_1_2;
	  tre1_1_0 = tre0_1_0 - tre0_1_2;
	  tim1_1_0 = tim0_1_0 - tim0_1_2;
	  tre1_0_1 = tre0_1_1 + tre0_1_3;
	  tim1_0_1 = tim0_1_1 + tim0_1_3;
	  tre1_1_1 = tre0_1_1 - tre0_1_3;
	  tim1_1_1 = tim0_1_1 - tim0_1_3;
	  c_re(out[4 * ostride]) = tre1_0_0 + tre1_0_1;
	  c_im(out[4 * ostride]) = tim1_0_0 + tim1_0_1;
	  c_re(out[10 * ostride]) = tre1_0_0 - tre1_0_1;
	  c_im(out[10 * ostride]) = tim1_0_0 - tim1_0_1;
	  c_re(out[ostride]) = tre1_1_0 + tim1_1_1;
	  c_im(out[ostride]) = tim1_1_0 - tre1_1_1;
	  c_re(out[7 * ostride]) = tre1_1_0 - tim1_1_1;
	  c_im(out[7 * ostride]) = tim1_1_0 + tre1_1_1;
     }
     /* Stage 2, row 2: 4-point butterfly -> outputs 8, 2, 5, 11. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tre1_0_1;
	  FFTW_REAL tim1_0_1;
	  FFTW_REAL tre1_1_0;
	  FFTW_REAL tim1_1_0;
	  FFTW_REAL tre1_1_1;
	  FFTW_REAL tim1_1_1;
	  tre1_0_0 = tre0_2_0 + tre0_2_2;
	  tim1_0_0 = tim0_2_0 + tim0_2_2;
	  tre1_1_0 = tre0_2_0 - tre0_2_2;
	  tim1_1_0 = tim0_2_0 - tim0_2_2;
	  tre1_0_1 = tre0_2_1 + tre0_2_3;
	  tim1_0_1 = tim0_2_1 + tim0_2_3;
	  tre1_1_1 = tre0_2_1 - tre0_2_3;
	  tim1_1_1 = tim0_2_1 - tim0_2_3;
	  c_re(out[8 * ostride]) = tre1_0_0 + tre1_0_1;
	  c_im(out[8 * ostride]) = tim1_0_0 + tim1_0_1;
	  c_re(out[2 * ostride]) = tre1_0_0 - tre1_0_1;
	  c_im(out[2 * ostride]) = tim1_0_0 - tim1_0_1;
	  c_re(out[5 * ostride]) = tre1_1_0 + tim1_1_1;
	  c_im(out[5 * ostride]) = tim1_1_0 - tre1_1_1;
	  c_re(out[11 * ostride]) = tre1_1_0 - tim1_1_1;
	  c_im(out[11 * ostride]) = tim1_1_0 + tre1_1_1;
     }
}
1236 
1237 /* This function contains 324 FP additions and 144 FP multiplications */
1238 
/*
 * fftw_no_twiddle_13 -- straight-line 13-point complex transform.
 *
 * in      : source array; the samples in[0], in[istride], ...,
 *           in[12 * istride] are read.
 * out     : destination array; results are stored to out[0],
 *           out[ostride], ..., out[12 * ostride].
 * istride : distance, in elements, between consecutive inputs.
 * ostride : distance, in elements, between consecutive outputs.
 *
 * All 13 inputs are copied into locals before the first store to out.
 *
 * out[0] is the plain sum of the inputs.  For k = 1..6 the results at
 * indices k and 13 - k are produced together:
 *   - the real parts combine weighted sums re[j] + re[13 - j] with
 *     weighted differences of the imaginary parts;
 *   - the imaginary parts combine weighted sums im[j] + im[13 - j] with
 *     weighted differences of the real parts;
 * index k receives (first term + second term), index 13 - k receives
 * (first term - second term).
 *
 * NOTE(review): FFTW_REAL, c_re, c_im and the FFTW_K* weights are
 * declared outside this chunk.  If FFTW_Kddddddddd stands for
 * 0.ddddddddd, the weights are cos(2*pi*j/13) and sin(2*pi*j/13) and the
 * routine is a size-13 DFT with a negative exponent -- confirm against
 * konst.h.  The order of the floating-point operations is kept exactly
 * as generated; do not re-associate the sums.
 */
static void fftw_no_twiddle_13(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     FFTW_REAL tre0_0_0;
     FFTW_REAL tim0_0_0;
     FFTW_REAL tre0_1_0;
     FFTW_REAL tim0_1_0;
     FFTW_REAL tre0_2_0;
     FFTW_REAL tim0_2_0;
     FFTW_REAL tre0_3_0;
     FFTW_REAL tim0_3_0;
     FFTW_REAL tre0_4_0;
     FFTW_REAL tim0_4_0;
     FFTW_REAL tre0_5_0;
     FFTW_REAL tim0_5_0;
     FFTW_REAL tre0_6_0;
     FFTW_REAL tim0_6_0;
     FFTW_REAL tre0_7_0;
     FFTW_REAL tim0_7_0;
     FFTW_REAL tre0_8_0;
     FFTW_REAL tim0_8_0;
     FFTW_REAL tre0_9_0;
     FFTW_REAL tim0_9_0;
     FFTW_REAL tre0_10_0;
     FFTW_REAL tim0_10_0;
     FFTW_REAL tre0_11_0;
     FFTW_REAL tim0_11_0;
     FFTW_REAL tre0_12_0;
     FFTW_REAL tim0_12_0;
     /* Load every input (treJ / timJ = real / imaginary part of sample J). */
     tre0_0_0 = c_re(in[0]);
     tim0_0_0 = c_im(in[0]);
     tre0_1_0 = c_re(in[istride]);
     tim0_1_0 = c_im(in[istride]);
     tre0_2_0 = c_re(in[2 * istride]);
     tim0_2_0 = c_im(in[2 * istride]);
     tre0_3_0 = c_re(in[3 * istride]);
     tim0_3_0 = c_im(in[3 * istride]);
     tre0_4_0 = c_re(in[4 * istride]);
     tim0_4_0 = c_im(in[4 * istride]);
     tre0_5_0 = c_re(in[5 * istride]);
     tim0_5_0 = c_im(in[5 * istride]);
     tre0_6_0 = c_re(in[6 * istride]);
     tim0_6_0 = c_im(in[6 * istride]);
     tre0_7_0 = c_re(in[7 * istride]);
     tim0_7_0 = c_im(in[7 * istride]);
     tre0_8_0 = c_re(in[8 * istride]);
     tim0_8_0 = c_im(in[8 * istride]);
     tre0_9_0 = c_re(in[9 * istride]);
     tim0_9_0 = c_im(in[9 * istride]);
     tre0_10_0 = c_re(in[10 * istride]);
     tim0_10_0 = c_im(in[10 * istride]);
     tre0_11_0 = c_re(in[11 * istride]);
     tim0_11_0 = c_im(in[11 * istride]);
     tre0_12_0 = c_re(in[12 * istride]);
     tim0_12_0 = c_im(in[12 * istride]);
     /* Output 0: sum of all samples. */
     c_re(out[0]) = tre0_0_0 + tre0_1_0 + tre0_2_0 + tre0_3_0 + tre0_4_0 + tre0_5_0 + tre0_6_0 + tre0_7_0 + tre0_8_0 + tre0_9_0 + tre0_10_0 + tre0_11_0 + tre0_12_0;
     c_im(out[0]) = tim0_0_0 + tim0_1_0 + tim0_2_0 + tim0_3_0 + tim0_4_0 + tim0_5_0 + tim0_6_0 + tim0_7_0 + tim0_8_0 + tim0_9_0 + tim0_10_0 + tim0_11_0 + tim0_12_0;
     /* Outputs 1 and 12, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tre0_1_0 + tre0_12_0)) + (((FFTW_REAL) FFTW_K568064746) * (tre0_2_0 + tre0_11_0)) + (((FFTW_REAL) FFTW_K120536680) * (tre0_3_0 + tre0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_5_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_4_0 + tre0_9_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K464723172) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K822983865) * (tim0_2_0 - tim0_11_0)) + (((FFTW_REAL) FFTW_K992708874) * (tim0_3_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K935016242) * (tim0_4_0 - tim0_9_0)) + (((FFTW_REAL) FFTW_K663122658) * (tim0_5_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K239315664) * (tim0_6_0 - tim0_7_0));
	  c_re(out[ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[12 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 1 and 12, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tim0_1_0 + tim0_12_0)) + (((FFTW_REAL) FFTW_K568064746) * (tim0_2_0 + tim0_11_0)) + (((FFTW_REAL) FFTW_K120536680) * (tim0_3_0 + tim0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_5_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_4_0 + tim0_9_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K464723172) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K822983865) * (tre0_11_0 - tre0_2_0)) + (((FFTW_REAL) FFTW_K992708874) * (tre0_10_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K935016242) * (tre0_9_0 - tre0_4_0)) + (((FFTW_REAL) FFTW_K663122658) * (tre0_8_0 - tre0_5_0)) + (((FFTW_REAL) FFTW_K239315664) * (tre0_7_0 - tre0_6_0));
	  c_im(out[ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[12 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 2 and 11, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K568064746) * (tre0_1_0 + tre0_12_0)) + (((FFTW_REAL) FFTW_K120536680) * (tre0_5_0 + tre0_8_0)) + (((FFTW_REAL) FFTW_K885456025) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_4_0 + tre0_9_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_3_0 + tre0_10_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_2_0 + tre0_11_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K822983865) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K935016242) * (tim0_2_0 - tim0_11_0)) + (((FFTW_REAL) FFTW_K239315664) * (tim0_3_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K663122658) * (tim0_9_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K992708874) * (tim0_8_0 - tim0_5_0)) + (((FFTW_REAL) FFTW_K464723172) * (tim0_7_0 - tim0_6_0));
	  c_re(out[2 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[11 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 2 and 11, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K568064746) * (tim0_1_0 + tim0_12_0)) + (((FFTW_REAL) FFTW_K120536680) * (tim0_5_0 + tim0_8_0)) + (((FFTW_REAL) FFTW_K885456025) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_4_0 + tim0_9_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_3_0 + tim0_10_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_2_0 + tim0_11_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K822983865) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K935016242) * (tre0_11_0 - tre0_2_0)) + (((FFTW_REAL) FFTW_K239315664) * (tre0_10_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K663122658) * (tre0_4_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K992708874) * (tre0_5_0 - tre0_8_0)) + (((FFTW_REAL) FFTW_K464723172) * (tre0_6_0 - tre0_7_0));
	  c_im(out[2 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[11 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 3 and 10, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K120536680) * (tre0_1_0 + tre0_12_0)) + (((FFTW_REAL) FFTW_K885456025) * (tre0_4_0 + tre0_9_0)) + (((FFTW_REAL) FFTW_K568064746) * (tre0_5_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_3_0 + tre0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_2_0 + tre0_11_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K992708874) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K239315664) * (tim0_2_0 - tim0_11_0)) + (((FFTW_REAL) FFTW_K935016242) * (tim0_10_0 - tim0_3_0)) + (((FFTW_REAL) FFTW_K464723172) * (tim0_9_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K822983865) * (tim0_5_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K663122658) * (tim0_6_0 - tim0_7_0));
	  c_re(out[3 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[10 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 3 and 10, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K120536680) * (tim0_1_0 + tim0_12_0)) + (((FFTW_REAL) FFTW_K885456025) * (tim0_4_0 + tim0_9_0)) + (((FFTW_REAL) FFTW_K568064746) * (tim0_5_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_3_0 + tim0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_2_0 + tim0_11_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K992708874) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K239315664) * (tre0_11_0 - tre0_2_0)) + (((FFTW_REAL) FFTW_K935016242) * (tre0_3_0 - tre0_10_0)) + (((FFTW_REAL) FFTW_K464723172) * (tre0_4_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K822983865) * (tre0_8_0 - tre0_5_0)) + (((FFTW_REAL) FFTW_K663122658) * (tre0_7_0 - tre0_6_0));
	  c_im(out[3 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[10 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 4 and 9, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tre0_3_0 + tre0_10_0)) + (((FFTW_REAL) FFTW_K120536680) * (tre0_4_0 + tre0_9_0)) + (((FFTW_REAL) FFTW_K568064746) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_5_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_2_0 + tre0_11_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_1_0 + tre0_12_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K935016242) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K663122658) * (tim0_11_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K464723172) * (tim0_10_0 - tim0_3_0)) + (((FFTW_REAL) FFTW_K992708874) * (tim0_4_0 - tim0_9_0)) + (((FFTW_REAL) FFTW_K239315664) * (tim0_8_0 - tim0_5_0)) + (((FFTW_REAL) FFTW_K822983865) * (tim0_7_0 - tim0_6_0));
	  c_re(out[4 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[9 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 4 and 9, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tim0_3_0 + tim0_10_0)) + (((FFTW_REAL) FFTW_K120536680) * (tim0_4_0 + tim0_9_0)) + (((FFTW_REAL) FFTW_K568064746) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_5_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_2_0 + tim0_11_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_1_0 + tim0_12_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K935016242) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K663122658) * (tre0_2_0 - tre0_11_0)) + (((FFTW_REAL) FFTW_K464723172) * (tre0_3_0 - tre0_10_0)) + (((FFTW_REAL) FFTW_K992708874) * (tre0_9_0 - tre0_4_0)) + (((FFTW_REAL) FFTW_K239315664) * (tre0_5_0 - tre0_8_0)) + (((FFTW_REAL) FFTW_K822983865) * (tre0_6_0 - tre0_7_0));
	  c_im(out[4 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[9 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 5 and 8, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K120536680) * (tre0_2_0 + tre0_11_0)) + (((FFTW_REAL) FFTW_K568064746) * (tre0_3_0 + tre0_10_0)) + (((FFTW_REAL) FFTW_K885456025) * (tre0_5_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_4_0 + tre0_9_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_1_0 + tre0_12_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K663122658) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K992708874) * (tim0_11_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K822983865) * (tim0_3_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K239315664) * (tim0_9_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K464723172) * (tim0_8_0 - tim0_5_0)) + (((FFTW_REAL) FFTW_K935016242) * (tim0_6_0 - tim0_7_0));
	  c_re(out[5 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[8 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 5 and 8, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K120536680) * (tim0_2_0 + tim0_11_0)) + (((FFTW_REAL) FFTW_K568064746) * (tim0_3_0 + tim0_10_0)) + (((FFTW_REAL) FFTW_K885456025) * (tim0_5_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_4_0 + tim0_9_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_1_0 + tim0_12_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K663122658) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K992708874) * (tre0_2_0 - tre0_11_0)) + (((FFTW_REAL) FFTW_K822983865) * (tre0_10_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K239315664) * (tre0_4_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K464723172) * (tre0_5_0 - tre0_8_0)) + (((FFTW_REAL) FFTW_K935016242) * (tre0_7_0 - tre0_6_0));
	  c_im(out[5 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[8 * ostride]) = tim1_0_0 - tim1_1_0;
     }
     /* Outputs 6 and 7, real parts. */
     {
	  FFTW_REAL tre1_0_0;
	  FFTW_REAL tre1_1_0;
	  tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tre0_2_0 + tre0_11_0)) + (((FFTW_REAL) FFTW_K568064746) * (tre0_4_0 + tre0_9_0)) + (((FFTW_REAL) FFTW_K120536680) * (tre0_6_0 + tre0_7_0)) - (((FFTW_REAL) FFTW_K354604887) * (tre0_5_0 + tre0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tre0_3_0 + tre0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tre0_1_0 + tre0_12_0));
	  tre1_1_0 = (((FFTW_REAL) FFTW_K239315664) * (tim0_1_0 - tim0_12_0)) + (((FFTW_REAL) FFTW_K464723172) * (tim0_11_0 - tim0_2_0)) + (((FFTW_REAL) FFTW_K663122658) * (tim0_3_0 - tim0_10_0)) + (((FFTW_REAL) FFTW_K822983865) * (tim0_9_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K935016242) * (tim0_5_0 - tim0_8_0)) + (((FFTW_REAL) FFTW_K992708874) * (tim0_7_0 - tim0_6_0));
	  c_re(out[6 * ostride]) = tre1_0_0 + tre1_1_0;
	  c_re(out[7 * ostride]) = tre1_0_0 - tre1_1_0;
     }
     /* Outputs 6 and 7, imaginary parts. */
     {
	  FFTW_REAL tim1_0_0;
	  FFTW_REAL tim1_1_0;
	  tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K885456025) * (tim0_2_0 + tim0_11_0)) + (((FFTW_REAL) FFTW_K568064746) * (tim0_4_0 + tim0_9_0)) + (((FFTW_REAL) FFTW_K120536680) * (tim0_6_0 + tim0_7_0)) - (((FFTW_REAL) FFTW_K354604887) * (tim0_5_0 + tim0_8_0)) - (((FFTW_REAL) FFTW_K748510748) * (tim0_3_0 + tim0_10_0)) - (((FFTW_REAL) FFTW_K970941817) * (tim0_1_0 + tim0_12_0));
	  tim1_1_0 = (((FFTW_REAL) FFTW_K239315664) * (tre0_12_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K464723172) * (tre0_2_0 - tre0_11_0)) + (((FFTW_REAL) FFTW_K663122658) * (tre0_10_0 - tre0_3_0)) + (((FFTW_REAL) FFTW_K822983865) * (tre0_4_0 - tre0_9_0)) + (((FFTW_REAL) FFTW_K935016242) * (tre0_8_0 - tre0_5_0)) + (((FFTW_REAL) FFTW_K992708874) * (tre0_6_0 - tre0_7_0));
	  c_im(out[6 * ostride]) = tim1_0_0 + tim1_1_0;
	  c_im(out[7 * ostride]) = tim1_0_0 - tim1_1_0;
     }
}
1392 
1393 /* This function contains 208 FP additions and 72 FP multiplications */
1394 
/*
 * fftw_no_twiddle_14: 14-input / 14-output complex codelet.
 *
 *   in, istride   source samples, read at in[k * istride] for k = 0..13
 *   out, ostride  results, written at out[k * ostride] for k = 0..13
 *
 * The work is done in two passes:
 *   1. Seven input pairs are each reduced to a sum and a difference.
 *   2. The seven sums, and then the seven differences, are each combined
 *      into seven outputs (one plain total plus three +/- output pairs).
 *
 * Every read of `in` happens before the first write to `out`.
 *
 * The FFTW_K... constants are not defined in this block (the file includes
 * konst.h).  NOTE(review): judging by the digits in their names they look
 * like |cos| and sin of multiples of 2*pi/7 -- confirm against konst.h.
 *
 * The grouping and operand order of every floating-point expression is
 * deliberate; do not reassociate when editing.
 */
static void fftw_no_twiddle_14(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Input positions (in units of istride) of the two members of pair j. */
     static const int first_tap[7] = {0, 2, 4, 6, 8, 10, 12};
     static const int second_tap[7] = {7, 9, 11, 13, 1, 3, 5};

     /*
      * Output positions (in units of ostride).  Row 0 is fed by the pair
      * sums, row 1 by the pair differences.  Entry 0 receives the plain
      * total; entries (1,6), (2,5) and (3,4) are the +/- output pairs.
      */
     static const int bin_slot[2][7] = {
          {0, 8, 2, 10, 4, 12, 6},
          {7, 1, 9, 3, 11, 5, 13}
     };

     FFTW_REAL pair_re[2][7];	/* [0][j] = sum, [1][j] = difference */
     FFTW_REAL pair_im[2][7];
     int j;
     int half;

     /* Pass 1: sum and difference of each input pair. */
     for (j = 0; j < 7; ++j) {
          const FFTW_REAL a_re = c_re(in[first_tap[j] * istride]);
          const FFTW_REAL a_im = c_im(in[first_tap[j] * istride]);
          const FFTW_REAL b_re = c_re(in[second_tap[j] * istride]);
          const FFTW_REAL b_im = c_im(in[second_tap[j] * istride]);

          pair_re[0][j] = a_re + b_re;
          pair_im[0][j] = a_im + b_im;
          pair_re[1][j] = a_re - b_re;
          pair_im[1][j] = a_im - b_im;
     }

     /* Pass 2: seven-term combination, first on the sums, then the differences. */
     for (half = 0; half < 2; ++half) {
          const FFTW_REAL *xr = pair_re[half];
          const FFTW_REAL *xi = pair_im[half];
          const int *slot = bin_slot[half];
          FFTW_REAL base;	/* part shared by both outputs of a pair */
          FFTW_REAL swing;	/* part added to one output, subtracted from the other */

          c_re(out[slot[0] * ostride]) = xr[0] + xr[1] + xr[2] + xr[3] + xr[4] + xr[5] + xr[6];
          c_im(out[slot[0] * ostride]) = xi[0] + xi[1] + xi[2] + xi[3] + xi[4] + xi[5] + xi[6];

          /* Output pair slot[1] / slot[6]. */
          base = xr[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xr[1] + xr[6]))
               - (((FFTW_REAL) FFTW_K900968867) * (xr[3] + xr[4]))
               - (((FFTW_REAL) FFTW_K222520933) * (xr[2] + xr[5]));
          swing = (((FFTW_REAL) FFTW_K781831482) * (xi[1] - xi[6]))
               + (((FFTW_REAL) FFTW_K974927912) * (xi[2] - xi[5]))
               + (((FFTW_REAL) FFTW_K433883739) * (xi[3] - xi[4]));
          c_re(out[slot[1] * ostride]) = base + swing;
          c_re(out[slot[6] * ostride]) = base - swing;

          base = xi[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xi[1] + xi[6]))
               - (((FFTW_REAL) FFTW_K900968867) * (xi[3] + xi[4]))
               - (((FFTW_REAL) FFTW_K222520933) * (xi[2] + xi[5]));
          swing = (((FFTW_REAL) FFTW_K781831482) * (xr[6] - xr[1]))
               + (((FFTW_REAL) FFTW_K974927912) * (xr[5] - xr[2]))
               + (((FFTW_REAL) FFTW_K433883739) * (xr[4] - xr[3]));
          c_im(out[slot[1] * ostride]) = base + swing;
          c_im(out[slot[6] * ostride]) = base - swing;

          /* Output pair slot[2] / slot[5]. */
          base = xr[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xr[3] + xr[4]))
               - (((FFTW_REAL) FFTW_K900968867) * (xr[2] + xr[5]))
               - (((FFTW_REAL) FFTW_K222520933) * (xr[1] + xr[6]));
          swing = (((FFTW_REAL) FFTW_K974927912) * (xi[1] - xi[6]))
               + (((FFTW_REAL) FFTW_K433883739) * (xi[5] - xi[2]))
               + (((FFTW_REAL) FFTW_K781831482) * (xi[4] - xi[3]));
          c_re(out[slot[2] * ostride]) = base + swing;
          c_re(out[slot[5] * ostride]) = base - swing;

          base = xi[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xi[3] + xi[4]))
               - (((FFTW_REAL) FFTW_K900968867) * (xi[2] + xi[5]))
               - (((FFTW_REAL) FFTW_K222520933) * (xi[1] + xi[6]));
          swing = (((FFTW_REAL) FFTW_K974927912) * (xr[6] - xr[1]))
               + (((FFTW_REAL) FFTW_K433883739) * (xr[2] - xr[5]))
               + (((FFTW_REAL) FFTW_K781831482) * (xr[3] - xr[4]));
          c_im(out[slot[2] * ostride]) = base + swing;
          c_im(out[slot[5] * ostride]) = base - swing;

          /* Output pair slot[3] / slot[4]. */
          base = xr[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xr[2] + xr[5]))
               - (((FFTW_REAL) FFTW_K222520933) * (xr[3] + xr[4]))
               - (((FFTW_REAL) FFTW_K900968867) * (xr[1] + xr[6]));
          swing = (((FFTW_REAL) FFTW_K433883739) * (xi[1] - xi[6]))
               + (((FFTW_REAL) FFTW_K781831482) * (xi[5] - xi[2]))
               + (((FFTW_REAL) FFTW_K974927912) * (xi[3] - xi[4]));
          c_re(out[slot[3] * ostride]) = base + swing;
          c_re(out[slot[4] * ostride]) = base - swing;

          base = xi[0]
               + (((FFTW_REAL) FFTW_K623489801) * (xi[2] + xi[5]))
               - (((FFTW_REAL) FFTW_K222520933) * (xi[3] + xi[4]))
               - (((FFTW_REAL) FFTW_K900968867) * (xi[1] + xi[6]));
          swing = (((FFTW_REAL) FFTW_K433883739) * (xr[6] - xr[1]))
               + (((FFTW_REAL) FFTW_K781831482) * (xr[2] - xr[5]))
               + (((FFTW_REAL) FFTW_K974927912) * (xr[4] - xr[3]));
          c_im(out[slot[3] * ostride]) = base + swing;
          c_im(out[slot[4] * ostride]) = base - swing;
     }
}
1624 
1625 /* This function contains 202 FP additions and 68 FP multiplications */
1626 
/*
 * fftw_no_twiddle_15: 15-input / 15-output complex codelet.
 *
 *   in, istride   source samples, read at in[k * istride] for k = 0..14
 *   out, ostride  results, written at out[k * ostride] for k = 0..14
 *
 * The work is done in two passes:
 *   1. Five input triples are each turned into three intermediate values
 *      (a plain total plus one +/- pair).
 *   2. For each of the three intermediate rows, the five values are
 *      combined into five outputs (a plain total plus two +/- pairs).
 *
 * Every read of `in` happens before the first write to `out`.
 *
 * The FFTW_K... constants are not defined in this block (the file includes
 * konst.h).  NOTE(review): judging by the digits in their names they look
 * like |cos| and sin of multiples of 2*pi/3 and 2*pi/5 -- confirm against
 * konst.h.
 *
 * The grouping and operand order of every floating-point expression is
 * deliberate; do not reassociate when editing.
 */
static void fftw_no_twiddle_15(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Input positions (in units of istride) of the three members of triple j. */
     static const int tap[5][3] = {
          {0, 5, 10},
          {3, 8, 13},
          {6, 11, 1},
          {9, 14, 4},
          {12, 2, 7}
     };

     /*
      * Output positions (in units of ostride) for each intermediate row.
      * Entry 0 receives the plain total; entries (1,4) and (2,3) are the
      * +/- output pairs.
      */
     static const int bin_slot[3][5] = {
          {0, 6, 12, 3, 9},
          {10, 1, 7, 13, 4},
          {5, 11, 2, 8, 14}
     };

     FFTW_REAL row_re[3][5];
     FFTW_REAL row_im[3][5];
     int j;
     int row;

     /* Pass 1: three-term combination of each input triple. */
     for (j = 0; j < 5; ++j) {
          const FFTW_REAL x0_re = c_re(in[tap[j][0] * istride]);
          const FFTW_REAL x0_im = c_im(in[tap[j][0] * istride]);
          const FFTW_REAL x1_re = c_re(in[tap[j][1] * istride]);
          const FFTW_REAL x1_im = c_im(in[tap[j][1] * istride]);
          const FFTW_REAL x2_re = c_re(in[tap[j][2] * istride]);
          const FFTW_REAL x2_im = c_im(in[tap[j][2] * istride]);
          FFTW_REAL base;	/* part shared by rows 1 and 2 */
          FFTW_REAL swing;	/* part added for row 1, subtracted for row 2 */

          row_re[0][j] = x0_re + x1_re + x2_re;
          row_im[0][j] = x0_im + x1_im + x2_im;

          base = x0_re - (((FFTW_REAL) FFTW_K499999999) * (x1_re + x2_re));
          swing = ((FFTW_REAL) FFTW_K866025403) * (x1_im - x2_im);
          row_re[1][j] = base + swing;
          row_re[2][j] = base - swing;

          base = x0_im - (((FFTW_REAL) FFTW_K499999999) * (x1_im + x2_im));
          swing = ((FFTW_REAL) FFTW_K866025403) * (x2_re - x1_re);
          row_im[1][j] = base + swing;
          row_im[2][j] = base - swing;
     }

     /* Pass 2: five-term combination of each intermediate row. */
     for (row = 0; row < 3; ++row) {
          const FFTW_REAL *xr = row_re[row];
          const FFTW_REAL *xi = row_im[row];
          const int *slot = bin_slot[row];
          FFTW_REAL base;	/* part shared by both outputs of a pair */
          FFTW_REAL swing;	/* part added to one output, subtracted from the other */

          c_re(out[slot[0] * ostride]) = xr[0] + xr[1] + xr[2] + xr[3] + xr[4];
          c_im(out[slot[0] * ostride]) = xi[0] + xi[1] + xi[2] + xi[3] + xi[4];

          /* Output pair slot[1] / slot[4]. */
          base = xr[0]
               + (((FFTW_REAL) FFTW_K309016994) * (xr[1] + xr[4]))
               - (((FFTW_REAL) FFTW_K809016994) * (xr[2] + xr[3]));
          swing = (((FFTW_REAL) FFTW_K951056516) * (xi[1] - xi[4]))
               + (((FFTW_REAL) FFTW_K587785252) * (xi[2] - xi[3]));
          c_re(out[slot[1] * ostride]) = base + swing;
          c_re(out[slot[4] * ostride]) = base - swing;

          base = xi[0]
               + (((FFTW_REAL) FFTW_K309016994) * (xi[1] + xi[4]))
               - (((FFTW_REAL) FFTW_K809016994) * (xi[2] + xi[3]));
          swing = (((FFTW_REAL) FFTW_K951056516) * (xr[4] - xr[1]))
               + (((FFTW_REAL) FFTW_K587785252) * (xr[3] - xr[2]));
          c_im(out[slot[1] * ostride]) = base + swing;
          c_im(out[slot[4] * ostride]) = base - swing;

          /* Output pair slot[2] / slot[3]. */
          base = xr[0]
               + (((FFTW_REAL) FFTW_K309016994) * (xr[2] + xr[3]))
               - (((FFTW_REAL) FFTW_K809016994) * (xr[1] + xr[4]));
          swing = (((FFTW_REAL) FFTW_K587785252) * (xi[1] - xi[4]))
               + (((FFTW_REAL) FFTW_K951056516) * (xi[3] - xi[2]));
          c_re(out[slot[2] * ostride]) = base + swing;
          c_re(out[slot[3] * ostride]) = base - swing;

          base = xi[0]
               + (((FFTW_REAL) FFTW_K309016994) * (xi[2] + xi[3]))
               - (((FFTW_REAL) FFTW_K809016994) * (xi[1] + xi[4]));
          swing = (((FFTW_REAL) FFTW_K587785252) * (xr[4] - xr[1]))
               + (((FFTW_REAL) FFTW_K951056516) * (xr[2] - xr[3]));
          c_im(out[slot[2] * ostride]) = base + swing;
          c_im(out[slot[3] * ostride]) = base - swing;
     }
}
1922 
1923 /* This function contains 144 FP additions and 24 FP multiplications */
1924 
fftw_no_twiddle_16(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)1925 static void fftw_no_twiddle_16(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
1926 {
1927      FFTW_REAL tre0_0_0;
1928      FFTW_REAL tim0_0_0;
1929      FFTW_REAL tre0_0_1;
1930      FFTW_REAL tim0_0_1;
1931      FFTW_REAL tre0_0_2;
1932      FFTW_REAL tim0_0_2;
1933      FFTW_REAL tre0_0_3;
1934      FFTW_REAL tim0_0_3;
1935      FFTW_REAL tre0_1_0;
1936      FFTW_REAL tim0_1_0;
1937      FFTW_REAL tre0_1_1;
1938      FFTW_REAL tim0_1_1;
1939      FFTW_REAL tre0_1_2;
1940      FFTW_REAL tim0_1_2;
1941      FFTW_REAL tre0_1_3;
1942      FFTW_REAL tim0_1_3;
1943      FFTW_REAL tre0_2_0;
1944      FFTW_REAL tim0_2_0;
1945      FFTW_REAL tre0_2_1;
1946      FFTW_REAL tim0_2_1;
1947      FFTW_REAL tre0_2_2;
1948      FFTW_REAL tim0_2_2;
1949      FFTW_REAL tre0_2_3;
1950      FFTW_REAL tim0_2_3;
1951      FFTW_REAL tre0_3_0;
1952      FFTW_REAL tim0_3_0;
1953      FFTW_REAL tre0_3_1;
1954      FFTW_REAL tim0_3_1;
1955      FFTW_REAL tre0_3_2;
1956      FFTW_REAL tim0_3_2;
1957      FFTW_REAL tre0_3_3;
1958      FFTW_REAL tim0_3_3;
1959      {
1960 	  FFTW_REAL tre1_0_0;
1961 	  FFTW_REAL tim1_0_0;
1962 	  FFTW_REAL tre1_0_1;
1963 	  FFTW_REAL tim1_0_1;
1964 	  FFTW_REAL tre1_1_0;
1965 	  FFTW_REAL tim1_1_0;
1966 	  FFTW_REAL tre1_1_1;
1967 	  FFTW_REAL tim1_1_1;
1968 	  {
1969 	       FFTW_REAL tre2_0_0;
1970 	       FFTW_REAL tim2_0_0;
1971 	       FFTW_REAL tre2_1_0;
1972 	       FFTW_REAL tim2_1_0;
1973 	       tre2_0_0 = c_re(in[0]);
1974 	       tim2_0_0 = c_im(in[0]);
1975 	       tre2_1_0 = c_re(in[8 * istride]);
1976 	       tim2_1_0 = c_im(in[8 * istride]);
1977 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
1978 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
1979 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
1980 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
1981 	  }
1982 	  {
1983 	       FFTW_REAL tre2_0_0;
1984 	       FFTW_REAL tim2_0_0;
1985 	       FFTW_REAL tre2_1_0;
1986 	       FFTW_REAL tim2_1_0;
1987 	       tre2_0_0 = c_re(in[4 * istride]);
1988 	       tim2_0_0 = c_im(in[4 * istride]);
1989 	       tre2_1_0 = c_re(in[12 * istride]);
1990 	       tim2_1_0 = c_im(in[12 * istride]);
1991 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
1992 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
1993 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
1994 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
1995 	  }
1996 	  tre0_0_0 = tre1_0_0 + tre1_0_1;
1997 	  tim0_0_0 = tim1_0_0 + tim1_0_1;
1998 	  tre0_2_0 = tre1_0_0 - tre1_0_1;
1999 	  tim0_2_0 = tim1_0_0 - tim1_0_1;
2000 	  tre0_1_0 = tre1_1_0 + tim1_1_1;
2001 	  tim0_1_0 = tim1_1_0 - tre1_1_1;
2002 	  tre0_3_0 = tre1_1_0 - tim1_1_1;
2003 	  tim0_3_0 = tim1_1_0 + tre1_1_1;
2004      }
2005      {
2006 	  FFTW_REAL tre1_0_0;
2007 	  FFTW_REAL tim1_0_0;
2008 	  FFTW_REAL tre1_0_1;
2009 	  FFTW_REAL tim1_0_1;
2010 	  FFTW_REAL tre1_1_0;
2011 	  FFTW_REAL tim1_1_0;
2012 	  FFTW_REAL tre1_1_1;
2013 	  FFTW_REAL tim1_1_1;
2014 	  {
2015 	       FFTW_REAL tre2_0_0;
2016 	       FFTW_REAL tim2_0_0;
2017 	       FFTW_REAL tre2_1_0;
2018 	       FFTW_REAL tim2_1_0;
2019 	       tre2_0_0 = c_re(in[istride]);
2020 	       tim2_0_0 = c_im(in[istride]);
2021 	       tre2_1_0 = c_re(in[9 * istride]);
2022 	       tim2_1_0 = c_im(in[9 * istride]);
2023 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2024 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2025 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2026 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2027 	  }
2028 	  {
2029 	       FFTW_REAL tre2_0_0;
2030 	       FFTW_REAL tim2_0_0;
2031 	       FFTW_REAL tre2_1_0;
2032 	       FFTW_REAL tim2_1_0;
2033 	       tre2_0_0 = c_re(in[5 * istride]);
2034 	       tim2_0_0 = c_im(in[5 * istride]);
2035 	       tre2_1_0 = c_re(in[13 * istride]);
2036 	       tim2_1_0 = c_im(in[13 * istride]);
2037 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2038 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2039 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2040 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2041 	  }
2042 	  tre0_0_1 = tre1_0_0 + tre1_0_1;
2043 	  tim0_0_1 = tim1_0_0 + tim1_0_1;
2044 	  tre0_2_1 = tre1_0_0 - tre1_0_1;
2045 	  tim0_2_1 = tim1_0_0 - tim1_0_1;
2046 	  tre0_1_1 = tre1_1_0 + tim1_1_1;
2047 	  tim0_1_1 = tim1_1_0 - tre1_1_1;
2048 	  tre0_3_1 = tre1_1_0 - tim1_1_1;
2049 	  tim0_3_1 = tim1_1_0 + tre1_1_1;
2050      }
2051      {
2052 	  FFTW_REAL tre1_0_0;
2053 	  FFTW_REAL tim1_0_0;
2054 	  FFTW_REAL tre1_0_1;
2055 	  FFTW_REAL tim1_0_1;
2056 	  FFTW_REAL tre1_1_0;
2057 	  FFTW_REAL tim1_1_0;
2058 	  FFTW_REAL tre1_1_1;
2059 	  FFTW_REAL tim1_1_1;
2060 	  {
2061 	       FFTW_REAL tre2_0_0;
2062 	       FFTW_REAL tim2_0_0;
2063 	       FFTW_REAL tre2_1_0;
2064 	       FFTW_REAL tim2_1_0;
2065 	       tre2_0_0 = c_re(in[2 * istride]);
2066 	       tim2_0_0 = c_im(in[2 * istride]);
2067 	       tre2_1_0 = c_re(in[10 * istride]);
2068 	       tim2_1_0 = c_im(in[10 * istride]);
2069 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2070 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2071 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2072 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2073 	  }
2074 	  {
2075 	       FFTW_REAL tre2_0_0;
2076 	       FFTW_REAL tim2_0_0;
2077 	       FFTW_REAL tre2_1_0;
2078 	       FFTW_REAL tim2_1_0;
2079 	       tre2_0_0 = c_re(in[6 * istride]);
2080 	       tim2_0_0 = c_im(in[6 * istride]);
2081 	       tre2_1_0 = c_re(in[14 * istride]);
2082 	       tim2_1_0 = c_im(in[14 * istride]);
2083 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2084 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2085 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2086 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2087 	  }
2088 	  tre0_0_2 = tre1_0_0 + tre1_0_1;
2089 	  tim0_0_2 = tim1_0_0 + tim1_0_1;
2090 	  tre0_2_2 = tre1_0_0 - tre1_0_1;
2091 	  tim0_2_2 = tim1_0_0 - tim1_0_1;
2092 	  tre0_1_2 = tre1_1_0 + tim1_1_1;
2093 	  tim0_1_2 = tim1_1_0 - tre1_1_1;
2094 	  tre0_3_2 = tre1_1_0 - tim1_1_1;
2095 	  tim0_3_2 = tim1_1_0 + tre1_1_1;
2096      }
2097      {
2098 	  FFTW_REAL tre1_0_0;
2099 	  FFTW_REAL tim1_0_0;
2100 	  FFTW_REAL tre1_0_1;
2101 	  FFTW_REAL tim1_0_1;
2102 	  FFTW_REAL tre1_1_0;
2103 	  FFTW_REAL tim1_1_0;
2104 	  FFTW_REAL tre1_1_1;
2105 	  FFTW_REAL tim1_1_1;
2106 	  {
2107 	       FFTW_REAL tre2_0_0;
2108 	       FFTW_REAL tim2_0_0;
2109 	       FFTW_REAL tre2_1_0;
2110 	       FFTW_REAL tim2_1_0;
2111 	       tre2_0_0 = c_re(in[3 * istride]);
2112 	       tim2_0_0 = c_im(in[3 * istride]);
2113 	       tre2_1_0 = c_re(in[11 * istride]);
2114 	       tim2_1_0 = c_im(in[11 * istride]);
2115 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2116 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2117 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2118 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2119 	  }
2120 	  {
2121 	       FFTW_REAL tre2_0_0;
2122 	       FFTW_REAL tim2_0_0;
2123 	       FFTW_REAL tre2_1_0;
2124 	       FFTW_REAL tim2_1_0;
2125 	       tre2_0_0 = c_re(in[7 * istride]);
2126 	       tim2_0_0 = c_im(in[7 * istride]);
2127 	       tre2_1_0 = c_re(in[15 * istride]);
2128 	       tim2_1_0 = c_im(in[15 * istride]);
2129 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2130 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2131 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2132 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2133 	  }
2134 	  tre0_0_3 = tre1_0_0 + tre1_0_1;
2135 	  tim0_0_3 = tim1_0_0 + tim1_0_1;
2136 	  tre0_2_3 = tre1_0_0 - tre1_0_1;
2137 	  tim0_2_3 = tim1_0_0 - tim1_0_1;
2138 	  tre0_1_3 = tre1_1_0 + tim1_1_1;
2139 	  tim0_1_3 = tim1_1_0 - tre1_1_1;
2140 	  tre0_3_3 = tre1_1_0 - tim1_1_1;
2141 	  tim0_3_3 = tim1_1_0 + tre1_1_1;
2142      }
2143      {
2144 	  FFTW_REAL tre1_0_0;
2145 	  FFTW_REAL tim1_0_0;
2146 	  FFTW_REAL tre1_0_1;
2147 	  FFTW_REAL tim1_0_1;
2148 	  FFTW_REAL tre1_1_0;
2149 	  FFTW_REAL tim1_1_0;
2150 	  FFTW_REAL tre1_1_1;
2151 	  FFTW_REAL tim1_1_1;
2152 	  tre1_0_0 = tre0_0_0 + tre0_0_2;
2153 	  tim1_0_0 = tim0_0_0 + tim0_0_2;
2154 	  tre1_1_0 = tre0_0_0 - tre0_0_2;
2155 	  tim1_1_0 = tim0_0_0 - tim0_0_2;
2156 	  tre1_0_1 = tre0_0_1 + tre0_0_3;
2157 	  tim1_0_1 = tim0_0_1 + tim0_0_3;
2158 	  tre1_1_1 = tre0_0_1 - tre0_0_3;
2159 	  tim1_1_1 = tim0_0_1 - tim0_0_3;
2160 	  c_re(out[0]) = tre1_0_0 + tre1_0_1;
2161 	  c_im(out[0]) = tim1_0_0 + tim1_0_1;
2162 	  c_re(out[8 * ostride]) = tre1_0_0 - tre1_0_1;
2163 	  c_im(out[8 * ostride]) = tim1_0_0 - tim1_0_1;
2164 	  c_re(out[4 * ostride]) = tre1_1_0 + tim1_1_1;
2165 	  c_im(out[4 * ostride]) = tim1_1_0 - tre1_1_1;
2166 	  c_re(out[12 * ostride]) = tre1_1_0 - tim1_1_1;
2167 	  c_im(out[12 * ostride]) = tim1_1_0 + tre1_1_1;
2168      }
2169      {
2170 	  FFTW_REAL tre1_0_0;
2171 	  FFTW_REAL tim1_0_0;
2172 	  FFTW_REAL tre1_0_1;
2173 	  FFTW_REAL tim1_0_1;
2174 	  FFTW_REAL tre1_1_0;
2175 	  FFTW_REAL tim1_1_0;
2176 	  FFTW_REAL tre1_1_1;
2177 	  FFTW_REAL tim1_1_1;
2178 	  {
2179 	       FFTW_REAL tre2_1_0;
2180 	       FFTW_REAL tim2_1_0;
2181 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_2 + tim0_1_2);
2182 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_2 - tre0_1_2);
2183 	       tre1_0_0 = tre0_1_0 + tre2_1_0;
2184 	       tim1_0_0 = tim0_1_0 + tim2_1_0;
2185 	       tre1_1_0 = tre0_1_0 - tre2_1_0;
2186 	       tim1_1_0 = tim0_1_0 - tim2_1_0;
2187 	  }
2188 	  {
2189 	       FFTW_REAL tre2_0_0;
2190 	       FFTW_REAL tim2_0_0;
2191 	       FFTW_REAL tre2_1_0;
2192 	       FFTW_REAL tim2_1_0;
2193 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_1);
2194 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_1);
2195 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_1_3);
2196 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_1_3);
2197 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2198 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2199 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2200 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2201 	  }
2202 	  c_re(out[ostride]) = tre1_0_0 + tre1_0_1;
2203 	  c_im(out[ostride]) = tim1_0_0 + tim1_0_1;
2204 	  c_re(out[9 * ostride]) = tre1_0_0 - tre1_0_1;
2205 	  c_im(out[9 * ostride]) = tim1_0_0 - tim1_0_1;
2206 	  c_re(out[5 * ostride]) = tre1_1_0 + tim1_1_1;
2207 	  c_im(out[5 * ostride]) = tim1_1_0 - tre1_1_1;
2208 	  c_re(out[13 * ostride]) = tre1_1_0 - tim1_1_1;
2209 	  c_im(out[13 * ostride]) = tim1_1_0 + tre1_1_1;
2210      }
2211      {
2212 	  FFTW_REAL tre1_0_0;
2213 	  FFTW_REAL tim1_0_0;
2214 	  FFTW_REAL tre1_0_1;
2215 	  FFTW_REAL tim1_0_1;
2216 	  FFTW_REAL tre1_1_0;
2217 	  FFTW_REAL tim1_1_0;
2218 	  FFTW_REAL tre1_1_1;
2219 	  FFTW_REAL tim1_1_1;
2220 	  tre1_0_0 = tre0_2_0 + tim0_2_2;
2221 	  tim1_0_0 = tim0_2_0 - tre0_2_2;
2222 	  tre1_1_0 = tre0_2_0 - tim0_2_2;
2223 	  tim1_1_0 = tim0_2_0 + tre0_2_2;
2224 	  {
2225 	       FFTW_REAL tre2_0_0;
2226 	       FFTW_REAL tim2_0_0;
2227 	       FFTW_REAL tre2_1_0;
2228 	       FFTW_REAL tim2_1_0;
2229 	       tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_1 + tim0_2_1);
2230 	       tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_1 - tre0_2_1);
2231 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_3 - tre0_2_3);
2232 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_3 + tre0_2_3);
2233 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2234 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
2235 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2236 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
2237 	  }
2238 	  c_re(out[2 * ostride]) = tre1_0_0 + tre1_0_1;
2239 	  c_im(out[2 * ostride]) = tim1_0_0 + tim1_0_1;
2240 	  c_re(out[10 * ostride]) = tre1_0_0 - tre1_0_1;
2241 	  c_im(out[10 * ostride]) = tim1_0_0 - tim1_0_1;
2242 	  c_re(out[6 * ostride]) = tre1_1_0 + tim1_1_1;
2243 	  c_im(out[6 * ostride]) = tim1_1_0 - tre1_1_1;
2244 	  c_re(out[14 * ostride]) = tre1_1_0 - tim1_1_1;
2245 	  c_im(out[14 * ostride]) = tim1_1_0 + tre1_1_1;
2246      }
2247      {
2248 	  FFTW_REAL tre1_0_0;
2249 	  FFTW_REAL tim1_0_0;
2250 	  FFTW_REAL tre1_0_1;
2251 	  FFTW_REAL tim1_0_1;
2252 	  FFTW_REAL tre1_1_0;
2253 	  FFTW_REAL tim1_1_0;
2254 	  FFTW_REAL tre1_1_1;
2255 	  FFTW_REAL tim1_1_1;
2256 	  {
2257 	       FFTW_REAL tre2_1_0;
2258 	       FFTW_REAL tim2_1_0;
2259 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_2 - tre0_3_2);
2260 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_2 + tre0_3_2);
2261 	       tre1_0_0 = tre0_3_0 + tre2_1_0;
2262 	       tim1_0_0 = tim0_3_0 - tim2_1_0;
2263 	       tre1_1_0 = tre0_3_0 - tre2_1_0;
2264 	       tim1_1_0 = tim0_3_0 + tim2_1_0;
2265 	  }
2266 	  {
2267 	       FFTW_REAL tre2_0_0;
2268 	       FFTW_REAL tim2_0_0;
2269 	       FFTW_REAL tre2_1_0;
2270 	       FFTW_REAL tim2_1_0;
2271 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_1) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_1);
2272 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_1) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_1);
2273 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_3_3) + (((FFTW_REAL) FFTW_K382683432) * tim0_3_3);
2274 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_3);
2275 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
2276 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2277 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
2278 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2279 	  }
2280 	  c_re(out[3 * ostride]) = tre1_0_0 + tre1_0_1;
2281 	  c_im(out[3 * ostride]) = tim1_0_0 + tim1_0_1;
2282 	  c_re(out[11 * ostride]) = tre1_0_0 - tre1_0_1;
2283 	  c_im(out[11 * ostride]) = tim1_0_0 - tim1_0_1;
2284 	  c_re(out[7 * ostride]) = tre1_1_0 + tim1_1_1;
2285 	  c_im(out[7 * ostride]) = tim1_1_0 - tre1_1_1;
2286 	  c_re(out[15 * ostride]) = tre1_1_0 - tim1_1_1;
2287 	  c_im(out[15 * ostride]) = tim1_1_0 + tre1_1_1;
2288      }
2289 }
2290 
2291 /* This function contains 4 FP additions and 0 FP multiplications */
2292 
/*
 * Two-point codelet without twiddle factors.
 *
 * Reads the complex values in[0] and in[istride]; stores their sum in
 * out[0] and their difference (first minus second) in out[ostride].
 */
static void fftw_no_twiddle_2(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Both inputs are fully loaded into locals before any output is stored. */
     const FFTW_REAL re_a = c_re(in[0]);
     const FFTW_REAL im_a = c_im(in[0]);
     const FFTW_REAL re_b = c_re(in[istride]);
     const FFTW_REAL im_b = c_im(in[istride]);

     /* Sum goes to the first output slot ... */
     c_re(out[0]) = re_a + re_b;
     c_im(out[0]) = im_a + im_b;

     /* ... and the difference to the second. */
     c_re(out[ostride]) = re_a - re_b;
     c_im(out[ostride]) = im_a - im_b;
}
2308 
2309 /* This function contains 14 FP additions and 4 FP multiplications */
2310 
/*
 * Three-point codelet without twiddle factors.
 *
 * Reads in[0], in[istride] and in[2 * istride] and stores three complex
 * results at out[0], out[ostride] and out[2 * ostride]:
 *   - out[0] is the plain sum of the three inputs;
 *   - out[ostride] and out[2 * ostride] share a "base" term (input 0 minus
 *     a scaled sum of inputs 1 and 2) and differ only in the sign of a
 *     "cross" term (a scaled difference of the opposite component of
 *     inputs 1 and 2).
 */
static void fftw_no_twiddle_3(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* NOTE(review): both constants are defined outside this block; judging
        by their names they approximate 1/2 and sqrt(3)/2 -- confirm. */
     const FFTW_REAL k_sum = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k_cross = (FFTW_REAL) FFTW_K866025403;

     /* All inputs are loaded into locals before the first store. */
     const FFTW_REAL re0 = c_re(in[0]);
     const FFTW_REAL im0 = c_im(in[0]);
     const FFTW_REAL re1 = c_re(in[istride]);
     const FFTW_REAL im1 = c_im(in[istride]);
     const FFTW_REAL re2 = c_re(in[2 * istride]);
     const FFTW_REAL im2 = c_im(in[2 * istride]);

     /* Real parts of outputs 1 and 2 are re_base +/- re_cross. */
     const FFTW_REAL re_base = re0 - (k_sum * (re1 + re2));
     const FFTW_REAL re_cross = k_cross * (im1 - im2);

     /* Imaginary parts of outputs 1 and 2 are im_base +/- im_cross;
        note the operand order (2 minus 1) in the cross term. */
     const FFTW_REAL im_base = im0 - (k_sum * (im1 + im2));
     const FFTW_REAL im_cross = k_cross * (re2 - re1);

     c_re(out[0]) = re0 + re1 + re2;
     c_im(out[0]) = im0 + im1 + im2;

     c_re(out[ostride]) = re_base + re_cross;
     c_re(out[2 * ostride]) = re_base - re_cross;

     c_im(out[ostride]) = im_base + im_cross;
     c_im(out[2 * ostride]) = im_base - im_cross;
}
2344 
2345 /* This function contains 376 FP additions and 88 FP multiplications */
2346 
fftw_no_twiddle_32(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)2347 static void fftw_no_twiddle_32(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
2348 {
2349      FFTW_REAL tre0_0_0;
2350      FFTW_REAL tim0_0_0;
2351      FFTW_REAL tre0_0_1;
2352      FFTW_REAL tim0_0_1;
2353      FFTW_REAL tre0_0_2;
2354      FFTW_REAL tim0_0_2;
2355      FFTW_REAL tre0_0_3;
2356      FFTW_REAL tim0_0_3;
2357      FFTW_REAL tre0_0_4;
2358      FFTW_REAL tim0_0_4;
2359      FFTW_REAL tre0_0_5;
2360      FFTW_REAL tim0_0_5;
2361      FFTW_REAL tre0_0_6;
2362      FFTW_REAL tim0_0_6;
2363      FFTW_REAL tre0_0_7;
2364      FFTW_REAL tim0_0_7;
2365      FFTW_REAL tre0_1_0;
2366      FFTW_REAL tim0_1_0;
2367      FFTW_REAL tre0_1_1;
2368      FFTW_REAL tim0_1_1;
2369      FFTW_REAL tre0_1_2;
2370      FFTW_REAL tim0_1_2;
2371      FFTW_REAL tre0_1_3;
2372      FFTW_REAL tim0_1_3;
2373      FFTW_REAL tre0_1_4;
2374      FFTW_REAL tim0_1_4;
2375      FFTW_REAL tre0_1_5;
2376      FFTW_REAL tim0_1_5;
2377      FFTW_REAL tre0_1_6;
2378      FFTW_REAL tim0_1_6;
2379      FFTW_REAL tre0_1_7;
2380      FFTW_REAL tim0_1_7;
2381      FFTW_REAL tre0_2_0;
2382      FFTW_REAL tim0_2_0;
2383      FFTW_REAL tre0_2_1;
2384      FFTW_REAL tim0_2_1;
2385      FFTW_REAL tre0_2_2;
2386      FFTW_REAL tim0_2_2;
2387      FFTW_REAL tre0_2_3;
2388      FFTW_REAL tim0_2_3;
2389      FFTW_REAL tre0_2_4;
2390      FFTW_REAL tim0_2_4;
2391      FFTW_REAL tre0_2_5;
2392      FFTW_REAL tim0_2_5;
2393      FFTW_REAL tre0_2_6;
2394      FFTW_REAL tim0_2_6;
2395      FFTW_REAL tre0_2_7;
2396      FFTW_REAL tim0_2_7;
2397      FFTW_REAL tre0_3_0;
2398      FFTW_REAL tim0_3_0;
2399      FFTW_REAL tre0_3_1;
2400      FFTW_REAL tim0_3_1;
2401      FFTW_REAL tre0_3_2;
2402      FFTW_REAL tim0_3_2;
2403      FFTW_REAL tre0_3_3;
2404      FFTW_REAL tim0_3_3;
2405      FFTW_REAL tre0_3_4;
2406      FFTW_REAL tim0_3_4;
2407      FFTW_REAL tre0_3_5;
2408      FFTW_REAL tim0_3_5;
2409      FFTW_REAL tre0_3_6;
2410      FFTW_REAL tim0_3_6;
2411      FFTW_REAL tre0_3_7;
2412      FFTW_REAL tim0_3_7;
2413      {
2414 	  FFTW_REAL tre1_0_0;
2415 	  FFTW_REAL tim1_0_0;
2416 	  FFTW_REAL tre1_0_1;
2417 	  FFTW_REAL tim1_0_1;
2418 	  FFTW_REAL tre1_1_0;
2419 	  FFTW_REAL tim1_1_0;
2420 	  FFTW_REAL tre1_1_1;
2421 	  FFTW_REAL tim1_1_1;
2422 	  {
2423 	       FFTW_REAL tre2_0_0;
2424 	       FFTW_REAL tim2_0_0;
2425 	       FFTW_REAL tre2_1_0;
2426 	       FFTW_REAL tim2_1_0;
2427 	       tre2_0_0 = c_re(in[0]);
2428 	       tim2_0_0 = c_im(in[0]);
2429 	       tre2_1_0 = c_re(in[16 * istride]);
2430 	       tim2_1_0 = c_im(in[16 * istride]);
2431 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2432 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2433 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2434 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2435 	  }
2436 	  {
2437 	       FFTW_REAL tre2_0_0;
2438 	       FFTW_REAL tim2_0_0;
2439 	       FFTW_REAL tre2_1_0;
2440 	       FFTW_REAL tim2_1_0;
2441 	       tre2_0_0 = c_re(in[8 * istride]);
2442 	       tim2_0_0 = c_im(in[8 * istride]);
2443 	       tre2_1_0 = c_re(in[24 * istride]);
2444 	       tim2_1_0 = c_im(in[24 * istride]);
2445 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2446 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2447 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2448 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2449 	  }
2450 	  tre0_0_0 = tre1_0_0 + tre1_0_1;
2451 	  tim0_0_0 = tim1_0_0 + tim1_0_1;
2452 	  tre0_2_0 = tre1_0_0 - tre1_0_1;
2453 	  tim0_2_0 = tim1_0_0 - tim1_0_1;
2454 	  tre0_1_0 = tre1_1_0 + tim1_1_1;
2455 	  tim0_1_0 = tim1_1_0 - tre1_1_1;
2456 	  tre0_3_0 = tre1_1_0 - tim1_1_1;
2457 	  tim0_3_0 = tim1_1_0 + tre1_1_1;
2458      }
2459      {
2460 	  FFTW_REAL tre1_0_0;
2461 	  FFTW_REAL tim1_0_0;
2462 	  FFTW_REAL tre1_0_1;
2463 	  FFTW_REAL tim1_0_1;
2464 	  FFTW_REAL tre1_1_0;
2465 	  FFTW_REAL tim1_1_0;
2466 	  FFTW_REAL tre1_1_1;
2467 	  FFTW_REAL tim1_1_1;
2468 	  {
2469 	       FFTW_REAL tre2_0_0;
2470 	       FFTW_REAL tim2_0_0;
2471 	       FFTW_REAL tre2_1_0;
2472 	       FFTW_REAL tim2_1_0;
2473 	       tre2_0_0 = c_re(in[istride]);
2474 	       tim2_0_0 = c_im(in[istride]);
2475 	       tre2_1_0 = c_re(in[17 * istride]);
2476 	       tim2_1_0 = c_im(in[17 * istride]);
2477 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2478 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2479 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2480 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2481 	  }
2482 	  {
2483 	       FFTW_REAL tre2_0_0;
2484 	       FFTW_REAL tim2_0_0;
2485 	       FFTW_REAL tre2_1_0;
2486 	       FFTW_REAL tim2_1_0;
2487 	       tre2_0_0 = c_re(in[9 * istride]);
2488 	       tim2_0_0 = c_im(in[9 * istride]);
2489 	       tre2_1_0 = c_re(in[25 * istride]);
2490 	       tim2_1_0 = c_im(in[25 * istride]);
2491 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2492 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2493 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2494 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2495 	  }
2496 	  tre0_0_1 = tre1_0_0 + tre1_0_1;
2497 	  tim0_0_1 = tim1_0_0 + tim1_0_1;
2498 	  tre0_2_1 = tre1_0_0 - tre1_0_1;
2499 	  tim0_2_1 = tim1_0_0 - tim1_0_1;
2500 	  tre0_1_1 = tre1_1_0 + tim1_1_1;
2501 	  tim0_1_1 = tim1_1_0 - tre1_1_1;
2502 	  tre0_3_1 = tre1_1_0 - tim1_1_1;
2503 	  tim0_3_1 = tim1_1_0 + tre1_1_1;
2504      }
2505      {
2506 	  FFTW_REAL tre1_0_0;
2507 	  FFTW_REAL tim1_0_0;
2508 	  FFTW_REAL tre1_0_1;
2509 	  FFTW_REAL tim1_0_1;
2510 	  FFTW_REAL tre1_1_0;
2511 	  FFTW_REAL tim1_1_0;
2512 	  FFTW_REAL tre1_1_1;
2513 	  FFTW_REAL tim1_1_1;
2514 	  {
2515 	       FFTW_REAL tre2_0_0;
2516 	       FFTW_REAL tim2_0_0;
2517 	       FFTW_REAL tre2_1_0;
2518 	       FFTW_REAL tim2_1_0;
2519 	       tre2_0_0 = c_re(in[2 * istride]);
2520 	       tim2_0_0 = c_im(in[2 * istride]);
2521 	       tre2_1_0 = c_re(in[18 * istride]);
2522 	       tim2_1_0 = c_im(in[18 * istride]);
2523 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2524 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2525 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2526 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2527 	  }
2528 	  {
2529 	       FFTW_REAL tre2_0_0;
2530 	       FFTW_REAL tim2_0_0;
2531 	       FFTW_REAL tre2_1_0;
2532 	       FFTW_REAL tim2_1_0;
2533 	       tre2_0_0 = c_re(in[10 * istride]);
2534 	       tim2_0_0 = c_im(in[10 * istride]);
2535 	       tre2_1_0 = c_re(in[26 * istride]);
2536 	       tim2_1_0 = c_im(in[26 * istride]);
2537 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2538 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2539 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2540 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2541 	  }
2542 	  tre0_0_2 = tre1_0_0 + tre1_0_1;
2543 	  tim0_0_2 = tim1_0_0 + tim1_0_1;
2544 	  tre0_2_2 = tre1_0_0 - tre1_0_1;
2545 	  tim0_2_2 = tim1_0_0 - tim1_0_1;
2546 	  tre0_1_2 = tre1_1_0 + tim1_1_1;
2547 	  tim0_1_2 = tim1_1_0 - tre1_1_1;
2548 	  tre0_3_2 = tre1_1_0 - tim1_1_1;
2549 	  tim0_3_2 = tim1_1_0 + tre1_1_1;
2550      }
2551      {
2552 	  FFTW_REAL tre1_0_0;
2553 	  FFTW_REAL tim1_0_0;
2554 	  FFTW_REAL tre1_0_1;
2555 	  FFTW_REAL tim1_0_1;
2556 	  FFTW_REAL tre1_1_0;
2557 	  FFTW_REAL tim1_1_0;
2558 	  FFTW_REAL tre1_1_1;
2559 	  FFTW_REAL tim1_1_1;
2560 	  {
2561 	       FFTW_REAL tre2_0_0;
2562 	       FFTW_REAL tim2_0_0;
2563 	       FFTW_REAL tre2_1_0;
2564 	       FFTW_REAL tim2_1_0;
2565 	       tre2_0_0 = c_re(in[3 * istride]);
2566 	       tim2_0_0 = c_im(in[3 * istride]);
2567 	       tre2_1_0 = c_re(in[19 * istride]);
2568 	       tim2_1_0 = c_im(in[19 * istride]);
2569 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2570 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2571 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2572 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2573 	  }
2574 	  {
2575 	       FFTW_REAL tre2_0_0;
2576 	       FFTW_REAL tim2_0_0;
2577 	       FFTW_REAL tre2_1_0;
2578 	       FFTW_REAL tim2_1_0;
2579 	       tre2_0_0 = c_re(in[11 * istride]);
2580 	       tim2_0_0 = c_im(in[11 * istride]);
2581 	       tre2_1_0 = c_re(in[27 * istride]);
2582 	       tim2_1_0 = c_im(in[27 * istride]);
2583 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2584 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2585 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2586 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2587 	  }
2588 	  tre0_0_3 = tre1_0_0 + tre1_0_1;
2589 	  tim0_0_3 = tim1_0_0 + tim1_0_1;
2590 	  tre0_2_3 = tre1_0_0 - tre1_0_1;
2591 	  tim0_2_3 = tim1_0_0 - tim1_0_1;
2592 	  tre0_1_3 = tre1_1_0 + tim1_1_1;
2593 	  tim0_1_3 = tim1_1_0 - tre1_1_1;
2594 	  tre0_3_3 = tre1_1_0 - tim1_1_1;
2595 	  tim0_3_3 = tim1_1_0 + tre1_1_1;
2596      }
2597      {
2598 	  FFTW_REAL tre1_0_0;
2599 	  FFTW_REAL tim1_0_0;
2600 	  FFTW_REAL tre1_0_1;
2601 	  FFTW_REAL tim1_0_1;
2602 	  FFTW_REAL tre1_1_0;
2603 	  FFTW_REAL tim1_1_0;
2604 	  FFTW_REAL tre1_1_1;
2605 	  FFTW_REAL tim1_1_1;
2606 	  {
2607 	       FFTW_REAL tre2_0_0;
2608 	       FFTW_REAL tim2_0_0;
2609 	       FFTW_REAL tre2_1_0;
2610 	       FFTW_REAL tim2_1_0;
2611 	       tre2_0_0 = c_re(in[4 * istride]);
2612 	       tim2_0_0 = c_im(in[4 * istride]);
2613 	       tre2_1_0 = c_re(in[20 * istride]);
2614 	       tim2_1_0 = c_im(in[20 * istride]);
2615 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2616 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2617 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2618 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2619 	  }
2620 	  {
2621 	       FFTW_REAL tre2_0_0;
2622 	       FFTW_REAL tim2_0_0;
2623 	       FFTW_REAL tre2_1_0;
2624 	       FFTW_REAL tim2_1_0;
2625 	       tre2_0_0 = c_re(in[12 * istride]);
2626 	       tim2_0_0 = c_im(in[12 * istride]);
2627 	       tre2_1_0 = c_re(in[28 * istride]);
2628 	       tim2_1_0 = c_im(in[28 * istride]);
2629 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2630 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2631 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2632 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2633 	  }
2634 	  tre0_0_4 = tre1_0_0 + tre1_0_1;
2635 	  tim0_0_4 = tim1_0_0 + tim1_0_1;
2636 	  tre0_2_4 = tre1_0_0 - tre1_0_1;
2637 	  tim0_2_4 = tim1_0_0 - tim1_0_1;
2638 	  tre0_1_4 = tre1_1_0 + tim1_1_1;
2639 	  tim0_1_4 = tim1_1_0 - tre1_1_1;
2640 	  tre0_3_4 = tre1_1_0 - tim1_1_1;
2641 	  tim0_3_4 = tim1_1_0 + tre1_1_1;
2642      }
2643      {
2644 	  FFTW_REAL tre1_0_0;
2645 	  FFTW_REAL tim1_0_0;
2646 	  FFTW_REAL tre1_0_1;
2647 	  FFTW_REAL tim1_0_1;
2648 	  FFTW_REAL tre1_1_0;
2649 	  FFTW_REAL tim1_1_0;
2650 	  FFTW_REAL tre1_1_1;
2651 	  FFTW_REAL tim1_1_1;
2652 	  {
2653 	       FFTW_REAL tre2_0_0;
2654 	       FFTW_REAL tim2_0_0;
2655 	       FFTW_REAL tre2_1_0;
2656 	       FFTW_REAL tim2_1_0;
2657 	       tre2_0_0 = c_re(in[5 * istride]);
2658 	       tim2_0_0 = c_im(in[5 * istride]);
2659 	       tre2_1_0 = c_re(in[21 * istride]);
2660 	       tim2_1_0 = c_im(in[21 * istride]);
2661 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2662 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2663 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2664 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2665 	  }
2666 	  {
2667 	       FFTW_REAL tre2_0_0;
2668 	       FFTW_REAL tim2_0_0;
2669 	       FFTW_REAL tre2_1_0;
2670 	       FFTW_REAL tim2_1_0;
2671 	       tre2_0_0 = c_re(in[13 * istride]);
2672 	       tim2_0_0 = c_im(in[13 * istride]);
2673 	       tre2_1_0 = c_re(in[29 * istride]);
2674 	       tim2_1_0 = c_im(in[29 * istride]);
2675 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2676 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2677 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2678 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2679 	  }
2680 	  tre0_0_5 = tre1_0_0 + tre1_0_1;
2681 	  tim0_0_5 = tim1_0_0 + tim1_0_1;
2682 	  tre0_2_5 = tre1_0_0 - tre1_0_1;
2683 	  tim0_2_5 = tim1_0_0 - tim1_0_1;
2684 	  tre0_1_5 = tre1_1_0 + tim1_1_1;
2685 	  tim0_1_5 = tim1_1_0 - tre1_1_1;
2686 	  tre0_3_5 = tre1_1_0 - tim1_1_1;
2687 	  tim0_3_5 = tim1_1_0 + tre1_1_1;
2688      }
2689      {
2690 	  FFTW_REAL tre1_0_0;
2691 	  FFTW_REAL tim1_0_0;
2692 	  FFTW_REAL tre1_0_1;
2693 	  FFTW_REAL tim1_0_1;
2694 	  FFTW_REAL tre1_1_0;
2695 	  FFTW_REAL tim1_1_0;
2696 	  FFTW_REAL tre1_1_1;
2697 	  FFTW_REAL tim1_1_1;
2698 	  {
2699 	       FFTW_REAL tre2_0_0;
2700 	       FFTW_REAL tim2_0_0;
2701 	       FFTW_REAL tre2_1_0;
2702 	       FFTW_REAL tim2_1_0;
2703 	       tre2_0_0 = c_re(in[6 * istride]);
2704 	       tim2_0_0 = c_im(in[6 * istride]);
2705 	       tre2_1_0 = c_re(in[22 * istride]);
2706 	       tim2_1_0 = c_im(in[22 * istride]);
2707 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2708 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2709 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2710 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2711 	  }
2712 	  {
2713 	       FFTW_REAL tre2_0_0;
2714 	       FFTW_REAL tim2_0_0;
2715 	       FFTW_REAL tre2_1_0;
2716 	       FFTW_REAL tim2_1_0;
2717 	       tre2_0_0 = c_re(in[14 * istride]);
2718 	       tim2_0_0 = c_im(in[14 * istride]);
2719 	       tre2_1_0 = c_re(in[30 * istride]);
2720 	       tim2_1_0 = c_im(in[30 * istride]);
2721 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2722 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2723 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2724 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2725 	  }
2726 	  tre0_0_6 = tre1_0_0 + tre1_0_1;
2727 	  tim0_0_6 = tim1_0_0 + tim1_0_1;
2728 	  tre0_2_6 = tre1_0_0 - tre1_0_1;
2729 	  tim0_2_6 = tim1_0_0 - tim1_0_1;
2730 	  tre0_1_6 = tre1_1_0 + tim1_1_1;
2731 	  tim0_1_6 = tim1_1_0 - tre1_1_1;
2732 	  tre0_3_6 = tre1_1_0 - tim1_1_1;
2733 	  tim0_3_6 = tim1_1_0 + tre1_1_1;
2734      }
2735      {
2736 	  FFTW_REAL tre1_0_0;
2737 	  FFTW_REAL tim1_0_0;
2738 	  FFTW_REAL tre1_0_1;
2739 	  FFTW_REAL tim1_0_1;
2740 	  FFTW_REAL tre1_1_0;
2741 	  FFTW_REAL tim1_1_0;
2742 	  FFTW_REAL tre1_1_1;
2743 	  FFTW_REAL tim1_1_1;
2744 	  {
2745 	       FFTW_REAL tre2_0_0;
2746 	       FFTW_REAL tim2_0_0;
2747 	       FFTW_REAL tre2_1_0;
2748 	       FFTW_REAL tim2_1_0;
2749 	       tre2_0_0 = c_re(in[7 * istride]);
2750 	       tim2_0_0 = c_im(in[7 * istride]);
2751 	       tre2_1_0 = c_re(in[23 * istride]);
2752 	       tim2_1_0 = c_im(in[23 * istride]);
2753 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
2754 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
2755 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
2756 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
2757 	  }
2758 	  {
2759 	       FFTW_REAL tre2_0_0;
2760 	       FFTW_REAL tim2_0_0;
2761 	       FFTW_REAL tre2_1_0;
2762 	       FFTW_REAL tim2_1_0;
2763 	       tre2_0_0 = c_re(in[15 * istride]);
2764 	       tim2_0_0 = c_im(in[15 * istride]);
2765 	       tre2_1_0 = c_re(in[31 * istride]);
2766 	       tim2_1_0 = c_im(in[31 * istride]);
2767 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2768 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2769 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2770 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2771 	  }
2772 	  tre0_0_7 = tre1_0_0 + tre1_0_1;
2773 	  tim0_0_7 = tim1_0_0 + tim1_0_1;
2774 	  tre0_2_7 = tre1_0_0 - tre1_0_1;
2775 	  tim0_2_7 = tim1_0_0 - tim1_0_1;
2776 	  tre0_1_7 = tre1_1_0 + tim1_1_1;
2777 	  tim0_1_7 = tim1_1_0 - tre1_1_1;
2778 	  tre0_3_7 = tre1_1_0 - tim1_1_1;
2779 	  tim0_3_7 = tim1_1_0 + tre1_1_1;
2780      }
2781      {
2782 	  FFTW_REAL tre1_0_0;
2783 	  FFTW_REAL tim1_0_0;
2784 	  FFTW_REAL tre1_0_1;
2785 	  FFTW_REAL tim1_0_1;
2786 	  FFTW_REAL tre1_0_2;
2787 	  FFTW_REAL tim1_0_2;
2788 	  FFTW_REAL tre1_0_3;
2789 	  FFTW_REAL tim1_0_3;
2790 	  FFTW_REAL tre1_1_0;
2791 	  FFTW_REAL tim1_1_0;
2792 	  FFTW_REAL tre1_1_1;
2793 	  FFTW_REAL tim1_1_1;
2794 	  FFTW_REAL tre1_1_2;
2795 	  FFTW_REAL tim1_1_2;
2796 	  FFTW_REAL tre1_1_3;
2797 	  FFTW_REAL tim1_1_3;
2798 	  tre1_0_0 = tre0_0_0 + tre0_0_4;
2799 	  tim1_0_0 = tim0_0_0 + tim0_0_4;
2800 	  tre1_1_0 = tre0_0_0 - tre0_0_4;
2801 	  tim1_1_0 = tim0_0_0 - tim0_0_4;
2802 	  tre1_0_1 = tre0_0_1 + tre0_0_5;
2803 	  tim1_0_1 = tim0_0_1 + tim0_0_5;
2804 	  tre1_1_1 = tre0_0_1 - tre0_0_5;
2805 	  tim1_1_1 = tim0_0_1 - tim0_0_5;
2806 	  tre1_0_2 = tre0_0_2 + tre0_0_6;
2807 	  tim1_0_2 = tim0_0_2 + tim0_0_6;
2808 	  tre1_1_2 = tre0_0_2 - tre0_0_6;
2809 	  tim1_1_2 = tim0_0_2 - tim0_0_6;
2810 	  tre1_0_3 = tre0_0_3 + tre0_0_7;
2811 	  tim1_0_3 = tim0_0_3 + tim0_0_7;
2812 	  tre1_1_3 = tre0_0_3 - tre0_0_7;
2813 	  tim1_1_3 = tim0_0_3 - tim0_0_7;
2814 	  {
2815 	       FFTW_REAL tre2_0_0;
2816 	       FFTW_REAL tim2_0_0;
2817 	       FFTW_REAL tre2_0_1;
2818 	       FFTW_REAL tim2_0_1;
2819 	       FFTW_REAL tre2_1_0;
2820 	       FFTW_REAL tim2_1_0;
2821 	       FFTW_REAL tre2_1_1;
2822 	       FFTW_REAL tim2_1_1;
2823 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
2824 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
2825 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
2826 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
2827 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
2828 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
2829 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
2830 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
2831 	       c_re(out[0]) = tre2_0_0 + tre2_0_1;
2832 	       c_im(out[0]) = tim2_0_0 + tim2_0_1;
2833 	       c_re(out[16 * ostride]) = tre2_0_0 - tre2_0_1;
2834 	       c_im(out[16 * ostride]) = tim2_0_0 - tim2_0_1;
2835 	       c_re(out[8 * ostride]) = tre2_1_0 + tim2_1_1;
2836 	       c_im(out[8 * ostride]) = tim2_1_0 - tre2_1_1;
2837 	       c_re(out[24 * ostride]) = tre2_1_0 - tim2_1_1;
2838 	       c_im(out[24 * ostride]) = tim2_1_0 + tre2_1_1;
2839 	  }
2840 	  {
2841 	       FFTW_REAL tre2_0_0;
2842 	       FFTW_REAL tim2_0_0;
2843 	       FFTW_REAL tre2_0_1;
2844 	       FFTW_REAL tim2_0_1;
2845 	       FFTW_REAL tre2_1_0;
2846 	       FFTW_REAL tim2_1_0;
2847 	       FFTW_REAL tre2_1_1;
2848 	       FFTW_REAL tim2_1_1;
2849 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
2850 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
2851 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
2852 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
2853 	       {
2854 		    FFTW_REAL tre3_0_0;
2855 		    FFTW_REAL tim3_0_0;
2856 		    FFTW_REAL tre3_1_0;
2857 		    FFTW_REAL tim3_1_0;
2858 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
2859 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
2860 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
2861 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
2862 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
2863 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
2864 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
2865 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
2866 	       }
2867 	       c_re(out[4 * ostride]) = tre2_0_0 + tre2_0_1;
2868 	       c_im(out[4 * ostride]) = tim2_0_0 + tim2_0_1;
2869 	       c_re(out[20 * ostride]) = tre2_0_0 - tre2_0_1;
2870 	       c_im(out[20 * ostride]) = tim2_0_0 - tim2_0_1;
2871 	       c_re(out[12 * ostride]) = tre2_1_0 + tim2_1_1;
2872 	       c_im(out[12 * ostride]) = tim2_1_0 - tre2_1_1;
2873 	       c_re(out[28 * ostride]) = tre2_1_0 - tim2_1_1;
2874 	       c_im(out[28 * ostride]) = tim2_1_0 + tre2_1_1;
2875 	  }
2876      }
2877      {
2878 	  FFTW_REAL tre1_0_0;
2879 	  FFTW_REAL tim1_0_0;
2880 	  FFTW_REAL tre1_0_1;
2881 	  FFTW_REAL tim1_0_1;
2882 	  FFTW_REAL tre1_0_2;
2883 	  FFTW_REAL tim1_0_2;
2884 	  FFTW_REAL tre1_0_3;
2885 	  FFTW_REAL tim1_0_3;
2886 	  FFTW_REAL tre1_1_0;
2887 	  FFTW_REAL tim1_1_0;
2888 	  FFTW_REAL tre1_1_1;
2889 	  FFTW_REAL tim1_1_1;
2890 	  FFTW_REAL tre1_1_2;
2891 	  FFTW_REAL tim1_1_2;
2892 	  FFTW_REAL tre1_1_3;
2893 	  FFTW_REAL tim1_1_3;
2894 	  {
2895 	       FFTW_REAL tre2_1_0;
2896 	       FFTW_REAL tim2_1_0;
2897 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_4 + tim0_1_4);
2898 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_4 - tre0_1_4);
2899 	       tre1_0_0 = tre0_1_0 + tre2_1_0;
2900 	       tim1_0_0 = tim0_1_0 + tim2_1_0;
2901 	       tre1_1_0 = tre0_1_0 - tre2_1_0;
2902 	       tim1_1_0 = tim0_1_0 - tim2_1_0;
2903 	  }
2904 	  {
2905 	       FFTW_REAL tre2_0_0;
2906 	       FFTW_REAL tim2_0_0;
2907 	       FFTW_REAL tre2_1_0;
2908 	       FFTW_REAL tim2_1_0;
2909 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_1) + (((FFTW_REAL) FFTW_K195090322) * tim0_1_1);
2910 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_1) - (((FFTW_REAL) FFTW_K195090322) * tre0_1_1);
2911 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_1_5) + (((FFTW_REAL) FFTW_K831469612) * tim0_1_5);
2912 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_1_5) - (((FFTW_REAL) FFTW_K831469612) * tre0_1_5);
2913 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
2914 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
2915 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
2916 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
2917 	  }
2918 	  {
2919 	       FFTW_REAL tre2_0_0;
2920 	       FFTW_REAL tim2_0_0;
2921 	       FFTW_REAL tre2_1_0;
2922 	       FFTW_REAL tim2_1_0;
2923 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_2) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_2);
2924 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_2) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_2);
2925 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_6) + (((FFTW_REAL) FFTW_K923879532) * tim0_1_6);
2926 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_1_6);
2927 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
2928 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
2929 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
2930 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
2931 	  }
2932 	  {
2933 	       FFTW_REAL tre2_0_0;
2934 	       FFTW_REAL tim2_0_0;
2935 	       FFTW_REAL tre2_1_0;
2936 	       FFTW_REAL tim2_1_0;
2937 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_3) + (((FFTW_REAL) FFTW_K555570233) * tim0_1_3);
2938 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_3) - (((FFTW_REAL) FFTW_K555570233) * tre0_1_3);
2939 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_1_7) + (((FFTW_REAL) FFTW_K980785280) * tim0_1_7);
2940 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_1_7) - (((FFTW_REAL) FFTW_K980785280) * tre0_1_7);
2941 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
2942 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
2943 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
2944 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
2945 	  }
2946 	  {
2947 	       FFTW_REAL tre2_0_0;
2948 	       FFTW_REAL tim2_0_0;
2949 	       FFTW_REAL tre2_0_1;
2950 	       FFTW_REAL tim2_0_1;
2951 	       FFTW_REAL tre2_1_0;
2952 	       FFTW_REAL tim2_1_0;
2953 	       FFTW_REAL tre2_1_1;
2954 	       FFTW_REAL tim2_1_1;
2955 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
2956 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
2957 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
2958 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
2959 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
2960 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
2961 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
2962 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
2963 	       c_re(out[ostride]) = tre2_0_0 + tre2_0_1;
2964 	       c_im(out[ostride]) = tim2_0_0 + tim2_0_1;
2965 	       c_re(out[17 * ostride]) = tre2_0_0 - tre2_0_1;
2966 	       c_im(out[17 * ostride]) = tim2_0_0 - tim2_0_1;
2967 	       c_re(out[9 * ostride]) = tre2_1_0 + tim2_1_1;
2968 	       c_im(out[9 * ostride]) = tim2_1_0 - tre2_1_1;
2969 	       c_re(out[25 * ostride]) = tre2_1_0 - tim2_1_1;
2970 	       c_im(out[25 * ostride]) = tim2_1_0 + tre2_1_1;
2971 	  }
2972 	  {
2973 	       FFTW_REAL tre2_0_0;
2974 	       FFTW_REAL tim2_0_0;
2975 	       FFTW_REAL tre2_0_1;
2976 	       FFTW_REAL tim2_0_1;
2977 	       FFTW_REAL tre2_1_0;
2978 	       FFTW_REAL tim2_1_0;
2979 	       FFTW_REAL tre2_1_1;
2980 	       FFTW_REAL tim2_1_1;
2981 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
2982 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
2983 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
2984 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
2985 	       {
2986 		    FFTW_REAL tre3_0_0;
2987 		    FFTW_REAL tim3_0_0;
2988 		    FFTW_REAL tre3_1_0;
2989 		    FFTW_REAL tim3_1_0;
2990 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
2991 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
2992 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
2993 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
2994 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
2995 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
2996 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
2997 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
2998 	       }
2999 	       c_re(out[5 * ostride]) = tre2_0_0 + tre2_0_1;
3000 	       c_im(out[5 * ostride]) = tim2_0_0 + tim2_0_1;
3001 	       c_re(out[21 * ostride]) = tre2_0_0 - tre2_0_1;
3002 	       c_im(out[21 * ostride]) = tim2_0_0 - tim2_0_1;
3003 	       c_re(out[13 * ostride]) = tre2_1_0 + tim2_1_1;
3004 	       c_im(out[13 * ostride]) = tim2_1_0 - tre2_1_1;
3005 	       c_re(out[29 * ostride]) = tre2_1_0 - tim2_1_1;
3006 	       c_im(out[29 * ostride]) = tim2_1_0 + tre2_1_1;
3007 	  }
3008      }
3009      {
3010 	  FFTW_REAL tre1_0_0;
3011 	  FFTW_REAL tim1_0_0;
3012 	  FFTW_REAL tre1_0_1;
3013 	  FFTW_REAL tim1_0_1;
3014 	  FFTW_REAL tre1_0_2;
3015 	  FFTW_REAL tim1_0_2;
3016 	  FFTW_REAL tre1_0_3;
3017 	  FFTW_REAL tim1_0_3;
3018 	  FFTW_REAL tre1_1_0;
3019 	  FFTW_REAL tim1_1_0;
3020 	  FFTW_REAL tre1_1_1;
3021 	  FFTW_REAL tim1_1_1;
3022 	  FFTW_REAL tre1_1_2;
3023 	  FFTW_REAL tim1_1_2;
3024 	  FFTW_REAL tre1_1_3;
3025 	  FFTW_REAL tim1_1_3;
3026 	  tre1_0_0 = tre0_2_0 + tim0_2_4;
3027 	  tim1_0_0 = tim0_2_0 - tre0_2_4;
3028 	  tre1_1_0 = tre0_2_0 - tim0_2_4;
3029 	  tim1_1_0 = tim0_2_0 + tre0_2_4;
3030 	  {
3031 	       FFTW_REAL tre2_0_0;
3032 	       FFTW_REAL tim2_0_0;
3033 	       FFTW_REAL tre2_1_0;
3034 	       FFTW_REAL tim2_1_0;
3035 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_1);
3036 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_1);
3037 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_5) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_5);
3038 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_5) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_5);
3039 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
3040 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
3041 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
3042 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
3043 	  }
3044 	  {
3045 	       FFTW_REAL tre2_0_0;
3046 	       FFTW_REAL tim2_0_0;
3047 	       FFTW_REAL tre2_1_0;
3048 	       FFTW_REAL tim2_1_0;
3049 	       tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_2 + tim0_2_2);
3050 	       tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_2 - tre0_2_2);
3051 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_6 - tre0_2_6);
3052 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_6 + tre0_2_6);
3053 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
3054 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
3055 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
3056 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
3057 	  }
3058 	  {
3059 	       FFTW_REAL tre2_0_0;
3060 	       FFTW_REAL tim2_0_0;
3061 	       FFTW_REAL tre2_1_0;
3062 	       FFTW_REAL tim2_1_0;
3063 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_3);
3064 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_3);
3065 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_7) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_7);
3066 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_7) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_7);
3067 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
3068 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
3069 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
3070 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
3071 	  }
3072 	  {
3073 	       FFTW_REAL tre2_0_0;
3074 	       FFTW_REAL tim2_0_0;
3075 	       FFTW_REAL tre2_0_1;
3076 	       FFTW_REAL tim2_0_1;
3077 	       FFTW_REAL tre2_1_0;
3078 	       FFTW_REAL tim2_1_0;
3079 	       FFTW_REAL tre2_1_1;
3080 	       FFTW_REAL tim2_1_1;
3081 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
3082 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
3083 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
3084 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
3085 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
3086 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
3087 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
3088 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
3089 	       c_re(out[2 * ostride]) = tre2_0_0 + tre2_0_1;
3090 	       c_im(out[2 * ostride]) = tim2_0_0 + tim2_0_1;
3091 	       c_re(out[18 * ostride]) = tre2_0_0 - tre2_0_1;
3092 	       c_im(out[18 * ostride]) = tim2_0_0 - tim2_0_1;
3093 	       c_re(out[10 * ostride]) = tre2_1_0 + tim2_1_1;
3094 	       c_im(out[10 * ostride]) = tim2_1_0 - tre2_1_1;
3095 	       c_re(out[26 * ostride]) = tre2_1_0 - tim2_1_1;
3096 	       c_im(out[26 * ostride]) = tim2_1_0 + tre2_1_1;
3097 	  }
3098 	  {
3099 	       FFTW_REAL tre2_0_0;
3100 	       FFTW_REAL tim2_0_0;
3101 	       FFTW_REAL tre2_0_1;
3102 	       FFTW_REAL tim2_0_1;
3103 	       FFTW_REAL tre2_1_0;
3104 	       FFTW_REAL tim2_1_0;
3105 	       FFTW_REAL tre2_1_1;
3106 	       FFTW_REAL tim2_1_1;
3107 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
3108 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
3109 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
3110 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
3111 	       {
3112 		    FFTW_REAL tre3_0_0;
3113 		    FFTW_REAL tim3_0_0;
3114 		    FFTW_REAL tre3_1_0;
3115 		    FFTW_REAL tim3_1_0;
3116 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
3117 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
3118 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
3119 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
3120 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
3121 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
3122 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
3123 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
3124 	       }
3125 	       c_re(out[6 * ostride]) = tre2_0_0 + tre2_0_1;
3126 	       c_im(out[6 * ostride]) = tim2_0_0 + tim2_0_1;
3127 	       c_re(out[22 * ostride]) = tre2_0_0 - tre2_0_1;
3128 	       c_im(out[22 * ostride]) = tim2_0_0 - tim2_0_1;
3129 	       c_re(out[14 * ostride]) = tre2_1_0 + tim2_1_1;
3130 	       c_im(out[14 * ostride]) = tim2_1_0 - tre2_1_1;
3131 	       c_re(out[30 * ostride]) = tre2_1_0 - tim2_1_1;
3132 	       c_im(out[30 * ostride]) = tim2_1_0 + tre2_1_1;
3133 	  }
3134      }
3135      {
3136 	  FFTW_REAL tre1_0_0;
3137 	  FFTW_REAL tim1_0_0;
3138 	  FFTW_REAL tre1_0_1;
3139 	  FFTW_REAL tim1_0_1;
3140 	  FFTW_REAL tre1_0_2;
3141 	  FFTW_REAL tim1_0_2;
3142 	  FFTW_REAL tre1_0_3;
3143 	  FFTW_REAL tim1_0_3;
3144 	  FFTW_REAL tre1_1_0;
3145 	  FFTW_REAL tim1_1_0;
3146 	  FFTW_REAL tre1_1_1;
3147 	  FFTW_REAL tim1_1_1;
3148 	  FFTW_REAL tre1_1_2;
3149 	  FFTW_REAL tim1_1_2;
3150 	  FFTW_REAL tre1_1_3;
3151 	  FFTW_REAL tim1_1_3;
3152 	  {
3153 	       FFTW_REAL tre2_1_0;
3154 	       FFTW_REAL tim2_1_0;
3155 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_4 - tre0_3_4);
3156 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_4 + tre0_3_4);
3157 	       tre1_0_0 = tre0_3_0 + tre2_1_0;
3158 	       tim1_0_0 = tim0_3_0 - tim2_1_0;
3159 	       tre1_1_0 = tre0_3_0 - tre2_1_0;
3160 	       tim1_1_0 = tim0_3_0 + tim2_1_0;
3161 	  }
3162 	  {
3163 	       FFTW_REAL tre2_0_0;
3164 	       FFTW_REAL tim2_0_0;
3165 	       FFTW_REAL tre2_1_0;
3166 	       FFTW_REAL tim2_1_0;
3167 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_1) + (((FFTW_REAL) FFTW_K555570233) * tim0_3_1);
3168 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_1) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_1);
3169 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_5) - (((FFTW_REAL) FFTW_K980785280) * tre0_3_5);
3170 	       tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_5) + (((FFTW_REAL) FFTW_K195090322) * tre0_3_5);
3171 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
3172 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
3173 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
3174 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
3175 	  }
3176 	  {
3177 	       FFTW_REAL tre2_0_0;
3178 	       FFTW_REAL tim2_0_0;
3179 	       FFTW_REAL tre2_1_0;
3180 	       FFTW_REAL tim2_1_0;
3181 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_2) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_2);
3182 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_2) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_2);
3183 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_3_6) + (((FFTW_REAL) FFTW_K382683432) * tim0_3_6);
3184 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_6);
3185 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
3186 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
3187 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
3188 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
3189 	  }
3190 	  {
3191 	       FFTW_REAL tre2_0_0;
3192 	       FFTW_REAL tim2_0_0;
3193 	       FFTW_REAL tre2_1_0;
3194 	       FFTW_REAL tim2_1_0;
3195 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_3) - (((FFTW_REAL) FFTW_K195090322) * tre0_3_3);
3196 	       tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_3) + (((FFTW_REAL) FFTW_K980785280) * tre0_3_3);
3197 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_3_7) + (((FFTW_REAL) FFTW_K831469612) * tim0_3_7);
3198 	       tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_7) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_7);
3199 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
3200 	       tim1_0_3 = tim2_1_0 - tim2_0_0;
3201 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
3202 	       tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
3203 	  }
3204 	  {
3205 	       FFTW_REAL tre2_0_0;
3206 	       FFTW_REAL tim2_0_0;
3207 	       FFTW_REAL tre2_0_1;
3208 	       FFTW_REAL tim2_0_1;
3209 	       FFTW_REAL tre2_1_0;
3210 	       FFTW_REAL tim2_1_0;
3211 	       FFTW_REAL tre2_1_1;
3212 	       FFTW_REAL tim2_1_1;
3213 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
3214 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
3215 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
3216 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
3217 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
3218 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
3219 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
3220 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
3221 	       c_re(out[3 * ostride]) = tre2_0_0 + tre2_0_1;
3222 	       c_im(out[3 * ostride]) = tim2_0_0 + tim2_0_1;
3223 	       c_re(out[19 * ostride]) = tre2_0_0 - tre2_0_1;
3224 	       c_im(out[19 * ostride]) = tim2_0_0 - tim2_0_1;
3225 	       c_re(out[11 * ostride]) = tre2_1_0 + tim2_1_1;
3226 	       c_im(out[11 * ostride]) = tim2_1_0 - tre2_1_1;
3227 	       c_re(out[27 * ostride]) = tre2_1_0 - tim2_1_1;
3228 	       c_im(out[27 * ostride]) = tim2_1_0 + tre2_1_1;
3229 	  }
3230 	  {
3231 	       FFTW_REAL tre2_0_0;
3232 	       FFTW_REAL tim2_0_0;
3233 	       FFTW_REAL tre2_0_1;
3234 	       FFTW_REAL tim2_0_1;
3235 	       FFTW_REAL tre2_1_0;
3236 	       FFTW_REAL tim2_1_0;
3237 	       FFTW_REAL tre2_1_1;
3238 	       FFTW_REAL tim2_1_1;
3239 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
3240 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
3241 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
3242 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
3243 	       {
3244 		    FFTW_REAL tre3_0_0;
3245 		    FFTW_REAL tim3_0_0;
3246 		    FFTW_REAL tre3_1_0;
3247 		    FFTW_REAL tim3_1_0;
3248 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
3249 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
3250 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
3251 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
3252 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
3253 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
3254 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
3255 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
3256 	       }
3257 	       c_re(out[7 * ostride]) = tre2_0_0 + tre2_0_1;
3258 	       c_im(out[7 * ostride]) = tim2_0_0 + tim2_0_1;
3259 	       c_re(out[23 * ostride]) = tre2_0_0 - tre2_0_1;
3260 	       c_im(out[23 * ostride]) = tim2_0_0 - tim2_0_1;
3261 	       c_re(out[15 * ostride]) = tre2_1_0 + tim2_1_1;
3262 	       c_im(out[15 * ostride]) = tim2_1_0 - tre2_1_1;
3263 	       c_re(out[31 * ostride]) = tre2_1_0 - tim2_1_1;
3264 	       c_im(out[31 * ostride]) = tim2_1_0 + tre2_1_1;
3265 	  }
3266      }
3267 }
3268 
3269 /* This function contains 16 FP additions and 0 FP multiplications */
3270 
/*
 * Four-point kernel without twiddle factors.
 *
 * Reads in[0], in[istride], in[2 * istride] and in[3 * istride] (called
 * x0..x3 below) and stores four results at out[0], out[ostride],
 * out[2 * ostride] and out[3 * ostride]:
 *
 *      out[0]           = (x0 + x2) + (x1 + x3)
 *      out[ostride]     = (x0 - x2) - i * (x1 - x3)
 *      out[2 * ostride] = (x0 + x2) - (x1 + x3)
 *      out[3 * ostride] = (x0 - x2) + i * (x1 - x3)
 *
 * Only additions and subtractions are used.  Every input is copied into a
 * local before the first store to out.
 */
static void fftw_no_twiddle_4(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Inputs, fetched pair by pair: (x0, x2) then (x1, x3). */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL x3_re = c_re(in[3 * istride]);
     const FFTW_REAL x3_im = c_im(in[3 * istride]);

     /* First stage: sum and difference of the pair (x0, x2). */
     const FFTW_REAL even_sum_re = x0_re + x2_re;
     const FFTW_REAL even_sum_im = x0_im + x2_im;
     const FFTW_REAL even_dif_re = x0_re - x2_re;
     const FFTW_REAL even_dif_im = x0_im - x2_im;

     /* First stage: sum and difference of the pair (x1, x3). */
     const FFTW_REAL odd_sum_re = x1_re + x3_re;
     const FFTW_REAL odd_sum_im = x1_im + x3_im;
     const FFTW_REAL odd_dif_re = x1_re - x3_re;
     const FFTW_REAL odd_dif_im = x1_im - x3_im;

     /* Second stage on the two sums. */
     c_re(out[0]) = even_sum_re + odd_sum_re;
     c_im(out[0]) = even_sum_im + odd_sum_im;
     c_re(out[2 * ostride]) = even_sum_re - odd_sum_re;
     c_im(out[2 * ostride]) = even_sum_im - odd_sum_im;

     /*
      * Second stage on the two differences.  The odd difference enters
      * with its real and imaginary parts swapped (multiplication by -i
      * for out[ostride], by +i for out[3 * ostride]).
      */
     c_re(out[ostride]) = even_dif_re + odd_dif_im;
     c_im(out[ostride]) = even_dif_im - odd_dif_re;
     c_re(out[3 * ostride]) = even_dif_re - odd_dif_im;
     c_im(out[3 * ostride]) = even_dif_im + odd_dif_re;
}
3318 
3319 /* This function contains 44 FP additions and 16 FP multiplications */
3320 
/*
 * Five-point kernel without twiddle factors.
 *
 * Reads in[k * istride] for k = 0..4 (called x0..x4 below) and stores five
 * results at out[k * ostride] for k = 0..4.  out[0] is the plain sum of
 * the inputs.  Outputs 1 and 4 are formed as (common + cross) and
 * (common - cross) of one pair of intermediate terms; outputs 2 and 3 are
 * formed the same way from a second pair.
 *
 * Every input is copied into a local before the first store to out.
 */
static void fftw_no_twiddle_5(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * Multiplicative constants, converted once to the working type.
      * The FFTW_K* names are defined outside this function.
      */
     const FFTW_REAL k309 = (FFTW_REAL) FFTW_K309016994;
     const FFTW_REAL k809 = (FFTW_REAL) FFTW_K809016994;
     const FFTW_REAL k951 = (FFTW_REAL) FFTW_K951056516;
     const FFTW_REAL k587 = (FFTW_REAL) FFTW_K587785252;

     /* The five inputs. */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);
     const FFTW_REAL x3_re = c_re(in[3 * istride]);
     const FFTW_REAL x3_im = c_im(in[3 * istride]);
     const FFTW_REAL x4_re = c_re(in[4 * istride]);
     const FFTW_REAL x4_im = c_im(in[4 * istride]);

     /* Terms shared by outputs 1 and 4. */
     const FFTW_REAL common14_re = x0_re + k309 * (x1_re + x4_re) - k809 * (x2_re + x3_re);
     const FFTW_REAL cross14_re = k951 * (x1_im - x4_im) + k587 * (x2_im - x3_im);
     const FFTW_REAL common14_im = x0_im + k309 * (x1_im + x4_im) - k809 * (x2_im + x3_im);
     const FFTW_REAL cross14_im = k951 * (x4_re - x1_re) + k587 * (x3_re - x2_re);

     /* Terms shared by outputs 2 and 3. */
     const FFTW_REAL common23_re = x0_re + k309 * (x2_re + x3_re) - k809 * (x1_re + x4_re);
     const FFTW_REAL cross23_re = k587 * (x1_im - x4_im) + k951 * (x3_im - x2_im);
     const FFTW_REAL common23_im = x0_im + k309 * (x2_im + x3_im) - k809 * (x1_im + x4_im);
     const FFTW_REAL cross23_im = k587 * (x4_re - x1_re) + k951 * (x2_re - x3_re);

     /* Output 0: sum of all inputs, accumulated left to right. */
     c_re(out[0]) = x0_re + x1_re + x2_re + x3_re + x4_re;
     c_im(out[0]) = x0_im + x1_im + x2_im + x3_im + x4_im;

     /* Outputs 1 and 4. */
     c_re(out[ostride]) = common14_re + cross14_re;
     c_re(out[4 * ostride]) = common14_re - cross14_re;
     c_im(out[ostride]) = common14_im + cross14_im;
     c_im(out[4 * ostride]) = common14_im - cross14_im;

     /* Outputs 2 and 3. */
     c_re(out[2 * ostride]) = common23_re + cross23_re;
     c_re(out[3 * ostride]) = common23_re - cross23_re;
     c_im(out[2 * ostride]) = common23_im + cross23_im;
     c_im(out[3 * ostride]) = common23_im - cross23_im;
}
3378 
3379 /* This function contains 40 FP additions and 8 FP multiplications */
3380 
/*
 * Six-point kernel without twiddle factors.
 *
 * Reads in[k * istride] for k = 0..5 (called x0..x5 below) and stores six
 * results at out[k * ostride] for k = 0..5.  The inputs are first combined
 * in the pairs (x0, x3), (x2, x5) and (x4, x1).  The three pair sums then
 * produce outputs 0, 2 and 4, and the three pair differences produce
 * outputs 3, 1 and 5.
 *
 * Every input is copied into a local before the first store to out.
 */
static void fftw_no_twiddle_6(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * Multiplicative constants, converted once to the working type.
      * The FFTW_K* names are defined outside this function.
      */
     const FFTW_REAL k499 = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k866 = (FFTW_REAL) FFTW_K866025403;

     /* Pair (x0, x3). */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x3_re = c_re(in[3 * istride]);
     const FFTW_REAL x3_im = c_im(in[3 * istride]);
     const FFTW_REAL sum_a_re = x0_re + x3_re;
     const FFTW_REAL sum_a_im = x0_im + x3_im;
     const FFTW_REAL dif_a_re = x0_re - x3_re;
     const FFTW_REAL dif_a_im = x0_im - x3_im;

     /* Pair (x2, x5). */
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);
     const FFTW_REAL x5_re = c_re(in[5 * istride]);
     const FFTW_REAL x5_im = c_im(in[5 * istride]);
     const FFTW_REAL sum_b_re = x2_re + x5_re;
     const FFTW_REAL sum_b_im = x2_im + x5_im;
     const FFTW_REAL dif_b_re = x2_re - x5_re;
     const FFTW_REAL dif_b_im = x2_im - x5_im;

     /* Pair (x4, x1); note the difference is x4 - x1. */
     const FFTW_REAL x4_re = c_re(in[4 * istride]);
     const FFTW_REAL x4_im = c_im(in[4 * istride]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL sum_c_re = x4_re + x1_re;
     const FFTW_REAL sum_c_im = x4_im + x1_im;
     const FFTW_REAL dif_c_re = x4_re - x1_re;
     const FFTW_REAL dif_c_im = x4_im - x1_im;

     /* Terms shared by outputs 4 and 2 (built from the pair sums). */
     const FFTW_REAL sum_common_re = sum_a_re - k499 * (sum_b_re + sum_c_re);
     const FFTW_REAL sum_cross_re = k866 * (sum_b_im - sum_c_im);
     const FFTW_REAL sum_common_im = sum_a_im - k499 * (sum_b_im + sum_c_im);
     const FFTW_REAL sum_cross_im = k866 * (sum_c_re - sum_b_re);

     /* Terms shared by outputs 1 and 5 (built from the pair differences). */
     const FFTW_REAL dif_common_re = dif_a_re - k499 * (dif_b_re + dif_c_re);
     const FFTW_REAL dif_cross_re = k866 * (dif_b_im - dif_c_im);
     const FFTW_REAL dif_common_im = dif_a_im - k499 * (dif_b_im + dif_c_im);
     const FFTW_REAL dif_cross_im = k866 * (dif_c_re - dif_b_re);

     /* Output 0: sum of the three pair sums. */
     c_re(out[0]) = sum_a_re + sum_b_re + sum_c_re;
     c_im(out[0]) = sum_a_im + sum_b_im + sum_c_im;

     /* Outputs 4 and 2. */
     c_re(out[4 * ostride]) = sum_common_re + sum_cross_re;
     c_re(out[2 * ostride]) = sum_common_re - sum_cross_re;
     c_im(out[4 * ostride]) = sum_common_im + sum_cross_im;
     c_im(out[2 * ostride]) = sum_common_im - sum_cross_im;

     /* Output 3: sum of the three pair differences. */
     c_re(out[3 * ostride]) = dif_a_re + dif_b_re + dif_c_re;
     c_im(out[3 * ostride]) = dif_a_im + dif_b_im + dif_c_im;

     /* Outputs 1 and 5. */
     c_re(out[ostride]) = dif_common_re + dif_cross_re;
     c_re(out[5 * ostride]) = dif_common_re - dif_cross_re;
     c_im(out[ostride]) = dif_common_im + dif_cross_im;
     c_im(out[5 * ostride]) = dif_common_im - dif_cross_im;
}
3474 
3475 /* This function contains 928 FP additions and 248 FP multiplications */
3476 
fftw_no_twiddle_64(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)3477 static void fftw_no_twiddle_64(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
3478 {
3479      FFTW_REAL tre0_0_0;
3480      FFTW_REAL tim0_0_0;
3481      FFTW_REAL tre0_0_1;
3482      FFTW_REAL tim0_0_1;
3483      FFTW_REAL tre0_0_2;
3484      FFTW_REAL tim0_0_2;
3485      FFTW_REAL tre0_0_3;
3486      FFTW_REAL tim0_0_3;
3487      FFTW_REAL tre0_0_4;
3488      FFTW_REAL tim0_0_4;
3489      FFTW_REAL tre0_0_5;
3490      FFTW_REAL tim0_0_5;
3491      FFTW_REAL tre0_0_6;
3492      FFTW_REAL tim0_0_6;
3493      FFTW_REAL tre0_0_7;
3494      FFTW_REAL tim0_0_7;
3495      FFTW_REAL tre0_1_0;
3496      FFTW_REAL tim0_1_0;
3497      FFTW_REAL tre0_1_1;
3498      FFTW_REAL tim0_1_1;
3499      FFTW_REAL tre0_1_2;
3500      FFTW_REAL tim0_1_2;
3501      FFTW_REAL tre0_1_3;
3502      FFTW_REAL tim0_1_3;
3503      FFTW_REAL tre0_1_4;
3504      FFTW_REAL tim0_1_4;
3505      FFTW_REAL tre0_1_5;
3506      FFTW_REAL tim0_1_5;
3507      FFTW_REAL tre0_1_6;
3508      FFTW_REAL tim0_1_6;
3509      FFTW_REAL tre0_1_7;
3510      FFTW_REAL tim0_1_7;
3511      FFTW_REAL tre0_2_0;
3512      FFTW_REAL tim0_2_0;
3513      FFTW_REAL tre0_2_1;
3514      FFTW_REAL tim0_2_1;
3515      FFTW_REAL tre0_2_2;
3516      FFTW_REAL tim0_2_2;
3517      FFTW_REAL tre0_2_3;
3518      FFTW_REAL tim0_2_3;
3519      FFTW_REAL tre0_2_4;
3520      FFTW_REAL tim0_2_4;
3521      FFTW_REAL tre0_2_5;
3522      FFTW_REAL tim0_2_5;
3523      FFTW_REAL tre0_2_6;
3524      FFTW_REAL tim0_2_6;
3525      FFTW_REAL tre0_2_7;
3526      FFTW_REAL tim0_2_7;
3527      FFTW_REAL tre0_3_0;
3528      FFTW_REAL tim0_3_0;
3529      FFTW_REAL tre0_3_1;
3530      FFTW_REAL tim0_3_1;
3531      FFTW_REAL tre0_3_2;
3532      FFTW_REAL tim0_3_2;
3533      FFTW_REAL tre0_3_3;
3534      FFTW_REAL tim0_3_3;
3535      FFTW_REAL tre0_3_4;
3536      FFTW_REAL tim0_3_4;
3537      FFTW_REAL tre0_3_5;
3538      FFTW_REAL tim0_3_5;
3539      FFTW_REAL tre0_3_6;
3540      FFTW_REAL tim0_3_6;
3541      FFTW_REAL tre0_3_7;
3542      FFTW_REAL tim0_3_7;
3543      FFTW_REAL tre0_4_0;
3544      FFTW_REAL tim0_4_0;
3545      FFTW_REAL tre0_4_1;
3546      FFTW_REAL tim0_4_1;
3547      FFTW_REAL tre0_4_2;
3548      FFTW_REAL tim0_4_2;
3549      FFTW_REAL tre0_4_3;
3550      FFTW_REAL tim0_4_3;
3551      FFTW_REAL tre0_4_4;
3552      FFTW_REAL tim0_4_4;
3553      FFTW_REAL tre0_4_5;
3554      FFTW_REAL tim0_4_5;
3555      FFTW_REAL tre0_4_6;
3556      FFTW_REAL tim0_4_6;
3557      FFTW_REAL tre0_4_7;
3558      FFTW_REAL tim0_4_7;
3559      FFTW_REAL tre0_5_0;
3560      FFTW_REAL tim0_5_0;
3561      FFTW_REAL tre0_5_1;
3562      FFTW_REAL tim0_5_1;
3563      FFTW_REAL tre0_5_2;
3564      FFTW_REAL tim0_5_2;
3565      FFTW_REAL tre0_5_3;
3566      FFTW_REAL tim0_5_3;
3567      FFTW_REAL tre0_5_4;
3568      FFTW_REAL tim0_5_4;
3569      FFTW_REAL tre0_5_5;
3570      FFTW_REAL tim0_5_5;
3571      FFTW_REAL tre0_5_6;
3572      FFTW_REAL tim0_5_6;
3573      FFTW_REAL tre0_5_7;
3574      FFTW_REAL tim0_5_7;
3575      FFTW_REAL tre0_6_0;
3576      FFTW_REAL tim0_6_0;
3577      FFTW_REAL tre0_6_1;
3578      FFTW_REAL tim0_6_1;
3579      FFTW_REAL tre0_6_2;
3580      FFTW_REAL tim0_6_2;
3581      FFTW_REAL tre0_6_3;
3582      FFTW_REAL tim0_6_3;
3583      FFTW_REAL tre0_6_4;
3584      FFTW_REAL tim0_6_4;
3585      FFTW_REAL tre0_6_5;
3586      FFTW_REAL tim0_6_5;
3587      FFTW_REAL tre0_6_6;
3588      FFTW_REAL tim0_6_6;
3589      FFTW_REAL tre0_6_7;
3590      FFTW_REAL tim0_6_7;
3591      FFTW_REAL tre0_7_0;
3592      FFTW_REAL tim0_7_0;
3593      FFTW_REAL tre0_7_1;
3594      FFTW_REAL tim0_7_1;
3595      FFTW_REAL tre0_7_2;
3596      FFTW_REAL tim0_7_2;
3597      FFTW_REAL tre0_7_3;
3598      FFTW_REAL tim0_7_3;
3599      FFTW_REAL tre0_7_4;
3600      FFTW_REAL tim0_7_4;
3601      FFTW_REAL tre0_7_5;
3602      FFTW_REAL tim0_7_5;
3603      FFTW_REAL tre0_7_6;
3604      FFTW_REAL tim0_7_6;
3605      FFTW_REAL tre0_7_7;
3606      FFTW_REAL tim0_7_7;
3607      {
3608 	  FFTW_REAL tre1_0_0;
3609 	  FFTW_REAL tim1_0_0;
3610 	  FFTW_REAL tre1_0_1;
3611 	  FFTW_REAL tim1_0_1;
3612 	  FFTW_REAL tre1_0_2;
3613 	  FFTW_REAL tim1_0_2;
3614 	  FFTW_REAL tre1_0_3;
3615 	  FFTW_REAL tim1_0_3;
3616 	  FFTW_REAL tre1_1_0;
3617 	  FFTW_REAL tim1_1_0;
3618 	  FFTW_REAL tre1_1_1;
3619 	  FFTW_REAL tim1_1_1;
3620 	  FFTW_REAL tre1_1_2;
3621 	  FFTW_REAL tim1_1_2;
3622 	  FFTW_REAL tre1_1_3;
3623 	  FFTW_REAL tim1_1_3;
3624 	  {
3625 	       FFTW_REAL tre2_0_0;
3626 	       FFTW_REAL tim2_0_0;
3627 	       FFTW_REAL tre2_1_0;
3628 	       FFTW_REAL tim2_1_0;
3629 	       tre2_0_0 = c_re(in[0]);
3630 	       tim2_0_0 = c_im(in[0]);
3631 	       tre2_1_0 = c_re(in[32 * istride]);
3632 	       tim2_1_0 = c_im(in[32 * istride]);
3633 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
3634 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
3635 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
3636 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
3637 	  }
3638 	  {
3639 	       FFTW_REAL tre2_0_0;
3640 	       FFTW_REAL tim2_0_0;
3641 	       FFTW_REAL tre2_1_0;
3642 	       FFTW_REAL tim2_1_0;
3643 	       tre2_0_0 = c_re(in[8 * istride]);
3644 	       tim2_0_0 = c_im(in[8 * istride]);
3645 	       tre2_1_0 = c_re(in[40 * istride]);
3646 	       tim2_1_0 = c_im(in[40 * istride]);
3647 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
3648 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
3649 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
3650 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
3651 	  }
3652 	  {
3653 	       FFTW_REAL tre2_0_0;
3654 	       FFTW_REAL tim2_0_0;
3655 	       FFTW_REAL tre2_1_0;
3656 	       FFTW_REAL tim2_1_0;
3657 	       tre2_0_0 = c_re(in[16 * istride]);
3658 	       tim2_0_0 = c_im(in[16 * istride]);
3659 	       tre2_1_0 = c_re(in[48 * istride]);
3660 	       tim2_1_0 = c_im(in[48 * istride]);
3661 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
3662 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
3663 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
3664 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
3665 	  }
3666 	  {
3667 	       FFTW_REAL tre2_0_0;
3668 	       FFTW_REAL tim2_0_0;
3669 	       FFTW_REAL tre2_1_0;
3670 	       FFTW_REAL tim2_1_0;
3671 	       tre2_0_0 = c_re(in[24 * istride]);
3672 	       tim2_0_0 = c_im(in[24 * istride]);
3673 	       tre2_1_0 = c_re(in[56 * istride]);
3674 	       tim2_1_0 = c_im(in[56 * istride]);
3675 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
3676 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
3677 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
3678 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
3679 	  }
3680 	  {
3681 	       FFTW_REAL tre2_0_0;
3682 	       FFTW_REAL tim2_0_0;
3683 	       FFTW_REAL tre2_0_1;
3684 	       FFTW_REAL tim2_0_1;
3685 	       FFTW_REAL tre2_1_0;
3686 	       FFTW_REAL tim2_1_0;
3687 	       FFTW_REAL tre2_1_1;
3688 	       FFTW_REAL tim2_1_1;
3689 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
3690 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
3691 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
3692 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
3693 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
3694 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
3695 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
3696 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
3697 	       tre0_0_0 = tre2_0_0 + tre2_0_1;
3698 	       tim0_0_0 = tim2_0_0 + tim2_0_1;
3699 	       tre0_4_0 = tre2_0_0 - tre2_0_1;
3700 	       tim0_4_0 = tim2_0_0 - tim2_0_1;
3701 	       tre0_2_0 = tre2_1_0 + tim2_1_1;
3702 	       tim0_2_0 = tim2_1_0 - tre2_1_1;
3703 	       tre0_6_0 = tre2_1_0 - tim2_1_1;
3704 	       tim0_6_0 = tim2_1_0 + tre2_1_1;
3705 	  }
3706 	  {
3707 	       FFTW_REAL tre2_0_0;
3708 	       FFTW_REAL tim2_0_0;
3709 	       FFTW_REAL tre2_0_1;
3710 	       FFTW_REAL tim2_0_1;
3711 	       FFTW_REAL tre2_1_0;
3712 	       FFTW_REAL tim2_1_0;
3713 	       FFTW_REAL tre2_1_1;
3714 	       FFTW_REAL tim2_1_1;
3715 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
3716 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
3717 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
3718 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
3719 	       {
3720 		    FFTW_REAL tre3_0_0;
3721 		    FFTW_REAL tim3_0_0;
3722 		    FFTW_REAL tre3_1_0;
3723 		    FFTW_REAL tim3_1_0;
3724 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
3725 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
3726 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
3727 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
3728 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
3729 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
3730 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
3731 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
3732 	       }
3733 	       tre0_1_0 = tre2_0_0 + tre2_0_1;
3734 	       tim0_1_0 = tim2_0_0 + tim2_0_1;
3735 	       tre0_5_0 = tre2_0_0 - tre2_0_1;
3736 	       tim0_5_0 = tim2_0_0 - tim2_0_1;
3737 	       tre0_3_0 = tre2_1_0 + tim2_1_1;
3738 	       tim0_3_0 = tim2_1_0 - tre2_1_1;
3739 	       tre0_7_0 = tre2_1_0 - tim2_1_1;
3740 	       tim0_7_0 = tim2_1_0 + tre2_1_1;
3741 	  }
3742      }
3743      {
3744 	  FFTW_REAL tre1_0_0;
3745 	  FFTW_REAL tim1_0_0;
3746 	  FFTW_REAL tre1_0_1;
3747 	  FFTW_REAL tim1_0_1;
3748 	  FFTW_REAL tre1_0_2;
3749 	  FFTW_REAL tim1_0_2;
3750 	  FFTW_REAL tre1_0_3;
3751 	  FFTW_REAL tim1_0_3;
3752 	  FFTW_REAL tre1_1_0;
3753 	  FFTW_REAL tim1_1_0;
3754 	  FFTW_REAL tre1_1_1;
3755 	  FFTW_REAL tim1_1_1;
3756 	  FFTW_REAL tre1_1_2;
3757 	  FFTW_REAL tim1_1_2;
3758 	  FFTW_REAL tre1_1_3;
3759 	  FFTW_REAL tim1_1_3;
3760 	  {
3761 	       FFTW_REAL tre2_0_0;
3762 	       FFTW_REAL tim2_0_0;
3763 	       FFTW_REAL tre2_1_0;
3764 	       FFTW_REAL tim2_1_0;
3765 	       tre2_0_0 = c_re(in[istride]);
3766 	       tim2_0_0 = c_im(in[istride]);
3767 	       tre2_1_0 = c_re(in[33 * istride]);
3768 	       tim2_1_0 = c_im(in[33 * istride]);
3769 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
3770 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
3771 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
3772 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
3773 	  }
3774 	  {
3775 	       FFTW_REAL tre2_0_0;
3776 	       FFTW_REAL tim2_0_0;
3777 	       FFTW_REAL tre2_1_0;
3778 	       FFTW_REAL tim2_1_0;
3779 	       tre2_0_0 = c_re(in[9 * istride]);
3780 	       tim2_0_0 = c_im(in[9 * istride]);
3781 	       tre2_1_0 = c_re(in[41 * istride]);
3782 	       tim2_1_0 = c_im(in[41 * istride]);
3783 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
3784 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
3785 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
3786 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
3787 	  }
3788 	  {
3789 	       FFTW_REAL tre2_0_0;
3790 	       FFTW_REAL tim2_0_0;
3791 	       FFTW_REAL tre2_1_0;
3792 	       FFTW_REAL tim2_1_0;
3793 	       tre2_0_0 = c_re(in[17 * istride]);
3794 	       tim2_0_0 = c_im(in[17 * istride]);
3795 	       tre2_1_0 = c_re(in[49 * istride]);
3796 	       tim2_1_0 = c_im(in[49 * istride]);
3797 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
3798 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
3799 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
3800 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
3801 	  }
3802 	  {
3803 	       FFTW_REAL tre2_0_0;
3804 	       FFTW_REAL tim2_0_0;
3805 	       FFTW_REAL tre2_1_0;
3806 	       FFTW_REAL tim2_1_0;
3807 	       tre2_0_0 = c_re(in[25 * istride]);
3808 	       tim2_0_0 = c_im(in[25 * istride]);
3809 	       tre2_1_0 = c_re(in[57 * istride]);
3810 	       tim2_1_0 = c_im(in[57 * istride]);
3811 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
3812 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
3813 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
3814 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
3815 	  }
3816 	  {
3817 	       FFTW_REAL tre2_0_0;
3818 	       FFTW_REAL tim2_0_0;
3819 	       FFTW_REAL tre2_0_1;
3820 	       FFTW_REAL tim2_0_1;
3821 	       FFTW_REAL tre2_1_0;
3822 	       FFTW_REAL tim2_1_0;
3823 	       FFTW_REAL tre2_1_1;
3824 	       FFTW_REAL tim2_1_1;
3825 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
3826 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
3827 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
3828 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
3829 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
3830 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
3831 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
3832 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
3833 	       tre0_0_1 = tre2_0_0 + tre2_0_1;
3834 	       tim0_0_1 = tim2_0_0 + tim2_0_1;
3835 	       tre0_4_1 = tre2_0_0 - tre2_0_1;
3836 	       tim0_4_1 = tim2_0_0 - tim2_0_1;
3837 	       tre0_2_1 = tre2_1_0 + tim2_1_1;
3838 	       tim0_2_1 = tim2_1_0 - tre2_1_1;
3839 	       tre0_6_1 = tre2_1_0 - tim2_1_1;
3840 	       tim0_6_1 = tim2_1_0 + tre2_1_1;
3841 	  }
3842 	  {
3843 	       FFTW_REAL tre2_0_0;
3844 	       FFTW_REAL tim2_0_0;
3845 	       FFTW_REAL tre2_0_1;
3846 	       FFTW_REAL tim2_0_1;
3847 	       FFTW_REAL tre2_1_0;
3848 	       FFTW_REAL tim2_1_0;
3849 	       FFTW_REAL tre2_1_1;
3850 	       FFTW_REAL tim2_1_1;
3851 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
3852 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
3853 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
3854 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
3855 	       {
3856 		    FFTW_REAL tre3_0_0;
3857 		    FFTW_REAL tim3_0_0;
3858 		    FFTW_REAL tre3_1_0;
3859 		    FFTW_REAL tim3_1_0;
3860 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
3861 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
3862 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
3863 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
3864 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
3865 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
3866 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
3867 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
3868 	       }
3869 	       tre0_1_1 = tre2_0_0 + tre2_0_1;
3870 	       tim0_1_1 = tim2_0_0 + tim2_0_1;
3871 	       tre0_5_1 = tre2_0_0 - tre2_0_1;
3872 	       tim0_5_1 = tim2_0_0 - tim2_0_1;
3873 	       tre0_3_1 = tre2_1_0 + tim2_1_1;
3874 	       tim0_3_1 = tim2_1_0 - tre2_1_1;
3875 	       tre0_7_1 = tre2_1_0 - tim2_1_1;
3876 	       tim0_7_1 = tim2_1_0 + tre2_1_1;
3877 	  }
3878      }
3879      {
3880 	  FFTW_REAL tre1_0_0;
3881 	  FFTW_REAL tim1_0_0;
3882 	  FFTW_REAL tre1_0_1;
3883 	  FFTW_REAL tim1_0_1;
3884 	  FFTW_REAL tre1_0_2;
3885 	  FFTW_REAL tim1_0_2;
3886 	  FFTW_REAL tre1_0_3;
3887 	  FFTW_REAL tim1_0_3;
3888 	  FFTW_REAL tre1_1_0;
3889 	  FFTW_REAL tim1_1_0;
3890 	  FFTW_REAL tre1_1_1;
3891 	  FFTW_REAL tim1_1_1;
3892 	  FFTW_REAL tre1_1_2;
3893 	  FFTW_REAL tim1_1_2;
3894 	  FFTW_REAL tre1_1_3;
3895 	  FFTW_REAL tim1_1_3;
3896 	  {
3897 	       FFTW_REAL tre2_0_0;
3898 	       FFTW_REAL tim2_0_0;
3899 	       FFTW_REAL tre2_1_0;
3900 	       FFTW_REAL tim2_1_0;
3901 	       tre2_0_0 = c_re(in[2 * istride]);
3902 	       tim2_0_0 = c_im(in[2 * istride]);
3903 	       tre2_1_0 = c_re(in[34 * istride]);
3904 	       tim2_1_0 = c_im(in[34 * istride]);
3905 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
3906 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
3907 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
3908 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
3909 	  }
3910 	  {
3911 	       FFTW_REAL tre2_0_0;
3912 	       FFTW_REAL tim2_0_0;
3913 	       FFTW_REAL tre2_1_0;
3914 	       FFTW_REAL tim2_1_0;
3915 	       tre2_0_0 = c_re(in[10 * istride]);
3916 	       tim2_0_0 = c_im(in[10 * istride]);
3917 	       tre2_1_0 = c_re(in[42 * istride]);
3918 	       tim2_1_0 = c_im(in[42 * istride]);
3919 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
3920 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
3921 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
3922 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
3923 	  }
3924 	  {
3925 	       FFTW_REAL tre2_0_0;
3926 	       FFTW_REAL tim2_0_0;
3927 	       FFTW_REAL tre2_1_0;
3928 	       FFTW_REAL tim2_1_0;
3929 	       tre2_0_0 = c_re(in[18 * istride]);
3930 	       tim2_0_0 = c_im(in[18 * istride]);
3931 	       tre2_1_0 = c_re(in[50 * istride]);
3932 	       tim2_1_0 = c_im(in[50 * istride]);
3933 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
3934 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
3935 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
3936 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
3937 	  }
3938 	  {
3939 	       FFTW_REAL tre2_0_0;
3940 	       FFTW_REAL tim2_0_0;
3941 	       FFTW_REAL tre2_1_0;
3942 	       FFTW_REAL tim2_1_0;
3943 	       tre2_0_0 = c_re(in[26 * istride]);
3944 	       tim2_0_0 = c_im(in[26 * istride]);
3945 	       tre2_1_0 = c_re(in[58 * istride]);
3946 	       tim2_1_0 = c_im(in[58 * istride]);
3947 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
3948 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
3949 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
3950 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
3951 	  }
3952 	  {
3953 	       FFTW_REAL tre2_0_0;
3954 	       FFTW_REAL tim2_0_0;
3955 	       FFTW_REAL tre2_0_1;
3956 	       FFTW_REAL tim2_0_1;
3957 	       FFTW_REAL tre2_1_0;
3958 	       FFTW_REAL tim2_1_0;
3959 	       FFTW_REAL tre2_1_1;
3960 	       FFTW_REAL tim2_1_1;
3961 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
3962 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
3963 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
3964 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
3965 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
3966 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
3967 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
3968 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
3969 	       tre0_0_2 = tre2_0_0 + tre2_0_1;
3970 	       tim0_0_2 = tim2_0_0 + tim2_0_1;
3971 	       tre0_4_2 = tre2_0_0 - tre2_0_1;
3972 	       tim0_4_2 = tim2_0_0 - tim2_0_1;
3973 	       tre0_2_2 = tre2_1_0 + tim2_1_1;
3974 	       tim0_2_2 = tim2_1_0 - tre2_1_1;
3975 	       tre0_6_2 = tre2_1_0 - tim2_1_1;
3976 	       tim0_6_2 = tim2_1_0 + tre2_1_1;
3977 	  }
3978 	  {
3979 	       FFTW_REAL tre2_0_0;
3980 	       FFTW_REAL tim2_0_0;
3981 	       FFTW_REAL tre2_0_1;
3982 	       FFTW_REAL tim2_0_1;
3983 	       FFTW_REAL tre2_1_0;
3984 	       FFTW_REAL tim2_1_0;
3985 	       FFTW_REAL tre2_1_1;
3986 	       FFTW_REAL tim2_1_1;
3987 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
3988 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
3989 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
3990 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
3991 	       {
3992 		    FFTW_REAL tre3_0_0;
3993 		    FFTW_REAL tim3_0_0;
3994 		    FFTW_REAL tre3_1_0;
3995 		    FFTW_REAL tim3_1_0;
3996 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
3997 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
3998 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
3999 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4000 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4001 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4002 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4003 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4004 	       }
4005 	       tre0_1_2 = tre2_0_0 + tre2_0_1;
4006 	       tim0_1_2 = tim2_0_0 + tim2_0_1;
4007 	       tre0_5_2 = tre2_0_0 - tre2_0_1;
4008 	       tim0_5_2 = tim2_0_0 - tim2_0_1;
4009 	       tre0_3_2 = tre2_1_0 + tim2_1_1;
4010 	       tim0_3_2 = tim2_1_0 - tre2_1_1;
4011 	       tre0_7_2 = tre2_1_0 - tim2_1_1;
4012 	       tim0_7_2 = tim2_1_0 + tre2_1_1;
4013 	  }
4014      }
4015      {
4016 	  FFTW_REAL tre1_0_0;
4017 	  FFTW_REAL tim1_0_0;
4018 	  FFTW_REAL tre1_0_1;
4019 	  FFTW_REAL tim1_0_1;
4020 	  FFTW_REAL tre1_0_2;
4021 	  FFTW_REAL tim1_0_2;
4022 	  FFTW_REAL tre1_0_3;
4023 	  FFTW_REAL tim1_0_3;
4024 	  FFTW_REAL tre1_1_0;
4025 	  FFTW_REAL tim1_1_0;
4026 	  FFTW_REAL tre1_1_1;
4027 	  FFTW_REAL tim1_1_1;
4028 	  FFTW_REAL tre1_1_2;
4029 	  FFTW_REAL tim1_1_2;
4030 	  FFTW_REAL tre1_1_3;
4031 	  FFTW_REAL tim1_1_3;
4032 	  {
4033 	       FFTW_REAL tre2_0_0;
4034 	       FFTW_REAL tim2_0_0;
4035 	       FFTW_REAL tre2_1_0;
4036 	       FFTW_REAL tim2_1_0;
4037 	       tre2_0_0 = c_re(in[3 * istride]);
4038 	       tim2_0_0 = c_im(in[3 * istride]);
4039 	       tre2_1_0 = c_re(in[35 * istride]);
4040 	       tim2_1_0 = c_im(in[35 * istride]);
4041 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
4042 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
4043 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
4044 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
4045 	  }
4046 	  {
4047 	       FFTW_REAL tre2_0_0;
4048 	       FFTW_REAL tim2_0_0;
4049 	       FFTW_REAL tre2_1_0;
4050 	       FFTW_REAL tim2_1_0;
4051 	       tre2_0_0 = c_re(in[11 * istride]);
4052 	       tim2_0_0 = c_im(in[11 * istride]);
4053 	       tre2_1_0 = c_re(in[43 * istride]);
4054 	       tim2_1_0 = c_im(in[43 * istride]);
4055 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4056 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4057 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4058 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4059 	  }
4060 	  {
4061 	       FFTW_REAL tre2_0_0;
4062 	       FFTW_REAL tim2_0_0;
4063 	       FFTW_REAL tre2_1_0;
4064 	       FFTW_REAL tim2_1_0;
4065 	       tre2_0_0 = c_re(in[19 * istride]);
4066 	       tim2_0_0 = c_im(in[19 * istride]);
4067 	       tre2_1_0 = c_re(in[51 * istride]);
4068 	       tim2_1_0 = c_im(in[51 * istride]);
4069 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4070 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4071 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4072 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4073 	  }
4074 	  {
4075 	       FFTW_REAL tre2_0_0;
4076 	       FFTW_REAL tim2_0_0;
4077 	       FFTW_REAL tre2_1_0;
4078 	       FFTW_REAL tim2_1_0;
4079 	       tre2_0_0 = c_re(in[27 * istride]);
4080 	       tim2_0_0 = c_im(in[27 * istride]);
4081 	       tre2_1_0 = c_re(in[59 * istride]);
4082 	       tim2_1_0 = c_im(in[59 * istride]);
4083 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4084 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4085 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4086 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4087 	  }
4088 	  {
4089 	       FFTW_REAL tre2_0_0;
4090 	       FFTW_REAL tim2_0_0;
4091 	       FFTW_REAL tre2_0_1;
4092 	       FFTW_REAL tim2_0_1;
4093 	       FFTW_REAL tre2_1_0;
4094 	       FFTW_REAL tim2_1_0;
4095 	       FFTW_REAL tre2_1_1;
4096 	       FFTW_REAL tim2_1_1;
4097 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4098 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4099 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4100 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4101 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4102 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4103 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4104 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4105 	       tre0_0_3 = tre2_0_0 + tre2_0_1;
4106 	       tim0_0_3 = tim2_0_0 + tim2_0_1;
4107 	       tre0_4_3 = tre2_0_0 - tre2_0_1;
4108 	       tim0_4_3 = tim2_0_0 - tim2_0_1;
4109 	       tre0_2_3 = tre2_1_0 + tim2_1_1;
4110 	       tim0_2_3 = tim2_1_0 - tre2_1_1;
4111 	       tre0_6_3 = tre2_1_0 - tim2_1_1;
4112 	       tim0_6_3 = tim2_1_0 + tre2_1_1;
4113 	  }
4114 	  {
4115 	       FFTW_REAL tre2_0_0;
4116 	       FFTW_REAL tim2_0_0;
4117 	       FFTW_REAL tre2_0_1;
4118 	       FFTW_REAL tim2_0_1;
4119 	       FFTW_REAL tre2_1_0;
4120 	       FFTW_REAL tim2_1_0;
4121 	       FFTW_REAL tre2_1_1;
4122 	       FFTW_REAL tim2_1_1;
4123 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4124 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4125 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4126 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4127 	       {
4128 		    FFTW_REAL tre3_0_0;
4129 		    FFTW_REAL tim3_0_0;
4130 		    FFTW_REAL tre3_1_0;
4131 		    FFTW_REAL tim3_1_0;
4132 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4133 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4134 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4135 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4136 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4137 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4138 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4139 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4140 	       }
4141 	       tre0_1_3 = tre2_0_0 + tre2_0_1;
4142 	       tim0_1_3 = tim2_0_0 + tim2_0_1;
4143 	       tre0_5_3 = tre2_0_0 - tre2_0_1;
4144 	       tim0_5_3 = tim2_0_0 - tim2_0_1;
4145 	       tre0_3_3 = tre2_1_0 + tim2_1_1;
4146 	       tim0_3_3 = tim2_1_0 - tre2_1_1;
4147 	       tre0_7_3 = tre2_1_0 - tim2_1_1;
4148 	       tim0_7_3 = tim2_1_0 + tre2_1_1;
4149 	  }
4150      }
4151      {
4152 	  FFTW_REAL tre1_0_0;
4153 	  FFTW_REAL tim1_0_0;
4154 	  FFTW_REAL tre1_0_1;
4155 	  FFTW_REAL tim1_0_1;
4156 	  FFTW_REAL tre1_0_2;
4157 	  FFTW_REAL tim1_0_2;
4158 	  FFTW_REAL tre1_0_3;
4159 	  FFTW_REAL tim1_0_3;
4160 	  FFTW_REAL tre1_1_0;
4161 	  FFTW_REAL tim1_1_0;
4162 	  FFTW_REAL tre1_1_1;
4163 	  FFTW_REAL tim1_1_1;
4164 	  FFTW_REAL tre1_1_2;
4165 	  FFTW_REAL tim1_1_2;
4166 	  FFTW_REAL tre1_1_3;
4167 	  FFTW_REAL tim1_1_3;
4168 	  {
4169 	       FFTW_REAL tre2_0_0;
4170 	       FFTW_REAL tim2_0_0;
4171 	       FFTW_REAL tre2_1_0;
4172 	       FFTW_REAL tim2_1_0;
4173 	       tre2_0_0 = c_re(in[4 * istride]);
4174 	       tim2_0_0 = c_im(in[4 * istride]);
4175 	       tre2_1_0 = c_re(in[36 * istride]);
4176 	       tim2_1_0 = c_im(in[36 * istride]);
4177 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
4178 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
4179 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
4180 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
4181 	  }
4182 	  {
4183 	       FFTW_REAL tre2_0_0;
4184 	       FFTW_REAL tim2_0_0;
4185 	       FFTW_REAL tre2_1_0;
4186 	       FFTW_REAL tim2_1_0;
4187 	       tre2_0_0 = c_re(in[12 * istride]);
4188 	       tim2_0_0 = c_im(in[12 * istride]);
4189 	       tre2_1_0 = c_re(in[44 * istride]);
4190 	       tim2_1_0 = c_im(in[44 * istride]);
4191 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4192 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4193 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4194 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4195 	  }
4196 	  {
4197 	       FFTW_REAL tre2_0_0;
4198 	       FFTW_REAL tim2_0_0;
4199 	       FFTW_REAL tre2_1_0;
4200 	       FFTW_REAL tim2_1_0;
4201 	       tre2_0_0 = c_re(in[20 * istride]);
4202 	       tim2_0_0 = c_im(in[20 * istride]);
4203 	       tre2_1_0 = c_re(in[52 * istride]);
4204 	       tim2_1_0 = c_im(in[52 * istride]);
4205 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4206 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4207 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4208 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4209 	  }
4210 	  {
4211 	       FFTW_REAL tre2_0_0;
4212 	       FFTW_REAL tim2_0_0;
4213 	       FFTW_REAL tre2_1_0;
4214 	       FFTW_REAL tim2_1_0;
4215 	       tre2_0_0 = c_re(in[28 * istride]);
4216 	       tim2_0_0 = c_im(in[28 * istride]);
4217 	       tre2_1_0 = c_re(in[60 * istride]);
4218 	       tim2_1_0 = c_im(in[60 * istride]);
4219 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4220 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4221 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4222 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4223 	  }
4224 	  {
4225 	       FFTW_REAL tre2_0_0;
4226 	       FFTW_REAL tim2_0_0;
4227 	       FFTW_REAL tre2_0_1;
4228 	       FFTW_REAL tim2_0_1;
4229 	       FFTW_REAL tre2_1_0;
4230 	       FFTW_REAL tim2_1_0;
4231 	       FFTW_REAL tre2_1_1;
4232 	       FFTW_REAL tim2_1_1;
4233 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4234 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4235 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4236 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4237 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4238 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4239 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4240 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4241 	       tre0_0_4 = tre2_0_0 + tre2_0_1;
4242 	       tim0_0_4 = tim2_0_0 + tim2_0_1;
4243 	       tre0_4_4 = tre2_0_0 - tre2_0_1;
4244 	       tim0_4_4 = tim2_0_0 - tim2_0_1;
4245 	       tre0_2_4 = tre2_1_0 + tim2_1_1;
4246 	       tim0_2_4 = tim2_1_0 - tre2_1_1;
4247 	       tre0_6_4 = tre2_1_0 - tim2_1_1;
4248 	       tim0_6_4 = tim2_1_0 + tre2_1_1;
4249 	  }
4250 	  {
4251 	       FFTW_REAL tre2_0_0;
4252 	       FFTW_REAL tim2_0_0;
4253 	       FFTW_REAL tre2_0_1;
4254 	       FFTW_REAL tim2_0_1;
4255 	       FFTW_REAL tre2_1_0;
4256 	       FFTW_REAL tim2_1_0;
4257 	       FFTW_REAL tre2_1_1;
4258 	       FFTW_REAL tim2_1_1;
4259 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4260 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4261 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4262 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4263 	       {
4264 		    FFTW_REAL tre3_0_0;
4265 		    FFTW_REAL tim3_0_0;
4266 		    FFTW_REAL tre3_1_0;
4267 		    FFTW_REAL tim3_1_0;
4268 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4269 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4270 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4271 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4272 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4273 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4274 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4275 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4276 	       }
4277 	       tre0_1_4 = tre2_0_0 + tre2_0_1;
4278 	       tim0_1_4 = tim2_0_0 + tim2_0_1;
4279 	       tre0_5_4 = tre2_0_0 - tre2_0_1;
4280 	       tim0_5_4 = tim2_0_0 - tim2_0_1;
4281 	       tre0_3_4 = tre2_1_0 + tim2_1_1;
4282 	       tim0_3_4 = tim2_1_0 - tre2_1_1;
4283 	       tre0_7_4 = tre2_1_0 - tim2_1_1;
4284 	       tim0_7_4 = tim2_1_0 + tre2_1_1;
4285 	  }
4286      }
4287      {
4288 	  FFTW_REAL tre1_0_0;
4289 	  FFTW_REAL tim1_0_0;
4290 	  FFTW_REAL tre1_0_1;
4291 	  FFTW_REAL tim1_0_1;
4292 	  FFTW_REAL tre1_0_2;
4293 	  FFTW_REAL tim1_0_2;
4294 	  FFTW_REAL tre1_0_3;
4295 	  FFTW_REAL tim1_0_3;
4296 	  FFTW_REAL tre1_1_0;
4297 	  FFTW_REAL tim1_1_0;
4298 	  FFTW_REAL tre1_1_1;
4299 	  FFTW_REAL tim1_1_1;
4300 	  FFTW_REAL tre1_1_2;
4301 	  FFTW_REAL tim1_1_2;
4302 	  FFTW_REAL tre1_1_3;
4303 	  FFTW_REAL tim1_1_3;
4304 	  {
4305 	       FFTW_REAL tre2_0_0;
4306 	       FFTW_REAL tim2_0_0;
4307 	       FFTW_REAL tre2_1_0;
4308 	       FFTW_REAL tim2_1_0;
4309 	       tre2_0_0 = c_re(in[5 * istride]);
4310 	       tim2_0_0 = c_im(in[5 * istride]);
4311 	       tre2_1_0 = c_re(in[37 * istride]);
4312 	       tim2_1_0 = c_im(in[37 * istride]);
4313 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
4314 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
4315 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
4316 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
4317 	  }
4318 	  {
4319 	       FFTW_REAL tre2_0_0;
4320 	       FFTW_REAL tim2_0_0;
4321 	       FFTW_REAL tre2_1_0;
4322 	       FFTW_REAL tim2_1_0;
4323 	       tre2_0_0 = c_re(in[13 * istride]);
4324 	       tim2_0_0 = c_im(in[13 * istride]);
4325 	       tre2_1_0 = c_re(in[45 * istride]);
4326 	       tim2_1_0 = c_im(in[45 * istride]);
4327 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4328 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4329 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4330 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4331 	  }
4332 	  {
4333 	       FFTW_REAL tre2_0_0;
4334 	       FFTW_REAL tim2_0_0;
4335 	       FFTW_REAL tre2_1_0;
4336 	       FFTW_REAL tim2_1_0;
4337 	       tre2_0_0 = c_re(in[21 * istride]);
4338 	       tim2_0_0 = c_im(in[21 * istride]);
4339 	       tre2_1_0 = c_re(in[53 * istride]);
4340 	       tim2_1_0 = c_im(in[53 * istride]);
4341 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4342 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4343 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4344 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4345 	  }
4346 	  {
4347 	       FFTW_REAL tre2_0_0;
4348 	       FFTW_REAL tim2_0_0;
4349 	       FFTW_REAL tre2_1_0;
4350 	       FFTW_REAL tim2_1_0;
4351 	       tre2_0_0 = c_re(in[29 * istride]);
4352 	       tim2_0_0 = c_im(in[29 * istride]);
4353 	       tre2_1_0 = c_re(in[61 * istride]);
4354 	       tim2_1_0 = c_im(in[61 * istride]);
4355 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4356 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4357 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4358 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4359 	  }
4360 	  {
4361 	       FFTW_REAL tre2_0_0;
4362 	       FFTW_REAL tim2_0_0;
4363 	       FFTW_REAL tre2_0_1;
4364 	       FFTW_REAL tim2_0_1;
4365 	       FFTW_REAL tre2_1_0;
4366 	       FFTW_REAL tim2_1_0;
4367 	       FFTW_REAL tre2_1_1;
4368 	       FFTW_REAL tim2_1_1;
4369 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4370 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4371 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4372 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4373 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4374 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4375 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4376 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4377 	       tre0_0_5 = tre2_0_0 + tre2_0_1;
4378 	       tim0_0_5 = tim2_0_0 + tim2_0_1;
4379 	       tre0_4_5 = tre2_0_0 - tre2_0_1;
4380 	       tim0_4_5 = tim2_0_0 - tim2_0_1;
4381 	       tre0_2_5 = tre2_1_0 + tim2_1_1;
4382 	       tim0_2_5 = tim2_1_0 - tre2_1_1;
4383 	       tre0_6_5 = tre2_1_0 - tim2_1_1;
4384 	       tim0_6_5 = tim2_1_0 + tre2_1_1;
4385 	  }
4386 	  {
4387 	       FFTW_REAL tre2_0_0;
4388 	       FFTW_REAL tim2_0_0;
4389 	       FFTW_REAL tre2_0_1;
4390 	       FFTW_REAL tim2_0_1;
4391 	       FFTW_REAL tre2_1_0;
4392 	       FFTW_REAL tim2_1_0;
4393 	       FFTW_REAL tre2_1_1;
4394 	       FFTW_REAL tim2_1_1;
4395 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4396 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4397 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4398 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4399 	       {
4400 		    FFTW_REAL tre3_0_0;
4401 		    FFTW_REAL tim3_0_0;
4402 		    FFTW_REAL tre3_1_0;
4403 		    FFTW_REAL tim3_1_0;
4404 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4405 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4406 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4407 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4408 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4409 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4410 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4411 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4412 	       }
4413 	       tre0_1_5 = tre2_0_0 + tre2_0_1;
4414 	       tim0_1_5 = tim2_0_0 + tim2_0_1;
4415 	       tre0_5_5 = tre2_0_0 - tre2_0_1;
4416 	       tim0_5_5 = tim2_0_0 - tim2_0_1;
4417 	       tre0_3_5 = tre2_1_0 + tim2_1_1;
4418 	       tim0_3_5 = tim2_1_0 - tre2_1_1;
4419 	       tre0_7_5 = tre2_1_0 - tim2_1_1;
4420 	       tim0_7_5 = tim2_1_0 + tre2_1_1;
4421 	  }
4422      }
4423      {
4424 	  FFTW_REAL tre1_0_0;
4425 	  FFTW_REAL tim1_0_0;
4426 	  FFTW_REAL tre1_0_1;
4427 	  FFTW_REAL tim1_0_1;
4428 	  FFTW_REAL tre1_0_2;
4429 	  FFTW_REAL tim1_0_2;
4430 	  FFTW_REAL tre1_0_3;
4431 	  FFTW_REAL tim1_0_3;
4432 	  FFTW_REAL tre1_1_0;
4433 	  FFTW_REAL tim1_1_0;
4434 	  FFTW_REAL tre1_1_1;
4435 	  FFTW_REAL tim1_1_1;
4436 	  FFTW_REAL tre1_1_2;
4437 	  FFTW_REAL tim1_1_2;
4438 	  FFTW_REAL tre1_1_3;
4439 	  FFTW_REAL tim1_1_3;
4440 	  {
4441 	       FFTW_REAL tre2_0_0;
4442 	       FFTW_REAL tim2_0_0;
4443 	       FFTW_REAL tre2_1_0;
4444 	       FFTW_REAL tim2_1_0;
4445 	       tre2_0_0 = c_re(in[6 * istride]);
4446 	       tim2_0_0 = c_im(in[6 * istride]);
4447 	       tre2_1_0 = c_re(in[38 * istride]);
4448 	       tim2_1_0 = c_im(in[38 * istride]);
4449 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
4450 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
4451 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
4452 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
4453 	  }
4454 	  {
4455 	       FFTW_REAL tre2_0_0;
4456 	       FFTW_REAL tim2_0_0;
4457 	       FFTW_REAL tre2_1_0;
4458 	       FFTW_REAL tim2_1_0;
4459 	       tre2_0_0 = c_re(in[14 * istride]);
4460 	       tim2_0_0 = c_im(in[14 * istride]);
4461 	       tre2_1_0 = c_re(in[46 * istride]);
4462 	       tim2_1_0 = c_im(in[46 * istride]);
4463 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4464 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4465 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4466 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4467 	  }
4468 	  {
4469 	       FFTW_REAL tre2_0_0;
4470 	       FFTW_REAL tim2_0_0;
4471 	       FFTW_REAL tre2_1_0;
4472 	       FFTW_REAL tim2_1_0;
4473 	       tre2_0_0 = c_re(in[22 * istride]);
4474 	       tim2_0_0 = c_im(in[22 * istride]);
4475 	       tre2_1_0 = c_re(in[54 * istride]);
4476 	       tim2_1_0 = c_im(in[54 * istride]);
4477 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4478 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4479 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4480 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4481 	  }
4482 	  {
4483 	       FFTW_REAL tre2_0_0;
4484 	       FFTW_REAL tim2_0_0;
4485 	       FFTW_REAL tre2_1_0;
4486 	       FFTW_REAL tim2_1_0;
4487 	       tre2_0_0 = c_re(in[30 * istride]);
4488 	       tim2_0_0 = c_im(in[30 * istride]);
4489 	       tre2_1_0 = c_re(in[62 * istride]);
4490 	       tim2_1_0 = c_im(in[62 * istride]);
4491 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4492 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4493 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4494 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4495 	  }
4496 	  {
4497 	       FFTW_REAL tre2_0_0;
4498 	       FFTW_REAL tim2_0_0;
4499 	       FFTW_REAL tre2_0_1;
4500 	       FFTW_REAL tim2_0_1;
4501 	       FFTW_REAL tre2_1_0;
4502 	       FFTW_REAL tim2_1_0;
4503 	       FFTW_REAL tre2_1_1;
4504 	       FFTW_REAL tim2_1_1;
4505 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4506 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4507 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4508 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4509 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4510 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4511 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4512 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4513 	       tre0_0_6 = tre2_0_0 + tre2_0_1;
4514 	       tim0_0_6 = tim2_0_0 + tim2_0_1;
4515 	       tre0_4_6 = tre2_0_0 - tre2_0_1;
4516 	       tim0_4_6 = tim2_0_0 - tim2_0_1;
4517 	       tre0_2_6 = tre2_1_0 + tim2_1_1;
4518 	       tim0_2_6 = tim2_1_0 - tre2_1_1;
4519 	       tre0_6_6 = tre2_1_0 - tim2_1_1;
4520 	       tim0_6_6 = tim2_1_0 + tre2_1_1;
4521 	  }
4522 	  {
4523 	       FFTW_REAL tre2_0_0;
4524 	       FFTW_REAL tim2_0_0;
4525 	       FFTW_REAL tre2_0_1;
4526 	       FFTW_REAL tim2_0_1;
4527 	       FFTW_REAL tre2_1_0;
4528 	       FFTW_REAL tim2_1_0;
4529 	       FFTW_REAL tre2_1_1;
4530 	       FFTW_REAL tim2_1_1;
4531 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4532 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4533 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4534 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4535 	       {
4536 		    FFTW_REAL tre3_0_0;
4537 		    FFTW_REAL tim3_0_0;
4538 		    FFTW_REAL tre3_1_0;
4539 		    FFTW_REAL tim3_1_0;
4540 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4541 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4542 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4543 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4544 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4545 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4546 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4547 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4548 	       }
4549 	       tre0_1_6 = tre2_0_0 + tre2_0_1;
4550 	       tim0_1_6 = tim2_0_0 + tim2_0_1;
4551 	       tre0_5_6 = tre2_0_0 - tre2_0_1;
4552 	       tim0_5_6 = tim2_0_0 - tim2_0_1;
4553 	       tre0_3_6 = tre2_1_0 + tim2_1_1;
4554 	       tim0_3_6 = tim2_1_0 - tre2_1_1;
4555 	       tre0_7_6 = tre2_1_0 - tim2_1_1;
4556 	       tim0_7_6 = tim2_1_0 + tre2_1_1;
4557 	  }
4558      }
4559      {
4560 	  FFTW_REAL tre1_0_0;
4561 	  FFTW_REAL tim1_0_0;
4562 	  FFTW_REAL tre1_0_1;
4563 	  FFTW_REAL tim1_0_1;
4564 	  FFTW_REAL tre1_0_2;
4565 	  FFTW_REAL tim1_0_2;
4566 	  FFTW_REAL tre1_0_3;
4567 	  FFTW_REAL tim1_0_3;
4568 	  FFTW_REAL tre1_1_0;
4569 	  FFTW_REAL tim1_1_0;
4570 	  FFTW_REAL tre1_1_1;
4571 	  FFTW_REAL tim1_1_1;
4572 	  FFTW_REAL tre1_1_2;
4573 	  FFTW_REAL tim1_1_2;
4574 	  FFTW_REAL tre1_1_3;
4575 	  FFTW_REAL tim1_1_3;
4576 	  {
4577 	       FFTW_REAL tre2_0_0;
4578 	       FFTW_REAL tim2_0_0;
4579 	       FFTW_REAL tre2_1_0;
4580 	       FFTW_REAL tim2_1_0;
4581 	       tre2_0_0 = c_re(in[7 * istride]);
4582 	       tim2_0_0 = c_im(in[7 * istride]);
4583 	       tre2_1_0 = c_re(in[39 * istride]);
4584 	       tim2_1_0 = c_im(in[39 * istride]);
4585 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
4586 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
4587 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
4588 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
4589 	  }
4590 	  {
4591 	       FFTW_REAL tre2_0_0;
4592 	       FFTW_REAL tim2_0_0;
4593 	       FFTW_REAL tre2_1_0;
4594 	       FFTW_REAL tim2_1_0;
4595 	       tre2_0_0 = c_re(in[15 * istride]);
4596 	       tim2_0_0 = c_im(in[15 * istride]);
4597 	       tre2_1_0 = c_re(in[47 * istride]);
4598 	       tim2_1_0 = c_im(in[47 * istride]);
4599 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4600 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4601 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4602 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4603 	  }
4604 	  {
4605 	       FFTW_REAL tre2_0_0;
4606 	       FFTW_REAL tim2_0_0;
4607 	       FFTW_REAL tre2_1_0;
4608 	       FFTW_REAL tim2_1_0;
4609 	       tre2_0_0 = c_re(in[23 * istride]);
4610 	       tim2_0_0 = c_im(in[23 * istride]);
4611 	       tre2_1_0 = c_re(in[55 * istride]);
4612 	       tim2_1_0 = c_im(in[55 * istride]);
4613 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4614 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4615 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4616 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4617 	  }
4618 	  {
4619 	       FFTW_REAL tre2_0_0;
4620 	       FFTW_REAL tim2_0_0;
4621 	       FFTW_REAL tre2_1_0;
4622 	       FFTW_REAL tim2_1_0;
4623 	       tre2_0_0 = c_re(in[31 * istride]);
4624 	       tim2_0_0 = c_im(in[31 * istride]);
4625 	       tre2_1_0 = c_re(in[63 * istride]);
4626 	       tim2_1_0 = c_im(in[63 * istride]);
4627 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4628 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4629 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4630 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4631 	  }
4632 	  {
4633 	       FFTW_REAL tre2_0_0;
4634 	       FFTW_REAL tim2_0_0;
4635 	       FFTW_REAL tre2_0_1;
4636 	       FFTW_REAL tim2_0_1;
4637 	       FFTW_REAL tre2_1_0;
4638 	       FFTW_REAL tim2_1_0;
4639 	       FFTW_REAL tre2_1_1;
4640 	       FFTW_REAL tim2_1_1;
4641 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4642 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4643 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4644 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4645 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4646 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4647 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4648 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4649 	       tre0_0_7 = tre2_0_0 + tre2_0_1;
4650 	       tim0_0_7 = tim2_0_0 + tim2_0_1;
4651 	       tre0_4_7 = tre2_0_0 - tre2_0_1;
4652 	       tim0_4_7 = tim2_0_0 - tim2_0_1;
4653 	       tre0_2_7 = tre2_1_0 + tim2_1_1;
4654 	       tim0_2_7 = tim2_1_0 - tre2_1_1;
4655 	       tre0_6_7 = tre2_1_0 - tim2_1_1;
4656 	       tim0_6_7 = tim2_1_0 + tre2_1_1;
4657 	  }
4658 	  {
4659 	       FFTW_REAL tre2_0_0;
4660 	       FFTW_REAL tim2_0_0;
4661 	       FFTW_REAL tre2_0_1;
4662 	       FFTW_REAL tim2_0_1;
4663 	       FFTW_REAL tre2_1_0;
4664 	       FFTW_REAL tim2_1_0;
4665 	       FFTW_REAL tre2_1_1;
4666 	       FFTW_REAL tim2_1_1;
4667 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4668 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4669 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4670 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4671 	       {
4672 		    FFTW_REAL tre3_0_0;
4673 		    FFTW_REAL tim3_0_0;
4674 		    FFTW_REAL tre3_1_0;
4675 		    FFTW_REAL tim3_1_0;
4676 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4677 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4678 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4679 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4680 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4681 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4682 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4683 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4684 	       }
4685 	       tre0_1_7 = tre2_0_0 + tre2_0_1;
4686 	       tim0_1_7 = tim2_0_0 + tim2_0_1;
4687 	       tre0_5_7 = tre2_0_0 - tre2_0_1;
4688 	       tim0_5_7 = tim2_0_0 - tim2_0_1;
4689 	       tre0_3_7 = tre2_1_0 + tim2_1_1;
4690 	       tim0_3_7 = tim2_1_0 - tre2_1_1;
4691 	       tre0_7_7 = tre2_1_0 - tim2_1_1;
4692 	       tim0_7_7 = tim2_1_0 + tre2_1_1;
4693 	  }
4694      }
4695      {
4696 	  FFTW_REAL tre1_0_0;
4697 	  FFTW_REAL tim1_0_0;
4698 	  FFTW_REAL tre1_0_1;
4699 	  FFTW_REAL tim1_0_1;
4700 	  FFTW_REAL tre1_0_2;
4701 	  FFTW_REAL tim1_0_2;
4702 	  FFTW_REAL tre1_0_3;
4703 	  FFTW_REAL tim1_0_3;
4704 	  FFTW_REAL tre1_1_0;
4705 	  FFTW_REAL tim1_1_0;
4706 	  FFTW_REAL tre1_1_1;
4707 	  FFTW_REAL tim1_1_1;
4708 	  FFTW_REAL tre1_1_2;
4709 	  FFTW_REAL tim1_1_2;
4710 	  FFTW_REAL tre1_1_3;
4711 	  FFTW_REAL tim1_1_3;
4712 	  tre1_0_0 = tre0_0_0 + tre0_0_4;
4713 	  tim1_0_0 = tim0_0_0 + tim0_0_4;
4714 	  tre1_1_0 = tre0_0_0 - tre0_0_4;
4715 	  tim1_1_0 = tim0_0_0 - tim0_0_4;
4716 	  tre1_0_1 = tre0_0_1 + tre0_0_5;
4717 	  tim1_0_1 = tim0_0_1 + tim0_0_5;
4718 	  tre1_1_1 = tre0_0_1 - tre0_0_5;
4719 	  tim1_1_1 = tim0_0_1 - tim0_0_5;
4720 	  tre1_0_2 = tre0_0_2 + tre0_0_6;
4721 	  tim1_0_2 = tim0_0_2 + tim0_0_6;
4722 	  tre1_1_2 = tre0_0_2 - tre0_0_6;
4723 	  tim1_1_2 = tim0_0_2 - tim0_0_6;
4724 	  tre1_0_3 = tre0_0_3 + tre0_0_7;
4725 	  tim1_0_3 = tim0_0_3 + tim0_0_7;
4726 	  tre1_1_3 = tre0_0_3 - tre0_0_7;
4727 	  tim1_1_3 = tim0_0_3 - tim0_0_7;
4728 	  {
4729 	       FFTW_REAL tre2_0_0;
4730 	       FFTW_REAL tim2_0_0;
4731 	       FFTW_REAL tre2_0_1;
4732 	       FFTW_REAL tim2_0_1;
4733 	       FFTW_REAL tre2_1_0;
4734 	       FFTW_REAL tim2_1_0;
4735 	       FFTW_REAL tre2_1_1;
4736 	       FFTW_REAL tim2_1_1;
4737 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4738 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4739 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4740 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4741 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4742 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4743 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4744 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4745 	       c_re(out[0]) = tre2_0_0 + tre2_0_1;
4746 	       c_im(out[0]) = tim2_0_0 + tim2_0_1;
4747 	       c_re(out[32 * ostride]) = tre2_0_0 - tre2_0_1;
4748 	       c_im(out[32 * ostride]) = tim2_0_0 - tim2_0_1;
4749 	       c_re(out[16 * ostride]) = tre2_1_0 + tim2_1_1;
4750 	       c_im(out[16 * ostride]) = tim2_1_0 - tre2_1_1;
4751 	       c_re(out[48 * ostride]) = tre2_1_0 - tim2_1_1;
4752 	       c_im(out[48 * ostride]) = tim2_1_0 + tre2_1_1;
4753 	  }
4754 	  {
4755 	       FFTW_REAL tre2_0_0;
4756 	       FFTW_REAL tim2_0_0;
4757 	       FFTW_REAL tre2_0_1;
4758 	       FFTW_REAL tim2_0_1;
4759 	       FFTW_REAL tre2_1_0;
4760 	       FFTW_REAL tim2_1_0;
4761 	       FFTW_REAL tre2_1_1;
4762 	       FFTW_REAL tim2_1_1;
4763 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4764 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4765 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4766 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4767 	       {
4768 		    FFTW_REAL tre3_0_0;
4769 		    FFTW_REAL tim3_0_0;
4770 		    FFTW_REAL tre3_1_0;
4771 		    FFTW_REAL tim3_1_0;
4772 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4773 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4774 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4775 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4776 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4777 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4778 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4779 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4780 	       }
4781 	       c_re(out[8 * ostride]) = tre2_0_0 + tre2_0_1;
4782 	       c_im(out[8 * ostride]) = tim2_0_0 + tim2_0_1;
4783 	       c_re(out[40 * ostride]) = tre2_0_0 - tre2_0_1;
4784 	       c_im(out[40 * ostride]) = tim2_0_0 - tim2_0_1;
4785 	       c_re(out[24 * ostride]) = tre2_1_0 + tim2_1_1;
4786 	       c_im(out[24 * ostride]) = tim2_1_0 - tre2_1_1;
4787 	       c_re(out[56 * ostride]) = tre2_1_0 - tim2_1_1;
4788 	       c_im(out[56 * ostride]) = tim2_1_0 + tre2_1_1;
4789 	  }
4790      }
4791      {
4792 	  FFTW_REAL tre1_0_0;
4793 	  FFTW_REAL tim1_0_0;
4794 	  FFTW_REAL tre1_0_1;
4795 	  FFTW_REAL tim1_0_1;
4796 	  FFTW_REAL tre1_0_2;
4797 	  FFTW_REAL tim1_0_2;
4798 	  FFTW_REAL tre1_0_3;
4799 	  FFTW_REAL tim1_0_3;
4800 	  FFTW_REAL tre1_1_0;
4801 	  FFTW_REAL tim1_1_0;
4802 	  FFTW_REAL tre1_1_1;
4803 	  FFTW_REAL tim1_1_1;
4804 	  FFTW_REAL tre1_1_2;
4805 	  FFTW_REAL tim1_1_2;
4806 	  FFTW_REAL tre1_1_3;
4807 	  FFTW_REAL tim1_1_3;
4808 	  {
4809 	       FFTW_REAL tre2_1_0;
4810 	       FFTW_REAL tim2_1_0;
4811 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_4) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_4);
4812 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_4) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_4);
4813 	       tre1_0_0 = tre0_1_0 + tre2_1_0;
4814 	       tim1_0_0 = tim0_1_0 + tim2_1_0;
4815 	       tre1_1_0 = tre0_1_0 - tre2_1_0;
4816 	       tim1_1_0 = tim0_1_0 - tim2_1_0;
4817 	  }
4818 	  {
4819 	       FFTW_REAL tre2_0_0;
4820 	       FFTW_REAL tim2_0_0;
4821 	       FFTW_REAL tre2_1_0;
4822 	       FFTW_REAL tim2_1_0;
4823 	       tre2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tre0_1_1) + (((FFTW_REAL) FFTW_K098017140) * tim0_1_1);
4824 	       tim2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tim0_1_1) - (((FFTW_REAL) FFTW_K098017140) * tre0_1_1);
4825 	       tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_1_5) + (((FFTW_REAL) FFTW_K471396736) * tim0_1_5);
4826 	       tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_1_5) - (((FFTW_REAL) FFTW_K471396736) * tre0_1_5);
4827 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4828 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4829 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4830 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4831 	  }
4832 	  {
4833 	       FFTW_REAL tre2_0_0;
4834 	       FFTW_REAL tim2_0_0;
4835 	       FFTW_REAL tre2_1_0;
4836 	       FFTW_REAL tim2_1_0;
4837 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_2) + (((FFTW_REAL) FFTW_K195090322) * tim0_1_2);
4838 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_2) - (((FFTW_REAL) FFTW_K195090322) * tre0_1_2);
4839 	       tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_6) + (((FFTW_REAL) FFTW_K555570233) * tim0_1_6);
4840 	       tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_6) - (((FFTW_REAL) FFTW_K555570233) * tre0_1_6);
4841 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4842 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4843 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4844 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4845 	  }
4846 	  {
4847 	       FFTW_REAL tre2_0_0;
4848 	       FFTW_REAL tim2_0_0;
4849 	       FFTW_REAL tre2_1_0;
4850 	       FFTW_REAL tim2_1_0;
4851 	       tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_1_3) + (((FFTW_REAL) FFTW_K290284677) * tim0_1_3);
4852 	       tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_1_3) - (((FFTW_REAL) FFTW_K290284677) * tre0_1_3);
4853 	       tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_1_7) + (((FFTW_REAL) FFTW_K634393284) * tim0_1_7);
4854 	       tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_1_7) - (((FFTW_REAL) FFTW_K634393284) * tre0_1_7);
4855 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4856 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4857 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4858 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4859 	  }
4860 	  {
4861 	       FFTW_REAL tre2_0_0;
4862 	       FFTW_REAL tim2_0_0;
4863 	       FFTW_REAL tre2_0_1;
4864 	       FFTW_REAL tim2_0_1;
4865 	       FFTW_REAL tre2_1_0;
4866 	       FFTW_REAL tim2_1_0;
4867 	       FFTW_REAL tre2_1_1;
4868 	       FFTW_REAL tim2_1_1;
4869 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
4870 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
4871 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
4872 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
4873 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
4874 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
4875 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
4876 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
4877 	       c_re(out[ostride]) = tre2_0_0 + tre2_0_1;
4878 	       c_im(out[ostride]) = tim2_0_0 + tim2_0_1;
4879 	       c_re(out[33 * ostride]) = tre2_0_0 - tre2_0_1;
4880 	       c_im(out[33 * ostride]) = tim2_0_0 - tim2_0_1;
4881 	       c_re(out[17 * ostride]) = tre2_1_0 + tim2_1_1;
4882 	       c_im(out[17 * ostride]) = tim2_1_0 - tre2_1_1;
4883 	       c_re(out[49 * ostride]) = tre2_1_0 - tim2_1_1;
4884 	       c_im(out[49 * ostride]) = tim2_1_0 + tre2_1_1;
4885 	  }
4886 	  {
4887 	       FFTW_REAL tre2_0_0;
4888 	       FFTW_REAL tim2_0_0;
4889 	       FFTW_REAL tre2_0_1;
4890 	       FFTW_REAL tim2_0_1;
4891 	       FFTW_REAL tre2_1_0;
4892 	       FFTW_REAL tim2_1_0;
4893 	       FFTW_REAL tre2_1_1;
4894 	       FFTW_REAL tim2_1_1;
4895 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
4896 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
4897 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
4898 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
4899 	       {
4900 		    FFTW_REAL tre3_0_0;
4901 		    FFTW_REAL tim3_0_0;
4902 		    FFTW_REAL tre3_1_0;
4903 		    FFTW_REAL tim3_1_0;
4904 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
4905 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
4906 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
4907 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
4908 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
4909 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
4910 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
4911 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
4912 	       }
4913 	       c_re(out[9 * ostride]) = tre2_0_0 + tre2_0_1;
4914 	       c_im(out[9 * ostride]) = tim2_0_0 + tim2_0_1;
4915 	       c_re(out[41 * ostride]) = tre2_0_0 - tre2_0_1;
4916 	       c_im(out[41 * ostride]) = tim2_0_0 - tim2_0_1;
4917 	       c_re(out[25 * ostride]) = tre2_1_0 + tim2_1_1;
4918 	       c_im(out[25 * ostride]) = tim2_1_0 - tre2_1_1;
4919 	       c_re(out[57 * ostride]) = tre2_1_0 - tim2_1_1;
4920 	       c_im(out[57 * ostride]) = tim2_1_0 + tre2_1_1;
4921 	  }
4922      }
4923      {
4924 	  FFTW_REAL tre1_0_0;
4925 	  FFTW_REAL tim1_0_0;
4926 	  FFTW_REAL tre1_0_1;
4927 	  FFTW_REAL tim1_0_1;
4928 	  FFTW_REAL tre1_0_2;
4929 	  FFTW_REAL tim1_0_2;
4930 	  FFTW_REAL tre1_0_3;
4931 	  FFTW_REAL tim1_0_3;
4932 	  FFTW_REAL tre1_1_0;
4933 	  FFTW_REAL tim1_1_0;
4934 	  FFTW_REAL tre1_1_1;
4935 	  FFTW_REAL tim1_1_1;
4936 	  FFTW_REAL tre1_1_2;
4937 	  FFTW_REAL tim1_1_2;
4938 	  FFTW_REAL tre1_1_3;
4939 	  FFTW_REAL tim1_1_3;
4940 	  {
4941 	       FFTW_REAL tre2_1_0;
4942 	       FFTW_REAL tim2_1_0;
4943 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_4 + tim0_2_4);
4944 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_4 - tre0_2_4);
4945 	       tre1_0_0 = tre0_2_0 + tre2_1_0;
4946 	       tim1_0_0 = tim0_2_0 + tim2_1_0;
4947 	       tre1_1_0 = tre0_2_0 - tre2_1_0;
4948 	       tim1_1_0 = tim0_2_0 - tim2_1_0;
4949 	  }
4950 	  {
4951 	       FFTW_REAL tre2_0_0;
4952 	       FFTW_REAL tim2_0_0;
4953 	       FFTW_REAL tre2_1_0;
4954 	       FFTW_REAL tim2_1_0;
4955 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_2_1) + (((FFTW_REAL) FFTW_K195090322) * tim0_2_1);
4956 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_2_1) - (((FFTW_REAL) FFTW_K195090322) * tre0_2_1);
4957 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_2_5) + (((FFTW_REAL) FFTW_K831469612) * tim0_2_5);
4958 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_2_5) - (((FFTW_REAL) FFTW_K831469612) * tre0_2_5);
4959 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
4960 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
4961 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
4962 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
4963 	  }
4964 	  {
4965 	       FFTW_REAL tre2_0_0;
4966 	       FFTW_REAL tim2_0_0;
4967 	       FFTW_REAL tre2_1_0;
4968 	       FFTW_REAL tim2_1_0;
4969 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_2) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_2);
4970 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_2) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_2);
4971 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_6) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_6);
4972 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_6);
4973 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
4974 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
4975 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
4976 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
4977 	  }
4978 	  {
4979 	       FFTW_REAL tre2_0_0;
4980 	       FFTW_REAL tim2_0_0;
4981 	       FFTW_REAL tre2_1_0;
4982 	       FFTW_REAL tim2_1_0;
4983 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_2_3) + (((FFTW_REAL) FFTW_K555570233) * tim0_2_3);
4984 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_2_3) - (((FFTW_REAL) FFTW_K555570233) * tre0_2_3);
4985 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_2_7) + (((FFTW_REAL) FFTW_K980785280) * tim0_2_7);
4986 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_2_7) - (((FFTW_REAL) FFTW_K980785280) * tre0_2_7);
4987 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
4988 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
4989 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
4990 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
4991 	  }
4992 	  {
4993 	       FFTW_REAL tre2_0_0;
4994 	       FFTW_REAL tim2_0_0;
4995 	       FFTW_REAL tre2_0_1;
4996 	       FFTW_REAL tim2_0_1;
4997 	       FFTW_REAL tre2_1_0;
4998 	       FFTW_REAL tim2_1_0;
4999 	       FFTW_REAL tre2_1_1;
5000 	       FFTW_REAL tim2_1_1;
5001 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5002 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5003 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5004 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5005 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5006 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5007 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5008 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5009 	       c_re(out[2 * ostride]) = tre2_0_0 + tre2_0_1;
5010 	       c_im(out[2 * ostride]) = tim2_0_0 + tim2_0_1;
5011 	       c_re(out[34 * ostride]) = tre2_0_0 - tre2_0_1;
5012 	       c_im(out[34 * ostride]) = tim2_0_0 - tim2_0_1;
5013 	       c_re(out[18 * ostride]) = tre2_1_0 + tim2_1_1;
5014 	       c_im(out[18 * ostride]) = tim2_1_0 - tre2_1_1;
5015 	       c_re(out[50 * ostride]) = tre2_1_0 - tim2_1_1;
5016 	       c_im(out[50 * ostride]) = tim2_1_0 + tre2_1_1;
5017 	  }
5018 	  {
5019 	       FFTW_REAL tre2_0_0;
5020 	       FFTW_REAL tim2_0_0;
5021 	       FFTW_REAL tre2_0_1;
5022 	       FFTW_REAL tim2_0_1;
5023 	       FFTW_REAL tre2_1_0;
5024 	       FFTW_REAL tim2_1_0;
5025 	       FFTW_REAL tre2_1_1;
5026 	       FFTW_REAL tim2_1_1;
5027 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5028 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5029 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5030 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5031 	       {
5032 		    FFTW_REAL tre3_0_0;
5033 		    FFTW_REAL tim3_0_0;
5034 		    FFTW_REAL tre3_1_0;
5035 		    FFTW_REAL tim3_1_0;
5036 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5037 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5038 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5039 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5040 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5041 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5042 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5043 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5044 	       }
5045 	       c_re(out[10 * ostride]) = tre2_0_0 + tre2_0_1;
5046 	       c_im(out[10 * ostride]) = tim2_0_0 + tim2_0_1;
5047 	       c_re(out[42 * ostride]) = tre2_0_0 - tre2_0_1;
5048 	       c_im(out[42 * ostride]) = tim2_0_0 - tim2_0_1;
5049 	       c_re(out[26 * ostride]) = tre2_1_0 + tim2_1_1;
5050 	       c_im(out[26 * ostride]) = tim2_1_0 - tre2_1_1;
5051 	       c_re(out[58 * ostride]) = tre2_1_0 - tim2_1_1;
5052 	       c_im(out[58 * ostride]) = tim2_1_0 + tre2_1_1;
5053 	  }
5054      }
5055      {
5056 	  FFTW_REAL tre1_0_0;
5057 	  FFTW_REAL tim1_0_0;
5058 	  FFTW_REAL tre1_0_1;
5059 	  FFTW_REAL tim1_0_1;
5060 	  FFTW_REAL tre1_0_2;
5061 	  FFTW_REAL tim1_0_2;
5062 	  FFTW_REAL tre1_0_3;
5063 	  FFTW_REAL tim1_0_3;
5064 	  FFTW_REAL tre1_1_0;
5065 	  FFTW_REAL tim1_1_0;
5066 	  FFTW_REAL tre1_1_1;
5067 	  FFTW_REAL tim1_1_1;
5068 	  FFTW_REAL tre1_1_2;
5069 	  FFTW_REAL tim1_1_2;
5070 	  FFTW_REAL tre1_1_3;
5071 	  FFTW_REAL tim1_1_3;
5072 	  {
5073 	       FFTW_REAL tre2_1_0;
5074 	       FFTW_REAL tim2_1_0;
5075 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_4) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_4);
5076 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_4) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_4);
5077 	       tre1_0_0 = tre0_3_0 + tre2_1_0;
5078 	       tim1_0_0 = tim0_3_0 + tim2_1_0;
5079 	       tre1_1_0 = tre0_3_0 - tre2_1_0;
5080 	       tim1_1_0 = tim0_3_0 - tim2_1_0;
5081 	  }
5082 	  {
5083 	       FFTW_REAL tre2_0_0;
5084 	       FFTW_REAL tim2_0_0;
5085 	       FFTW_REAL tre2_1_0;
5086 	       FFTW_REAL tim2_1_0;
5087 	       tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_3_1) + (((FFTW_REAL) FFTW_K290284677) * tim0_3_1);
5088 	       tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_3_1) - (((FFTW_REAL) FFTW_K290284677) * tre0_3_1);
5089 	       tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_3_5) + (((FFTW_REAL) FFTW_K995184726) * tim0_3_5);
5090 	       tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_3_5) - (((FFTW_REAL) FFTW_K995184726) * tre0_3_5);
5091 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
5092 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
5093 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
5094 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
5095 	  }
5096 	  {
5097 	       FFTW_REAL tre2_0_0;
5098 	       FFTW_REAL tim2_0_0;
5099 	       FFTW_REAL tre2_1_0;
5100 	       FFTW_REAL tim2_1_0;
5101 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_2) + (((FFTW_REAL) FFTW_K555570233) * tim0_3_2);
5102 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_2) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_2);
5103 	       tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_6) - (((FFTW_REAL) FFTW_K195090322) * tre0_3_6);
5104 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_6) + (((FFTW_REAL) FFTW_K980785280) * tre0_3_6);
5105 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
5106 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
5107 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
5108 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
5109 	  }
5110 	  {
5111 	       FFTW_REAL tre2_0_0;
5112 	       FFTW_REAL tim2_0_0;
5113 	       FFTW_REAL tre2_1_0;
5114 	       FFTW_REAL tim2_1_0;
5115 	       tre2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_3_3) + (((FFTW_REAL) FFTW_K773010453) * tim0_3_3);
5116 	       tim2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_3_3) - (((FFTW_REAL) FFTW_K773010453) * tre0_3_3);
5117 	       tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_3_7) - (((FFTW_REAL) FFTW_K471396736) * tre0_3_7);
5118 	       tim2_1_0 = (((FFTW_REAL) FFTW_K471396736) * tim0_3_7) + (((FFTW_REAL) FFTW_K881921264) * tre0_3_7);
5119 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
5120 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
5121 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
5122 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
5123 	  }
5124 	  {
5125 	       FFTW_REAL tre2_0_0;
5126 	       FFTW_REAL tim2_0_0;
5127 	       FFTW_REAL tre2_0_1;
5128 	       FFTW_REAL tim2_0_1;
5129 	       FFTW_REAL tre2_1_0;
5130 	       FFTW_REAL tim2_1_0;
5131 	       FFTW_REAL tre2_1_1;
5132 	       FFTW_REAL tim2_1_1;
5133 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5134 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5135 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5136 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5137 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5138 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5139 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5140 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5141 	       c_re(out[3 * ostride]) = tre2_0_0 + tre2_0_1;
5142 	       c_im(out[3 * ostride]) = tim2_0_0 + tim2_0_1;
5143 	       c_re(out[35 * ostride]) = tre2_0_0 - tre2_0_1;
5144 	       c_im(out[35 * ostride]) = tim2_0_0 - tim2_0_1;
5145 	       c_re(out[19 * ostride]) = tre2_1_0 + tim2_1_1;
5146 	       c_im(out[19 * ostride]) = tim2_1_0 - tre2_1_1;
5147 	       c_re(out[51 * ostride]) = tre2_1_0 - tim2_1_1;
5148 	       c_im(out[51 * ostride]) = tim2_1_0 + tre2_1_1;
5149 	  }
5150 	  {
5151 	       FFTW_REAL tre2_0_0;
5152 	       FFTW_REAL tim2_0_0;
5153 	       FFTW_REAL tre2_0_1;
5154 	       FFTW_REAL tim2_0_1;
5155 	       FFTW_REAL tre2_1_0;
5156 	       FFTW_REAL tim2_1_0;
5157 	       FFTW_REAL tre2_1_1;
5158 	       FFTW_REAL tim2_1_1;
5159 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5160 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5161 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5162 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5163 	       {
5164 		    FFTW_REAL tre3_0_0;
5165 		    FFTW_REAL tim3_0_0;
5166 		    FFTW_REAL tre3_1_0;
5167 		    FFTW_REAL tim3_1_0;
5168 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5169 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5170 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5171 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5172 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5173 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5174 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5175 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5176 	       }
5177 	       c_re(out[11 * ostride]) = tre2_0_0 + tre2_0_1;
5178 	       c_im(out[11 * ostride]) = tim2_0_0 + tim2_0_1;
5179 	       c_re(out[43 * ostride]) = tre2_0_0 - tre2_0_1;
5180 	       c_im(out[43 * ostride]) = tim2_0_0 - tim2_0_1;
5181 	       c_re(out[27 * ostride]) = tre2_1_0 + tim2_1_1;
5182 	       c_im(out[27 * ostride]) = tim2_1_0 - tre2_1_1;
5183 	       c_re(out[59 * ostride]) = tre2_1_0 - tim2_1_1;
5184 	       c_im(out[59 * ostride]) = tim2_1_0 + tre2_1_1;
5185 	  }
5186      }
5187      {
5188 	  FFTW_REAL tre1_0_0;
5189 	  FFTW_REAL tim1_0_0;
5190 	  FFTW_REAL tre1_0_1;
5191 	  FFTW_REAL tim1_0_1;
5192 	  FFTW_REAL tre1_0_2;
5193 	  FFTW_REAL tim1_0_2;
5194 	  FFTW_REAL tre1_0_3;
5195 	  FFTW_REAL tim1_0_3;
5196 	  FFTW_REAL tre1_1_0;
5197 	  FFTW_REAL tim1_1_0;
5198 	  FFTW_REAL tre1_1_1;
5199 	  FFTW_REAL tim1_1_1;
5200 	  FFTW_REAL tre1_1_2;
5201 	  FFTW_REAL tim1_1_2;
5202 	  FFTW_REAL tre1_1_3;
5203 	  FFTW_REAL tim1_1_3;
5204 	  tre1_0_0 = tre0_4_0 + tim0_4_4;
5205 	  tim1_0_0 = tim0_4_0 - tre0_4_4;
5206 	  tre1_1_0 = tre0_4_0 - tim0_4_4;
5207 	  tim1_1_0 = tim0_4_0 + tre0_4_4;
5208 	  {
5209 	       FFTW_REAL tre2_0_0;
5210 	       FFTW_REAL tim2_0_0;
5211 	       FFTW_REAL tre2_1_0;
5212 	       FFTW_REAL tim2_1_0;
5213 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_4_1);
5214 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_4_1);
5215 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_5) - (((FFTW_REAL) FFTW_K382683432) * tre0_4_5);
5216 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_5) + (((FFTW_REAL) FFTW_K923879532) * tre0_4_5);
5217 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
5218 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
5219 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
5220 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
5221 	  }
5222 	  {
5223 	       FFTW_REAL tre2_0_0;
5224 	       FFTW_REAL tim2_0_0;
5225 	       FFTW_REAL tre2_1_0;
5226 	       FFTW_REAL tim2_1_0;
5227 	       tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_2 + tim0_4_2);
5228 	       tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_2 - tre0_4_2);
5229 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_6 - tre0_4_6);
5230 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_6 + tre0_4_6);
5231 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
5232 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
5233 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
5234 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
5235 	  }
5236 	  {
5237 	       FFTW_REAL tre2_0_0;
5238 	       FFTW_REAL tim2_0_0;
5239 	       FFTW_REAL tre2_1_0;
5240 	       FFTW_REAL tim2_1_0;
5241 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_4_3);
5242 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_4_3);
5243 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_7) - (((FFTW_REAL) FFTW_K923879532) * tre0_4_7);
5244 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_7) + (((FFTW_REAL) FFTW_K382683432) * tre0_4_7);
5245 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
5246 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
5247 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
5248 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
5249 	  }
5250 	  {
5251 	       FFTW_REAL tre2_0_0;
5252 	       FFTW_REAL tim2_0_0;
5253 	       FFTW_REAL tre2_0_1;
5254 	       FFTW_REAL tim2_0_1;
5255 	       FFTW_REAL tre2_1_0;
5256 	       FFTW_REAL tim2_1_0;
5257 	       FFTW_REAL tre2_1_1;
5258 	       FFTW_REAL tim2_1_1;
5259 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5260 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5261 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5262 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5263 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5264 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5265 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5266 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5267 	       c_re(out[4 * ostride]) = tre2_0_0 + tre2_0_1;
5268 	       c_im(out[4 * ostride]) = tim2_0_0 + tim2_0_1;
5269 	       c_re(out[36 * ostride]) = tre2_0_0 - tre2_0_1;
5270 	       c_im(out[36 * ostride]) = tim2_0_0 - tim2_0_1;
5271 	       c_re(out[20 * ostride]) = tre2_1_0 + tim2_1_1;
5272 	       c_im(out[20 * ostride]) = tim2_1_0 - tre2_1_1;
5273 	       c_re(out[52 * ostride]) = tre2_1_0 - tim2_1_1;
5274 	       c_im(out[52 * ostride]) = tim2_1_0 + tre2_1_1;
5275 	  }
5276 	  {
5277 	       FFTW_REAL tre2_0_0;
5278 	       FFTW_REAL tim2_0_0;
5279 	       FFTW_REAL tre2_0_1;
5280 	       FFTW_REAL tim2_0_1;
5281 	       FFTW_REAL tre2_1_0;
5282 	       FFTW_REAL tim2_1_0;
5283 	       FFTW_REAL tre2_1_1;
5284 	       FFTW_REAL tim2_1_1;
5285 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5286 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5287 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5288 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5289 	       {
5290 		    FFTW_REAL tre3_0_0;
5291 		    FFTW_REAL tim3_0_0;
5292 		    FFTW_REAL tre3_1_0;
5293 		    FFTW_REAL tim3_1_0;
5294 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5295 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5296 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5297 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5298 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5299 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5300 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5301 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5302 	       }
5303 	       c_re(out[12 * ostride]) = tre2_0_0 + tre2_0_1;
5304 	       c_im(out[12 * ostride]) = tim2_0_0 + tim2_0_1;
5305 	       c_re(out[44 * ostride]) = tre2_0_0 - tre2_0_1;
5306 	       c_im(out[44 * ostride]) = tim2_0_0 - tim2_0_1;
5307 	       c_re(out[28 * ostride]) = tre2_1_0 + tim2_1_1;
5308 	       c_im(out[28 * ostride]) = tim2_1_0 - tre2_1_1;
5309 	       c_re(out[60 * ostride]) = tre2_1_0 - tim2_1_1;
5310 	       c_im(out[60 * ostride]) = tim2_1_0 + tre2_1_1;
5311 	  }
5312      }
5313      {
5314 	  FFTW_REAL tre1_0_0;
5315 	  FFTW_REAL tim1_0_0;
5316 	  FFTW_REAL tre1_0_1;
5317 	  FFTW_REAL tim1_0_1;
5318 	  FFTW_REAL tre1_0_2;
5319 	  FFTW_REAL tim1_0_2;
5320 	  FFTW_REAL tre1_0_3;
5321 	  FFTW_REAL tim1_0_3;
5322 	  FFTW_REAL tre1_1_0;
5323 	  FFTW_REAL tim1_1_0;
5324 	  FFTW_REAL tre1_1_1;
5325 	  FFTW_REAL tim1_1_1;
5326 	  FFTW_REAL tre1_1_2;
5327 	  FFTW_REAL tim1_1_2;
5328 	  FFTW_REAL tre1_1_3;
5329 	  FFTW_REAL tim1_1_3;
5330 	  {
5331 	       FFTW_REAL tre2_1_0;
5332 	       FFTW_REAL tim2_1_0;
5333 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_5_4) - (((FFTW_REAL) FFTW_K382683432) * tre0_5_4);
5334 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_5_4) + (((FFTW_REAL) FFTW_K923879532) * tre0_5_4);
5335 	       tre1_0_0 = tre0_5_0 + tre2_1_0;
5336 	       tim1_0_0 = tim0_5_0 - tim2_1_0;
5337 	       tre1_1_0 = tre0_5_0 - tre2_1_0;
5338 	       tim1_1_0 = tim0_5_0 + tim2_1_0;
5339 	  }
5340 	  {
5341 	       FFTW_REAL tre2_0_0;
5342 	       FFTW_REAL tim2_0_0;
5343 	       FFTW_REAL tre2_1_0;
5344 	       FFTW_REAL tim2_1_0;
5345 	       tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_5_1) + (((FFTW_REAL) FFTW_K471396736) * tim0_5_1);
5346 	       tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_5_1) - (((FFTW_REAL) FFTW_K471396736) * tre0_5_1);
5347 	       tre2_1_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_5_5) - (((FFTW_REAL) FFTW_K773010453) * tre0_5_5);
5348 	       tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_5_5) + (((FFTW_REAL) FFTW_K634393284) * tre0_5_5);
5349 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
5350 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
5351 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
5352 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
5353 	  }
5354 	  {
5355 	       FFTW_REAL tre2_0_0;
5356 	       FFTW_REAL tim2_0_0;
5357 	       FFTW_REAL tre2_1_0;
5358 	       FFTW_REAL tim2_1_0;
5359 	       tre2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_5_2) + (((FFTW_REAL) FFTW_K831469612) * tim0_5_2);
5360 	       tim2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_5_2) - (((FFTW_REAL) FFTW_K831469612) * tre0_5_2);
5361 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_5_6) - (((FFTW_REAL) FFTW_K980785280) * tre0_5_6);
5362 	       tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_5_6) + (((FFTW_REAL) FFTW_K195090322) * tre0_5_6);
5363 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
5364 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
5365 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
5366 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
5367 	  }
5368 	  {
5369 	       FFTW_REAL tre2_0_0;
5370 	       FFTW_REAL tim2_0_0;
5371 	       FFTW_REAL tre2_1_0;
5372 	       FFTW_REAL tim2_1_0;
5373 	       tre2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_5_3) + (((FFTW_REAL) FFTW_K995184726) * tim0_5_3);
5374 	       tim2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_5_3) - (((FFTW_REAL) FFTW_K995184726) * tre0_5_3);
5375 	       tre2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_5_7) + (((FFTW_REAL) FFTW_K290284677) * tim0_5_7);
5376 	       tim2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tre0_5_7) - (((FFTW_REAL) FFTW_K956940335) * tim0_5_7);
5377 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
5378 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
5379 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
5380 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
5381 	  }
5382 	  {
5383 	       FFTW_REAL tre2_0_0;
5384 	       FFTW_REAL tim2_0_0;
5385 	       FFTW_REAL tre2_0_1;
5386 	       FFTW_REAL tim2_0_1;
5387 	       FFTW_REAL tre2_1_0;
5388 	       FFTW_REAL tim2_1_0;
5389 	       FFTW_REAL tre2_1_1;
5390 	       FFTW_REAL tim2_1_1;
5391 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5392 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5393 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5394 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5395 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5396 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5397 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5398 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5399 	       c_re(out[5 * ostride]) = tre2_0_0 + tre2_0_1;
5400 	       c_im(out[5 * ostride]) = tim2_0_0 + tim2_0_1;
5401 	       c_re(out[37 * ostride]) = tre2_0_0 - tre2_0_1;
5402 	       c_im(out[37 * ostride]) = tim2_0_0 - tim2_0_1;
5403 	       c_re(out[21 * ostride]) = tre2_1_0 + tim2_1_1;
5404 	       c_im(out[21 * ostride]) = tim2_1_0 - tre2_1_1;
5405 	       c_re(out[53 * ostride]) = tre2_1_0 - tim2_1_1;
5406 	       c_im(out[53 * ostride]) = tim2_1_0 + tre2_1_1;
5407 	  }
5408 	  {
5409 	       FFTW_REAL tre2_0_0;
5410 	       FFTW_REAL tim2_0_0;
5411 	       FFTW_REAL tre2_0_1;
5412 	       FFTW_REAL tim2_0_1;
5413 	       FFTW_REAL tre2_1_0;
5414 	       FFTW_REAL tim2_1_0;
5415 	       FFTW_REAL tre2_1_1;
5416 	       FFTW_REAL tim2_1_1;
5417 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5418 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5419 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5420 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5421 	       {
5422 		    FFTW_REAL tre3_0_0;
5423 		    FFTW_REAL tim3_0_0;
5424 		    FFTW_REAL tre3_1_0;
5425 		    FFTW_REAL tim3_1_0;
5426 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5427 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5428 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5429 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5430 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5431 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5432 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5433 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5434 	       }
5435 	       c_re(out[13 * ostride]) = tre2_0_0 + tre2_0_1;
5436 	       c_im(out[13 * ostride]) = tim2_0_0 + tim2_0_1;
5437 	       c_re(out[45 * ostride]) = tre2_0_0 - tre2_0_1;
5438 	       c_im(out[45 * ostride]) = tim2_0_0 - tim2_0_1;
5439 	       c_re(out[29 * ostride]) = tre2_1_0 + tim2_1_1;
5440 	       c_im(out[29 * ostride]) = tim2_1_0 - tre2_1_1;
5441 	       c_re(out[61 * ostride]) = tre2_1_0 - tim2_1_1;
5442 	       c_im(out[61 * ostride]) = tim2_1_0 + tre2_1_1;
5443 	  }
5444      }
5445      {
5446 	  FFTW_REAL tre1_0_0;
5447 	  FFTW_REAL tim1_0_0;
5448 	  FFTW_REAL tre1_0_1;
5449 	  FFTW_REAL tim1_0_1;
5450 	  FFTW_REAL tre1_0_2;
5451 	  FFTW_REAL tim1_0_2;
5452 	  FFTW_REAL tre1_0_3;
5453 	  FFTW_REAL tim1_0_3;
5454 	  FFTW_REAL tre1_1_0;
5455 	  FFTW_REAL tim1_1_0;
5456 	  FFTW_REAL tre1_1_1;
5457 	  FFTW_REAL tim1_1_1;
5458 	  FFTW_REAL tre1_1_2;
5459 	  FFTW_REAL tim1_1_2;
5460 	  FFTW_REAL tre1_1_3;
5461 	  FFTW_REAL tim1_1_3;
5462 	  {
5463 	       FFTW_REAL tre2_1_0;
5464 	       FFTW_REAL tim2_1_0;
5465 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_6_4 - tre0_6_4);
5466 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_6_4 + tre0_6_4);
5467 	       tre1_0_0 = tre0_6_0 + tre2_1_0;
5468 	       tim1_0_0 = tim0_6_0 - tim2_1_0;
5469 	       tre1_1_0 = tre0_6_0 - tre2_1_0;
5470 	       tim1_1_0 = tim0_6_0 + tim2_1_0;
5471 	  }
5472 	  {
5473 	       FFTW_REAL tre2_0_0;
5474 	       FFTW_REAL tim2_0_0;
5475 	       FFTW_REAL tre2_1_0;
5476 	       FFTW_REAL tim2_1_0;
5477 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_1) + (((FFTW_REAL) FFTW_K555570233) * tim0_6_1);
5478 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_1) - (((FFTW_REAL) FFTW_K555570233) * tre0_6_1);
5479 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_6_5) - (((FFTW_REAL) FFTW_K980785280) * tre0_6_5);
5480 	       tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_6_5) + (((FFTW_REAL) FFTW_K195090322) * tre0_6_5);
5481 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
5482 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
5483 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
5484 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
5485 	  }
5486 	  {
5487 	       FFTW_REAL tre2_0_0;
5488 	       FFTW_REAL tim2_0_0;
5489 	       FFTW_REAL tre2_1_0;
5490 	       FFTW_REAL tim2_1_0;
5491 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_2) + (((FFTW_REAL) FFTW_K923879532) * tim0_6_2);
5492 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_2) - (((FFTW_REAL) FFTW_K923879532) * tre0_6_2);
5493 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_6_6) + (((FFTW_REAL) FFTW_K382683432) * tim0_6_6);
5494 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_6_6);
5495 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
5496 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
5497 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
5498 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
5499 	  }
5500 	  {
5501 	       FFTW_REAL tre2_0_0;
5502 	       FFTW_REAL tim2_0_0;
5503 	       FFTW_REAL tre2_1_0;
5504 	       FFTW_REAL tim2_1_0;
5505 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_6_3) - (((FFTW_REAL) FFTW_K195090322) * tre0_6_3);
5506 	       tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_6_3) + (((FFTW_REAL) FFTW_K980785280) * tre0_6_3);
5507 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_6_7) + (((FFTW_REAL) FFTW_K831469612) * tim0_6_7);
5508 	       tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_7) - (((FFTW_REAL) FFTW_K555570233) * tim0_6_7);
5509 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
5510 	       tim1_0_3 = tim2_1_0 - tim2_0_0;
5511 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
5512 	       tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
5513 	  }
5514 	  {
5515 	       FFTW_REAL tre2_0_0;
5516 	       FFTW_REAL tim2_0_0;
5517 	       FFTW_REAL tre2_0_1;
5518 	       FFTW_REAL tim2_0_1;
5519 	       FFTW_REAL tre2_1_0;
5520 	       FFTW_REAL tim2_1_0;
5521 	       FFTW_REAL tre2_1_1;
5522 	       FFTW_REAL tim2_1_1;
5523 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5524 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5525 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5526 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5527 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5528 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5529 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5530 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5531 	       c_re(out[6 * ostride]) = tre2_0_0 + tre2_0_1;
5532 	       c_im(out[6 * ostride]) = tim2_0_0 + tim2_0_1;
5533 	       c_re(out[38 * ostride]) = tre2_0_0 - tre2_0_1;
5534 	       c_im(out[38 * ostride]) = tim2_0_0 - tim2_0_1;
5535 	       c_re(out[22 * ostride]) = tre2_1_0 + tim2_1_1;
5536 	       c_im(out[22 * ostride]) = tim2_1_0 - tre2_1_1;
5537 	       c_re(out[54 * ostride]) = tre2_1_0 - tim2_1_1;
5538 	       c_im(out[54 * ostride]) = tim2_1_0 + tre2_1_1;
5539 	  }
5540 	  {
5541 	       FFTW_REAL tre2_0_0;
5542 	       FFTW_REAL tim2_0_0;
5543 	       FFTW_REAL tre2_0_1;
5544 	       FFTW_REAL tim2_0_1;
5545 	       FFTW_REAL tre2_1_0;
5546 	       FFTW_REAL tim2_1_0;
5547 	       FFTW_REAL tre2_1_1;
5548 	       FFTW_REAL tim2_1_1;
5549 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5550 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5551 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5552 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5553 	       {
5554 		    FFTW_REAL tre3_0_0;
5555 		    FFTW_REAL tim3_0_0;
5556 		    FFTW_REAL tre3_1_0;
5557 		    FFTW_REAL tim3_1_0;
5558 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5559 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5560 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5561 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5562 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5563 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5564 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5565 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5566 	       }
5567 	       c_re(out[14 * ostride]) = tre2_0_0 + tre2_0_1;
5568 	       c_im(out[14 * ostride]) = tim2_0_0 + tim2_0_1;
5569 	       c_re(out[46 * ostride]) = tre2_0_0 - tre2_0_1;
5570 	       c_im(out[46 * ostride]) = tim2_0_0 - tim2_0_1;
5571 	       c_re(out[30 * ostride]) = tre2_1_0 + tim2_1_1;
5572 	       c_im(out[30 * ostride]) = tim2_1_0 - tre2_1_1;
5573 	       c_re(out[62 * ostride]) = tre2_1_0 - tim2_1_1;
5574 	       c_im(out[62 * ostride]) = tim2_1_0 + tre2_1_1;
5575 	  }
5576      }
5577      {
5578 	  FFTW_REAL tre1_0_0;
5579 	  FFTW_REAL tim1_0_0;
5580 	  FFTW_REAL tre1_0_1;
5581 	  FFTW_REAL tim1_0_1;
5582 	  FFTW_REAL tre1_0_2;
5583 	  FFTW_REAL tim1_0_2;
5584 	  FFTW_REAL tre1_0_3;
5585 	  FFTW_REAL tim1_0_3;
5586 	  FFTW_REAL tre1_1_0;
5587 	  FFTW_REAL tim1_1_0;
5588 	  FFTW_REAL tre1_1_1;
5589 	  FFTW_REAL tim1_1_1;
5590 	  FFTW_REAL tre1_1_2;
5591 	  FFTW_REAL tim1_1_2;
5592 	  FFTW_REAL tre1_1_3;
5593 	  FFTW_REAL tim1_1_3;
5594 	  {
5595 	       FFTW_REAL tre2_1_0;
5596 	       FFTW_REAL tim2_1_0;
5597 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_7_4) - (((FFTW_REAL) FFTW_K923879532) * tre0_7_4);
5598 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_7_4) + (((FFTW_REAL) FFTW_K382683432) * tre0_7_4);
5599 	       tre1_0_0 = tre0_7_0 + tre2_1_0;
5600 	       tim1_0_0 = tim0_7_0 - tim2_1_0;
5601 	       tre1_1_0 = tre0_7_0 - tre2_1_0;
5602 	       tim1_1_0 = tim0_7_0 + tim2_1_0;
5603 	  }
5604 	  {
5605 	       FFTW_REAL tre2_0_0;
5606 	       FFTW_REAL tim2_0_0;
5607 	       FFTW_REAL tre2_1_0;
5608 	       FFTW_REAL tim2_1_0;
5609 	       tre2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_7_1) + (((FFTW_REAL) FFTW_K634393284) * tim0_7_1);
5610 	       tim2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_7_1) - (((FFTW_REAL) FFTW_K634393284) * tre0_7_1);
5611 	       tre2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_7_5) + (((FFTW_REAL) FFTW_K290284677) * tim0_7_5);
5612 	       tim2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tre0_7_5) - (((FFTW_REAL) FFTW_K956940335) * tim0_7_5);
5613 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
5614 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
5615 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
5616 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
5617 	  }
5618 	  {
5619 	       FFTW_REAL tre2_0_0;
5620 	       FFTW_REAL tim2_0_0;
5621 	       FFTW_REAL tre2_1_0;
5622 	       FFTW_REAL tim2_1_0;
5623 	       tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_7_2) + (((FFTW_REAL) FFTW_K980785280) * tim0_7_2);
5624 	       tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_7_2) - (((FFTW_REAL) FFTW_K980785280) * tre0_7_2);
5625 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_7_6) + (((FFTW_REAL) FFTW_K831469612) * tim0_7_6);
5626 	       tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_7_6) - (((FFTW_REAL) FFTW_K555570233) * tim0_7_6);
5627 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
5628 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
5629 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
5630 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
5631 	  }
5632 	  {
5633 	       FFTW_REAL tre2_0_0;
5634 	       FFTW_REAL tim2_0_0;
5635 	       FFTW_REAL tre2_1_0;
5636 	       FFTW_REAL tim2_1_0;
5637 	       tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_7_3) - (((FFTW_REAL) FFTW_K471396736) * tre0_7_3);
5638 	       tim2_0_0 = (((FFTW_REAL) FFTW_K471396736) * tim0_7_3) + (((FFTW_REAL) FFTW_K881921264) * tre0_7_3);
5639 	       tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_7_7) - (((FFTW_REAL) FFTW_K995184726) * tim0_7_7);
5640 	       tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_7_7) + (((FFTW_REAL) FFTW_K995184726) * tre0_7_7);
5641 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
5642 	       tim1_0_3 = tim2_1_0 - tim2_0_0;
5643 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
5644 	       tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
5645 	  }
5646 	  {
5647 	       FFTW_REAL tre2_0_0;
5648 	       FFTW_REAL tim2_0_0;
5649 	       FFTW_REAL tre2_0_1;
5650 	       FFTW_REAL tim2_0_1;
5651 	       FFTW_REAL tre2_1_0;
5652 	       FFTW_REAL tim2_1_0;
5653 	       FFTW_REAL tre2_1_1;
5654 	       FFTW_REAL tim2_1_1;
5655 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
5656 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
5657 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
5658 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
5659 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
5660 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
5661 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
5662 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
5663 	       c_re(out[7 * ostride]) = tre2_0_0 + tre2_0_1;
5664 	       c_im(out[7 * ostride]) = tim2_0_0 + tim2_0_1;
5665 	       c_re(out[39 * ostride]) = tre2_0_0 - tre2_0_1;
5666 	       c_im(out[39 * ostride]) = tim2_0_0 - tim2_0_1;
5667 	       c_re(out[23 * ostride]) = tre2_1_0 + tim2_1_1;
5668 	       c_im(out[23 * ostride]) = tim2_1_0 - tre2_1_1;
5669 	       c_re(out[55 * ostride]) = tre2_1_0 - tim2_1_1;
5670 	       c_im(out[55 * ostride]) = tim2_1_0 + tre2_1_1;
5671 	  }
5672 	  {
5673 	       FFTW_REAL tre2_0_0;
5674 	       FFTW_REAL tim2_0_0;
5675 	       FFTW_REAL tre2_0_1;
5676 	       FFTW_REAL tim2_0_1;
5677 	       FFTW_REAL tre2_1_0;
5678 	       FFTW_REAL tim2_1_0;
5679 	       FFTW_REAL tre2_1_1;
5680 	       FFTW_REAL tim2_1_1;
5681 	       tre2_0_0 = tre1_1_0 + tim1_1_2;
5682 	       tim2_0_0 = tim1_1_0 - tre1_1_2;
5683 	       tre2_1_0 = tre1_1_0 - tim1_1_2;
5684 	       tim2_1_0 = tim1_1_0 + tre1_1_2;
5685 	       {
5686 		    FFTW_REAL tre3_0_0;
5687 		    FFTW_REAL tim3_0_0;
5688 		    FFTW_REAL tre3_1_0;
5689 		    FFTW_REAL tim3_1_0;
5690 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
5691 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
5692 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
5693 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
5694 		    tre2_0_1 = tre3_0_0 + tre3_1_0;
5695 		    tim2_0_1 = tim3_0_0 - tim3_1_0;
5696 		    tre2_1_1 = tre3_0_0 - tre3_1_0;
5697 		    tim2_1_1 = tim3_0_0 + tim3_1_0;
5698 	       }
5699 	       c_re(out[15 * ostride]) = tre2_0_0 + tre2_0_1;
5700 	       c_im(out[15 * ostride]) = tim2_0_0 + tim2_0_1;
5701 	       c_re(out[47 * ostride]) = tre2_0_0 - tre2_0_1;
5702 	       c_im(out[47 * ostride]) = tim2_0_0 - tim2_0_1;
5703 	       c_re(out[31 * ostride]) = tre2_1_0 + tim2_1_1;
5704 	       c_im(out[31 * ostride]) = tim2_1_0 - tre2_1_1;
5705 	       c_re(out[63 * ostride]) = tre2_1_0 - tim2_1_1;
5706 	       c_im(out[63 * ostride]) = tim2_1_0 + tre2_1_1;
5707 	  }
5708      }
5709 }
5710 
5711 /* This function contains 90 FP additions and 36 FP multiplications */
5712 
/*
 * Size-7 "no twiddle" codelet.
 *
 * Reads seven complex samples in[0], in[istride], ..., in[6 * istride] and
 * stores seven complex results out[0], out[ostride], ..., out[6 * ostride].
 *
 * out[0] is the plain sum of the seven inputs.  The other six outputs are
 * produced in pairs (1,6), (2,5) and (3,4): for each pair a "common" term
 * and a "delta" term are formed per component, and the pair receives
 * common + delta and common - delta respectively.
 *
 * Every input is copied into a local before the first store to out[].
 *
 * NOTE(review): the FFTW_K* multipliers are defined outside this function.
 * Judging by their names they are the cosines/sines of multiples of
 * 2*pi/7, which would make this a 7-point DFT -- confirm against konst.h.
 */
static void fftw_no_twiddle_7(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Multipliers, named after the leading digits of the FFTW_K* macro. */
     const FFTW_REAL k623 = (FFTW_REAL) FFTW_K623489801;
     const FFTW_REAL k222 = (FFTW_REAL) FFTW_K222520933;
     const FFTW_REAL k900 = (FFTW_REAL) FFTW_K900968867;
     const FFTW_REAL k781 = (FFTW_REAL) FFTW_K781831482;
     const FFTW_REAL k974 = (FFTW_REAL) FFTW_K974927912;
     const FFTW_REAL k433 = (FFTW_REAL) FFTW_K433883739;

     /* Input samples: xrN / xiN are the real / imaginary part of sample N. */
     const FFTW_REAL xr0 = c_re(in[0]);
     const FFTW_REAL xi0 = c_im(in[0]);
     const FFTW_REAL xr1 = c_re(in[istride]);
     const FFTW_REAL xi1 = c_im(in[istride]);
     const FFTW_REAL xr2 = c_re(in[2 * istride]);
     const FFTW_REAL xi2 = c_im(in[2 * istride]);
     const FFTW_REAL xr3 = c_re(in[3 * istride]);
     const FFTW_REAL xi3 = c_im(in[3 * istride]);
     const FFTW_REAL xr4 = c_re(in[4 * istride]);
     const FFTW_REAL xi4 = c_im(in[4 * istride]);
     const FFTW_REAL xr5 = c_re(in[5 * istride]);
     const FFTW_REAL xi5 = c_im(in[5 * istride]);
     const FFTW_REAL xr6 = c_re(in[6 * istride]);
     const FFTW_REAL xi6 = c_im(in[6 * istride]);

     /* Output 0: sum of all samples. */
     c_re(out[0]) = xr0 + xr1 + xr2 + xr3 + xr4 + xr5 + xr6;
     c_im(out[0]) = xi0 + xi1 + xi2 + xi3 + xi4 + xi5 + xi6;

     /* Outputs 1 and 6. */
     {
	  const FFTW_REAL common_re = xr0 + (k623 * (xr1 + xr6)) - (k900 * (xr3 + xr4)) - (k222 * (xr2 + xr5));
	  const FFTW_REAL delta_re = (k781 * (xi1 - xi6)) + (k974 * (xi2 - xi5)) + (k433 * (xi3 - xi4));
	  const FFTW_REAL common_im = xi0 + (k623 * (xi1 + xi6)) - (k900 * (xi3 + xi4)) - (k222 * (xi2 + xi5));
	  const FFTW_REAL delta_im = (k781 * (xr6 - xr1)) + (k974 * (xr5 - xr2)) + (k433 * (xr4 - xr3));

	  c_re(out[ostride]) = common_re + delta_re;
	  c_re(out[6 * ostride]) = common_re - delta_re;
	  c_im(out[ostride]) = common_im + delta_im;
	  c_im(out[6 * ostride]) = common_im - delta_im;
     }

     /* Outputs 2 and 5. */
     {
	  const FFTW_REAL common_re = xr0 + (k623 * (xr3 + xr4)) - (k900 * (xr2 + xr5)) - (k222 * (xr1 + xr6));
	  const FFTW_REAL delta_re = (k974 * (xi1 - xi6)) + (k433 * (xi5 - xi2)) + (k781 * (xi4 - xi3));
	  const FFTW_REAL common_im = xi0 + (k623 * (xi3 + xi4)) - (k900 * (xi2 + xi5)) - (k222 * (xi1 + xi6));
	  const FFTW_REAL delta_im = (k974 * (xr6 - xr1)) + (k433 * (xr2 - xr5)) + (k781 * (xr3 - xr4));

	  c_re(out[2 * ostride]) = common_re + delta_re;
	  c_re(out[5 * ostride]) = common_re - delta_re;
	  c_im(out[2 * ostride]) = common_im + delta_im;
	  c_im(out[5 * ostride]) = common_im - delta_im;
     }

     /* Outputs 3 and 4. */
     {
	  const FFTW_REAL common_re = xr0 + (k623 * (xr2 + xr5)) - (k222 * (xr3 + xr4)) - (k900 * (xr1 + xr6));
	  const FFTW_REAL delta_re = (k433 * (xi1 - xi6)) + (k781 * (xi5 - xi2)) + (k974 * (xi3 - xi4));
	  const FFTW_REAL common_im = xi0 + (k623 * (xi2 + xi5)) - (k222 * (xi3 + xi4)) - (k900 * (xi1 + xi6));
	  const FFTW_REAL delta_im = (k433 * (xr6 - xr1)) + (k781 * (xr2 - xr5)) + (k974 * (xr4 - xr3));

	  c_re(out[3 * ostride]) = common_re + delta_re;
	  c_re(out[4 * ostride]) = common_re - delta_re;
	  c_im(out[3 * ostride]) = common_im + delta_im;
	  c_im(out[4 * ostride]) = common_im - delta_im;
     }
}
5794 
5795 /* This function contains 52 FP additions and 4 FP multiplications */
5796 
/*
 * Size-8 "no twiddle" codelet.
 *
 * Reads eight complex samples in[0], in[istride], ..., in[7 * istride] and
 * stores eight complex results out[0], out[ostride], ..., out[7 * ostride].
 *
 * Stage 1 pairs each sample K (K = 0..3) with sample K + 4 and keeps their
 * sum (sK) and difference (dK).  The four sums are then combined into the
 * even-indexed outputs (0, 4, 2, 6) and the four differences into the
 * odd-indexed outputs (1, 5, 3, 7).
 *
 * All eight inputs are consumed in stage 1, before the first store to out[].
 *
 * NOTE(review): FFTW_K707106781 is defined outside this function; its name
 * suggests 1/sqrt(2) -- confirm against konst.h.
 */
static void fftw_no_twiddle_8(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     const FFTW_REAL k707 = (FFTW_REAL) FFTW_K707106781;

     /* sK = sample K + sample K+4, dK = sample K - sample K+4 (r: real, i: imag) */
     FFTW_REAL s0r, s0i, d0r, d0i;
     FFTW_REAL s1r, s1i, d1r, d1i;
     FFTW_REAL s2r, s2i, d2r, d2i;
     FFTW_REAL s3r, s3i, d3r, d3i;

     /* Stage 1: samples 0 and 4. */
     {
	  const FFTW_REAL ar = c_re(in[0]);
	  const FFTW_REAL ai = c_im(in[0]);
	  const FFTW_REAL br = c_re(in[4 * istride]);
	  const FFTW_REAL bi = c_im(in[4 * istride]);

	  s0r = ar + br;
	  s0i = ai + bi;
	  d0r = ar - br;
	  d0i = ai - bi;
     }

     /* Stage 1: samples 1 and 5. */
     {
	  const FFTW_REAL ar = c_re(in[istride]);
	  const FFTW_REAL ai = c_im(in[istride]);
	  const FFTW_REAL br = c_re(in[5 * istride]);
	  const FFTW_REAL bi = c_im(in[5 * istride]);

	  s1r = ar + br;
	  s1i = ai + bi;
	  d1r = ar - br;
	  d1i = ai - bi;
     }

     /* Stage 1: samples 2 and 6. */
     {
	  const FFTW_REAL ar = c_re(in[2 * istride]);
	  const FFTW_REAL ai = c_im(in[2 * istride]);
	  const FFTW_REAL br = c_re(in[6 * istride]);
	  const FFTW_REAL bi = c_im(in[6 * istride]);

	  s2r = ar + br;
	  s2i = ai + bi;
	  d2r = ar - br;
	  d2i = ai - bi;
     }

     /* Stage 1: samples 3 and 7. */
     {
	  const FFTW_REAL ar = c_re(in[3 * istride]);
	  const FFTW_REAL ai = c_im(in[3 * istride]);
	  const FFTW_REAL br = c_re(in[7 * istride]);
	  const FFTW_REAL bi = c_im(in[7 * istride]);

	  s3r = ar + br;
	  s3i = ai + bi;
	  d3r = ar - br;
	  d3i = ai - bi;
     }

     /* Even-indexed outputs, built from the four sums. */
     {
	  const FFTW_REAL add02r = s0r + s2r;
	  const FFTW_REAL add02i = s0i + s2i;
	  const FFTW_REAL sub02r = s0r - s2r;
	  const FFTW_REAL sub02i = s0i - s2i;
	  const FFTW_REAL add13r = s1r + s3r;
	  const FFTW_REAL add13i = s1i + s3i;
	  const FFTW_REAL sub13r = s1r - s3r;
	  const FFTW_REAL sub13i = s1i - s3i;

	  c_re(out[0]) = add02r + add13r;
	  c_im(out[0]) = add02i + add13i;
	  c_re(out[4 * ostride]) = add02r - add13r;
	  c_im(out[4 * ostride]) = add02i - add13i;
	  /* Outputs 2 and 6 mix components: real uses sub13i, imag uses sub13r. */
	  c_re(out[2 * ostride]) = sub02r + sub13i;
	  c_im(out[2 * ostride]) = sub02i - sub13r;
	  c_re(out[6 * ostride]) = sub02r - sub13i;
	  c_im(out[6 * ostride]) = sub02i + sub13r;
     }

     /* Odd-indexed outputs, built from the four differences. */
     {
	  /* d0 combined with d2, components of d2 swapped. */
	  const FFTW_REAL headr = d0r + d2i;
	  const FFTW_REAL headi = d0i - d2r;
	  const FFTW_REAL tailr = d0r - d2i;
	  const FFTW_REAL taili = d0i + d2r;

	  /* d1 and d3, each scaled by k707 after mixing its components. */
	  const FFTW_REAL m1r = k707 * (d1r + d1i);
	  const FFTW_REAL m1i = k707 * (d1i - d1r);
	  const FFTW_REAL m3r = k707 * (d3i - d3r);
	  const FFTW_REAL m3i = k707 * (d3i + d3r);

	  const FFTW_REAL pr = m1r + m3r;
	  const FFTW_REAL pi = m1i - m3i;
	  const FFTW_REAL qr = m1r - m3r;
	  const FFTW_REAL qi = m1i + m3i;

	  c_re(out[ostride]) = headr + pr;
	  c_im(out[ostride]) = headi + pi;
	  c_re(out[5 * ostride]) = headr - pr;
	  c_im(out[5 * ostride]) = headi - pi;
	  /* Outputs 3 and 7 mix components: real uses qi, imag uses qr. */
	  c_re(out[3 * ostride]) = tailr + qi;
	  c_im(out[3 * ostride]) = taili - qr;
	  c_re(out[7 * ostride]) = tailr - qi;
	  c_im(out[7 * ostride]) = taili + qr;
     }
}
5934 
5935 /* This function contains 92 FP additions and 40 FP multiplications */
5936 
fftw_no_twiddle_9(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)5937 static void fftw_no_twiddle_9(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
5938 {
5939      FFTW_REAL tre0_0_0;
5940      FFTW_REAL tim0_0_0;
5941      FFTW_REAL tre0_0_1;
5942      FFTW_REAL tim0_0_1;
5943      FFTW_REAL tre0_0_2;
5944      FFTW_REAL tim0_0_2;
5945      FFTW_REAL tre0_1_0;
5946      FFTW_REAL tim0_1_0;
5947      FFTW_REAL tre0_1_1;
5948      FFTW_REAL tim0_1_1;
5949      FFTW_REAL tre0_1_2;
5950      FFTW_REAL tim0_1_2;
5951      FFTW_REAL tre0_2_0;
5952      FFTW_REAL tim0_2_0;
5953      FFTW_REAL tre0_2_1;
5954      FFTW_REAL tim0_2_1;
5955      FFTW_REAL tre0_2_2;
5956      FFTW_REAL tim0_2_2;
5957      {
5958 	  FFTW_REAL tre1_0_0;
5959 	  FFTW_REAL tim1_0_0;
5960 	  FFTW_REAL tre1_1_0;
5961 	  FFTW_REAL tim1_1_0;
5962 	  FFTW_REAL tre1_2_0;
5963 	  FFTW_REAL tim1_2_0;
5964 	  tre1_0_0 = c_re(in[0]);
5965 	  tim1_0_0 = c_im(in[0]);
5966 	  tre1_1_0 = c_re(in[3 * istride]);
5967 	  tim1_1_0 = c_im(in[3 * istride]);
5968 	  tre1_2_0 = c_re(in[6 * istride]);
5969 	  tim1_2_0 = c_im(in[6 * istride]);
5970 	  tre0_0_0 = tre1_0_0 + tre1_1_0 + tre1_2_0;
5971 	  tim0_0_0 = tim1_0_0 + tim1_1_0 + tim1_2_0;
5972 	  {
5973 	       FFTW_REAL tre2_0_0;
5974 	       FFTW_REAL tre2_1_0;
5975 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
5976 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
5977 	       tre0_1_0 = tre2_0_0 + tre2_1_0;
5978 	       tre0_2_0 = tre2_0_0 - tre2_1_0;
5979 	  }
5980 	  {
5981 	       FFTW_REAL tim2_0_0;
5982 	       FFTW_REAL tim2_1_0;
5983 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
5984 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
5985 	       tim0_1_0 = tim2_0_0 + tim2_1_0;
5986 	       tim0_2_0 = tim2_0_0 - tim2_1_0;
5987 	  }
5988      }
5989      {
5990 	  FFTW_REAL tre1_0_0;
5991 	  FFTW_REAL tim1_0_0;
5992 	  FFTW_REAL tre1_1_0;
5993 	  FFTW_REAL tim1_1_0;
5994 	  FFTW_REAL tre1_2_0;
5995 	  FFTW_REAL tim1_2_0;
5996 	  tre1_0_0 = c_re(in[istride]);
5997 	  tim1_0_0 = c_im(in[istride]);
5998 	  tre1_1_0 = c_re(in[4 * istride]);
5999 	  tim1_1_0 = c_im(in[4 * istride]);
6000 	  tre1_2_0 = c_re(in[7 * istride]);
6001 	  tim1_2_0 = c_im(in[7 * istride]);
6002 	  tre0_0_1 = tre1_0_0 + tre1_1_0 + tre1_2_0;
6003 	  tim0_0_1 = tim1_0_0 + tim1_1_0 + tim1_2_0;
6004 	  {
6005 	       FFTW_REAL tre2_0_0;
6006 	       FFTW_REAL tre2_1_0;
6007 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
6008 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
6009 	       tre0_1_1 = tre2_0_0 + tre2_1_0;
6010 	       tre0_2_1 = tre2_0_0 - tre2_1_0;
6011 	  }
6012 	  {
6013 	       FFTW_REAL tim2_0_0;
6014 	       FFTW_REAL tim2_1_0;
6015 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
6016 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
6017 	       tim0_1_1 = tim2_0_0 + tim2_1_0;
6018 	       tim0_2_1 = tim2_0_0 - tim2_1_0;
6019 	  }
6020      }
6021      {
6022 	  FFTW_REAL tre1_0_0;
6023 	  FFTW_REAL tim1_0_0;
6024 	  FFTW_REAL tre1_1_0;
6025 	  FFTW_REAL tim1_1_0;
6026 	  FFTW_REAL tre1_2_0;
6027 	  FFTW_REAL tim1_2_0;
6028 	  tre1_0_0 = c_re(in[2 * istride]);
6029 	  tim1_0_0 = c_im(in[2 * istride]);
6030 	  tre1_1_0 = c_re(in[5 * istride]);
6031 	  tim1_1_0 = c_im(in[5 * istride]);
6032 	  tre1_2_0 = c_re(in[8 * istride]);
6033 	  tim1_2_0 = c_im(in[8 * istride]);
6034 	  tre0_0_2 = tre1_0_0 + tre1_1_0 + tre1_2_0;
6035 	  tim0_0_2 = tim1_0_0 + tim1_1_0 + tim1_2_0;
6036 	  {
6037 	       FFTW_REAL tre2_0_0;
6038 	       FFTW_REAL tre2_1_0;
6039 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
6040 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
6041 	       tre0_1_2 = tre2_0_0 + tre2_1_0;
6042 	       tre0_2_2 = tre2_0_0 - tre2_1_0;
6043 	  }
6044 	  {
6045 	       FFTW_REAL tim2_0_0;
6046 	       FFTW_REAL tim2_1_0;
6047 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
6048 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
6049 	       tim0_1_2 = tim2_0_0 + tim2_1_0;
6050 	       tim0_2_2 = tim2_0_0 - tim2_1_0;
6051 	  }
6052      }
6053      c_re(out[0]) = tre0_0_0 + tre0_0_1 + tre0_0_2;
6054      c_im(out[0]) = tim0_0_0 + tim0_0_1 + tim0_0_2;
6055      {
6056 	  FFTW_REAL tre2_0_0;
6057 	  FFTW_REAL tre2_1_0;
6058 	  tre2_0_0 = tre0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre0_0_1 + tre0_0_2));
6059 	  tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim0_0_1 - tim0_0_2);
6060 	  c_re(out[3 * ostride]) = tre2_0_0 + tre2_1_0;
6061 	  c_re(out[6 * ostride]) = tre2_0_0 - tre2_1_0;
6062      }
6063      {
6064 	  FFTW_REAL tim2_0_0;
6065 	  FFTW_REAL tim2_1_0;
6066 	  tim2_0_0 = tim0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim0_0_1 + tim0_0_2));
6067 	  tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre0_0_2 - tre0_0_1);
6068 	  c_im(out[3 * ostride]) = tim2_0_0 + tim2_1_0;
6069 	  c_im(out[6 * ostride]) = tim2_0_0 - tim2_1_0;
6070      }
6071      {
6072 	  FFTW_REAL tre1_1_0;
6073 	  FFTW_REAL tim1_1_0;
6074 	  FFTW_REAL tre1_2_0;
6075 	  FFTW_REAL tim1_2_0;
6076 	  tre1_1_0 = (((FFTW_REAL) FFTW_K766044443) * tre0_1_1) + (((FFTW_REAL) FFTW_K642787609) * tim0_1_1);
6077 	  tim1_1_0 = (((FFTW_REAL) FFTW_K766044443) * tim0_1_1) - (((FFTW_REAL) FFTW_K642787609) * tre0_1_1);
6078 	  tre1_2_0 = (((FFTW_REAL) FFTW_K173648177) * tre0_1_2) + (((FFTW_REAL) FFTW_K984807753) * tim0_1_2);
6079 	  tim1_2_0 = (((FFTW_REAL) FFTW_K173648177) * tim0_1_2) - (((FFTW_REAL) FFTW_K984807753) * tre0_1_2);
6080 	  c_re(out[ostride]) = tre0_1_0 + tre1_1_0 + tre1_2_0;
6081 	  c_im(out[ostride]) = tim0_1_0 + tim1_1_0 + tim1_2_0;
6082 	  {
6083 	       FFTW_REAL tre2_0_0;
6084 	       FFTW_REAL tre2_1_0;
6085 	       tre2_0_0 = tre0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
6086 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 - tim1_2_0);
6087 	       c_re(out[4 * ostride]) = tre2_0_0 + tre2_1_0;
6088 	       c_re(out[7 * ostride]) = tre2_0_0 - tre2_1_0;
6089 	  }
6090 	  {
6091 	       FFTW_REAL tim2_0_0;
6092 	       FFTW_REAL tim2_1_0;
6093 	       tim2_0_0 = tim0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
6094 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
6095 	       c_im(out[4 * ostride]) = tim2_0_0 + tim2_1_0;
6096 	       c_im(out[7 * ostride]) = tim2_0_0 - tim2_1_0;
6097 	  }
6098      }
6099      {
6100 	  FFTW_REAL tre1_1_0;
6101 	  FFTW_REAL tim1_1_0;
6102 	  FFTW_REAL tre1_2_0;
6103 	  FFTW_REAL tim1_2_0;
6104 	  tre1_1_0 = (((FFTW_REAL) FFTW_K173648177) * tre0_2_1) + (((FFTW_REAL) FFTW_K984807753) * tim0_2_1);
6105 	  tim1_1_0 = (((FFTW_REAL) FFTW_K173648177) * tim0_2_1) - (((FFTW_REAL) FFTW_K984807753) * tre0_2_1);
6106 	  tre1_2_0 = (((FFTW_REAL) FFTW_K342020143) * tim0_2_2) - (((FFTW_REAL) FFTW_K939692620) * tre0_2_2);
6107 	  tim1_2_0 = (((FFTW_REAL) FFTW_K939692620) * tim0_2_2) + (((FFTW_REAL) FFTW_K342020143) * tre0_2_2);
6108 	  c_re(out[2 * ostride]) = tre0_2_0 + tre1_1_0 + tre1_2_0;
6109 	  c_im(out[2 * ostride]) = tim0_2_0 + tim1_1_0 - tim1_2_0;
6110 	  {
6111 	       FFTW_REAL tre2_0_0;
6112 	       FFTW_REAL tre2_1_0;
6113 	       tre2_0_0 = tre0_2_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
6114 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_1_0 + tim1_2_0);
6115 	       c_re(out[5 * ostride]) = tre2_0_0 + tre2_1_0;
6116 	       c_re(out[8 * ostride]) = tre2_0_0 - tre2_1_0;
6117 	  }
6118 	  {
6119 	       FFTW_REAL tim2_0_0;
6120 	       FFTW_REAL tim2_1_0;
6121 	       tim2_0_0 = tim0_2_0 + (((FFTW_REAL) FFTW_K499999999) * (tim1_2_0 - tim1_1_0));
6122 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_2_0 - tre1_1_0);
6123 	       c_im(out[5 * ostride]) = tim2_0_0 + tim2_1_0;
6124 	       c_im(out[8 * ostride]) = tim2_0_0 - tim2_1_0;
6125 	  }
6126      }
6127 }
6128 
6129 /* This function contains 0 FP additions and 0 FP multiplications */
6130 
/*
 * Length-1 member of the fftwi_no_twiddle_N codelet family: the single
 * complex element in[0] is copied to out[0] unchanged.
 *
 * in      - source element (only in[0] is read)
 * out     - destination element (only out[0] is written)
 * istride - unused; present so the signature matches the other codelets
 * ostride - unused; present so the signature matches the other codelets
 */
static void fftwi_no_twiddle_1(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Fetch both components before storing either one. */
     const FFTW_REAL x_re = c_re(in[0]);
     const FFTW_REAL x_im = c_im(in[0]);

     c_re(out[0]) = x_re;
     c_im(out[0]) = x_im;
}
6140 
6141 /* This function contains 108 FP additions and 32 FP multiplications */
6142 
/*
 * Length-10 member of the fftwi_no_twiddle_N codelet family.
 *
 * Reads ten complex samples in[0], in[istride], ..., in[9 * istride] and
 * writes ten complex results out[0], out[ostride], ..., out[9 * ostride].
 *
 * Stage 1 pairs the inputs that lie five apart (0/5, 2/7, 4/9, 6/1, 8/3)
 * and keeps the sum and the difference of each pair.
 * Stage 2 applies the same five-term combination to the sums, producing
 * outputs 0, 6, 4, 2 and 8, and to the differences, producing outputs
 * 5, 1, 9, 7 and 3.
 *
 * Every input is copied into a local before the first store to out[].
 *
 * NOTE(review): the FFTW_K* constants are defined in konst.h, which is not
 * visible here.  Their names suggest cos/sin of multiples of 2*pi/5; with
 * those values the routine is an unnormalised DFT with a positive
 * exponent -- confirm against konst.h.
 */
static void fftwi_no_twiddle_10(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Short aliases for the cast constants used by stage 2. */
     const FFTW_REAL k309 = (FFTW_REAL) FFTW_K309016994;
     const FFTW_REAL k809 = (FFTW_REAL) FFTW_K809016994;
     const FFTW_REAL k951 = (FFTW_REAL) FFTW_K951056516;
     const FFTW_REAL k587 = (FFTW_REAL) FFTW_K587785252;

     /* sum_*: pair sums, dif_*: pair differences (r = real, i = imaginary). */
     FFTW_REAL sum_r0, sum_i0, sum_r1, sum_i1, sum_r2, sum_i2, sum_r3, sum_i3, sum_r4, sum_i4;
     FFTW_REAL dif_r0, dif_i0, dif_r1, dif_i1, dif_r2, dif_i2, dif_r3, dif_i3, dif_r4, dif_i4;

     /* ---- Stage 1: two-point sums and differences ---- */
     {
          const FFTW_REAL a_re = c_re(in[0]);
          const FFTW_REAL a_im = c_im(in[0]);
          const FFTW_REAL b_re = c_re(in[5 * istride]);
          const FFTW_REAL b_im = c_im(in[5 * istride]);

          sum_r0 = a_re + b_re;
          sum_i0 = a_im + b_im;
          dif_r0 = a_re - b_re;
          dif_i0 = a_im - b_im;
     }
     {
          const FFTW_REAL a_re = c_re(in[2 * istride]);
          const FFTW_REAL a_im = c_im(in[2 * istride]);
          const FFTW_REAL b_re = c_re(in[7 * istride]);
          const FFTW_REAL b_im = c_im(in[7 * istride]);

          sum_r1 = a_re + b_re;
          sum_i1 = a_im + b_im;
          dif_r1 = a_re - b_re;
          dif_i1 = a_im - b_im;
     }
     {
          const FFTW_REAL a_re = c_re(in[4 * istride]);
          const FFTW_REAL a_im = c_im(in[4 * istride]);
          const FFTW_REAL b_re = c_re(in[9 * istride]);
          const FFTW_REAL b_im = c_im(in[9 * istride]);

          sum_r2 = a_re + b_re;
          sum_i2 = a_im + b_im;
          dif_r2 = a_re - b_re;
          dif_i2 = a_im - b_im;
     }
     {
          const FFTW_REAL a_re = c_re(in[6 * istride]);
          const FFTW_REAL a_im = c_im(in[6 * istride]);
          const FFTW_REAL b_re = c_re(in[istride]);
          const FFTW_REAL b_im = c_im(in[istride]);

          sum_r3 = a_re + b_re;
          sum_i3 = a_im + b_im;
          dif_r3 = a_re - b_re;
          dif_i3 = a_im - b_im;
     }
     {
          const FFTW_REAL a_re = c_re(in[8 * istride]);
          const FFTW_REAL a_im = c_im(in[8 * istride]);
          const FFTW_REAL b_re = c_re(in[3 * istride]);
          const FFTW_REAL b_im = c_im(in[3 * istride]);

          sum_r4 = a_re + b_re;
          sum_i4 = a_im + b_im;
          dif_r4 = a_re - b_re;
          dif_i4 = a_im - b_im;
     }

     /* ---- Stage 2a: five-term combination of the pair sums ---- */
     c_re(out[0]) = sum_r0 + sum_r1 + sum_r2 + sum_r3 + sum_r4;
     c_im(out[0]) = sum_i0 + sum_i1 + sum_i2 + sum_i3 + sum_i4;
     {
          /* real parts of outputs 6 and 4 */
          const FFTW_REAL sym = sum_r0 + (k309 * (sum_r1 + sum_r4)) - (k809 * (sum_r2 + sum_r3));
          const FFTW_REAL anti = (k951 * (sum_i4 - sum_i1)) + (k587 * (sum_i3 - sum_i2));

          c_re(out[6 * ostride]) = sym + anti;
          c_re(out[4 * ostride]) = sym - anti;
     }
     {
          /* imaginary parts of outputs 6 and 4 */
          const FFTW_REAL sym = sum_i0 + (k309 * (sum_i1 + sum_i4)) - (k809 * (sum_i2 + sum_i3));
          const FFTW_REAL anti = (k951 * (sum_r1 - sum_r4)) + (k587 * (sum_r2 - sum_r3));

          c_im(out[6 * ostride]) = sym + anti;
          c_im(out[4 * ostride]) = sym - anti;
     }
     {
          /* real parts of outputs 2 and 8 */
          const FFTW_REAL sym = sum_r0 + (k309 * (sum_r2 + sum_r3)) - (k809 * (sum_r1 + sum_r4));
          const FFTW_REAL anti = (k587 * (sum_i4 - sum_i1)) + (k951 * (sum_i2 - sum_i3));

          c_re(out[2 * ostride]) = sym + anti;
          c_re(out[8 * ostride]) = sym - anti;
     }
     {
          /* imaginary parts of outputs 2 and 8 */
          const FFTW_REAL sym = sum_i0 + (k309 * (sum_i2 + sum_i3)) - (k809 * (sum_i1 + sum_i4));
          const FFTW_REAL anti = (k587 * (sum_r1 - sum_r4)) + (k951 * (sum_r3 - sum_r2));

          c_im(out[2 * ostride]) = sym + anti;
          c_im(out[8 * ostride]) = sym - anti;
     }

     /* ---- Stage 2b: the same combination applied to the pair differences ---- */
     c_re(out[5 * ostride]) = dif_r0 + dif_r1 + dif_r2 + dif_r3 + dif_r4;
     c_im(out[5 * ostride]) = dif_i0 + dif_i1 + dif_i2 + dif_i3 + dif_i4;
     {
          /* real parts of outputs 1 and 9 */
          const FFTW_REAL sym = dif_r0 + (k309 * (dif_r1 + dif_r4)) - (k809 * (dif_r2 + dif_r3));
          const FFTW_REAL anti = (k951 * (dif_i4 - dif_i1)) + (k587 * (dif_i3 - dif_i2));

          c_re(out[ostride]) = sym + anti;
          c_re(out[9 * ostride]) = sym - anti;
     }
     {
          /* imaginary parts of outputs 1 and 9 */
          const FFTW_REAL sym = dif_i0 + (k309 * (dif_i1 + dif_i4)) - (k809 * (dif_i2 + dif_i3));
          const FFTW_REAL anti = (k951 * (dif_r1 - dif_r4)) + (k587 * (dif_r2 - dif_r3));

          c_im(out[ostride]) = sym + anti;
          c_im(out[9 * ostride]) = sym - anti;
     }
     {
          /* real parts of outputs 7 and 3 */
          const FFTW_REAL sym = dif_r0 + (k309 * (dif_r2 + dif_r3)) - (k809 * (dif_r1 + dif_r4));
          const FFTW_REAL anti = (k587 * (dif_i4 - dif_i1)) + (k951 * (dif_i2 - dif_i3));

          c_re(out[7 * ostride]) = sym + anti;
          c_re(out[3 * ostride]) = sym - anti;
     }
     {
          /* imaginary parts of outputs 7 and 3 */
          const FFTW_REAL sym = dif_i0 + (k309 * (dif_i2 + dif_i3)) - (k809 * (dif_i1 + dif_i4));
          const FFTW_REAL anti = (k587 * (dif_r1 - dif_r4)) + (k951 * (dif_r3 - dif_r2));

          c_im(out[7 * ostride]) = sym + anti;
          c_im(out[3 * ostride]) = sym - anti;
     }
}
6304 
6305 /* This function contains 230 FP additions and 100 FP multiplications */
6306 
/*
 * Length-11 member of the fftwi_no_twiddle_N codelet family.
 *
 * Reads eleven complex samples in[0], in[istride], ..., in[10 * istride]
 * and writes eleven complex results out[0], out[ostride], ...,
 * out[10 * ostride].
 *
 * out[0] is the plain sum of all inputs.  The remaining outputs are
 * produced in mirrored pairs (1/10, 2/9, 3/8, 4/7, 5/6): for each pair a
 * "sym" term is built from sums of mirrored inputs and an "anti" term
 * from differences of mirrored inputs; the two outputs of the pair are
 * sym + anti and sym - anti.  Real and imaginary parts are handled in
 * separate blocks.
 *
 * Every input is copied into a local before the first store to out[].
 *
 * NOTE(review): the FFTW_K* constants are defined in konst.h, which is not
 * visible here.  Their names suggest |cos| and sin of multiples of
 * 2*pi/11; with those values the routine is an unnormalised DFT with a
 * positive exponent -- confirm against konst.h.
 */
static void fftwi_no_twiddle_11(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Short aliases for the cast constants (named after their leading digits). */
     const FFTW_REAL k841 = (FFTW_REAL) FFTW_K841253532;
     const FFTW_REAL k415 = (FFTW_REAL) FFTW_K415415013;
     const FFTW_REAL k142 = (FFTW_REAL) FFTW_K142314838;
     const FFTW_REAL k654 = (FFTW_REAL) FFTW_K654860733;
     const FFTW_REAL k959 = (FFTW_REAL) FFTW_K959492973;
     const FFTW_REAL k540 = (FFTW_REAL) FFTW_K540640817;
     const FFTW_REAL k909 = (FFTW_REAL) FFTW_K909631995;
     const FFTW_REAL k989 = (FFTW_REAL) FFTW_K989821441;
     const FFTW_REAL k755 = (FFTW_REAL) FFTW_K755749574;
     const FFTW_REAL k281 = (FFTW_REAL) FFTW_K281732556;

     /* Input samples: xrN / xiN are the real / imaginary part of in[N * istride]. */
     const FFTW_REAL xr0 = c_re(in[0]);
     const FFTW_REAL xi0 = c_im(in[0]);
     const FFTW_REAL xr1 = c_re(in[istride]);
     const FFTW_REAL xi1 = c_im(in[istride]);
     const FFTW_REAL xr2 = c_re(in[2 * istride]);
     const FFTW_REAL xi2 = c_im(in[2 * istride]);
     const FFTW_REAL xr3 = c_re(in[3 * istride]);
     const FFTW_REAL xi3 = c_im(in[3 * istride]);
     const FFTW_REAL xr4 = c_re(in[4 * istride]);
     const FFTW_REAL xi4 = c_im(in[4 * istride]);
     const FFTW_REAL xr5 = c_re(in[5 * istride]);
     const FFTW_REAL xi5 = c_im(in[5 * istride]);
     const FFTW_REAL xr6 = c_re(in[6 * istride]);
     const FFTW_REAL xi6 = c_im(in[6 * istride]);
     const FFTW_REAL xr7 = c_re(in[7 * istride]);
     const FFTW_REAL xi7 = c_im(in[7 * istride]);
     const FFTW_REAL xr8 = c_re(in[8 * istride]);
     const FFTW_REAL xi8 = c_im(in[8 * istride]);
     const FFTW_REAL xr9 = c_re(in[9 * istride]);
     const FFTW_REAL xi9 = c_im(in[9 * istride]);
     const FFTW_REAL xr10 = c_re(in[10 * istride]);
     const FFTW_REAL xi10 = c_im(in[10 * istride]);

     /* Output 0: straight sum of all eleven inputs. */
     c_re(out[0]) = xr0 + xr1 + xr2 + xr3 + xr4 + xr5 + xr6 + xr7 + xr8 + xr9 + xr10;
     c_im(out[0]) = xi0 + xi1 + xi2 + xi3 + xi4 + xi5 + xi6 + xi7 + xi8 + xi9 + xi10;

     /* ---- Outputs 1 and 10 ---- */
     {
          const FFTW_REAL sym = xr0 + (k841 * (xr1 + xr10)) + (k415 * (xr2 + xr9)) - (k959 * (xr5 + xr6)) - (k654 * (xr4 + xr7)) - (k142 * (xr3 + xr8));
          const FFTW_REAL anti = (k540 * (xi10 - xi1)) + (k909 * (xi9 - xi2)) + (k989 * (xi8 - xi3)) + (k755 * (xi7 - xi4)) + (k281 * (xi6 - xi5));

          c_re(out[ostride]) = sym + anti;
          c_re(out[10 * ostride]) = sym - anti;
     }
     {
          const FFTW_REAL sym = xi0 + (k841 * (xi1 + xi10)) + (k415 * (xi2 + xi9)) - (k959 * (xi5 + xi6)) - (k654 * (xi4 + xi7)) - (k142 * (xi3 + xi8));
          const FFTW_REAL anti = (k540 * (xr1 - xr10)) + (k909 * (xr2 - xr9)) + (k989 * (xr3 - xr8)) + (k755 * (xr4 - xr7)) + (k281 * (xr5 - xr6));

          c_im(out[ostride]) = sym + anti;
          c_im(out[10 * ostride]) = sym - anti;
     }

     /* ---- Outputs 2 and 9 ---- */
     {
          const FFTW_REAL sym = xr0 + (k415 * (xr1 + xr10)) + (k841 * (xr5 + xr6)) - (k142 * (xr4 + xr7)) - (k959 * (xr3 + xr8)) - (k654 * (xr2 + xr9));
          const FFTW_REAL anti = (k909 * (xi10 - xi1)) + (k755 * (xi9 - xi2)) + (k281 * (xi3 - xi8)) + (k989 * (xi4 - xi7)) + (k540 * (xi5 - xi6));

          c_re(out[2 * ostride]) = sym + anti;
          c_re(out[9 * ostride]) = sym - anti;
     }
     {
          const FFTW_REAL sym = xi0 + (k415 * (xi1 + xi10)) + (k841 * (xi5 + xi6)) - (k142 * (xi4 + xi7)) - (k959 * (xi3 + xi8)) - (k654 * (xi2 + xi9));
          const FFTW_REAL anti = (k909 * (xr1 - xr10)) + (k755 * (xr2 - xr9)) + (k281 * (xr8 - xr3)) + (k989 * (xr7 - xr4)) + (k540 * (xr6 - xr5));

          c_im(out[2 * ostride]) = sym + anti;
          c_im(out[9 * ostride]) = sym - anti;
     }

     /* ---- Outputs 3 and 8 ---- */
     {
          const FFTW_REAL sym = xr0 + (k415 * (xr3 + xr8)) + (k841 * (xr4 + xr7)) - (k654 * (xr5 + xr6)) - (k959 * (xr2 + xr9)) - (k142 * (xr1 + xr10));
          const FFTW_REAL anti = (k989 * (xi10 - xi1)) + (k281 * (xi2 - xi9)) + (k909 * (xi3 - xi8)) + (k540 * (xi7 - xi4)) + (k755 * (xi6 - xi5));

          c_re(out[3 * ostride]) = sym + anti;
          c_re(out[8 * ostride]) = sym - anti;
     }
     {
          const FFTW_REAL sym = xi0 + (k415 * (xi3 + xi8)) + (k841 * (xi4 + xi7)) - (k654 * (xi5 + xi6)) - (k959 * (xi2 + xi9)) - (k142 * (xi1 + xi10));
          const FFTW_REAL anti = (k989 * (xr1 - xr10)) + (k281 * (xr9 - xr2)) + (k909 * (xr8 - xr3)) + (k540 * (xr4 - xr7)) + (k755 * (xr5 - xr6));

          c_im(out[3 * ostride]) = sym + anti;
          c_im(out[8 * ostride]) = sym - anti;
     }

     /* ---- Outputs 4 and 7 ---- */
     {
          const FFTW_REAL sym = xr0 + (k841 * (xr3 + xr8)) + (k415 * (xr5 + xr6)) - (k959 * (xr4 + xr7)) - (k142 * (xr2 + xr9)) - (k654 * (xr1 + xr10));
          const FFTW_REAL anti = (k755 * (xi10 - xi1)) + (k989 * (xi2 - xi9)) + (k540 * (xi8 - xi3)) + (k281 * (xi7 - xi4)) + (k909 * (xi5 - xi6));

          c_re(out[4 * ostride]) = sym + anti;
          c_re(out[7 * ostride]) = sym - anti;
     }
     {
          const FFTW_REAL sym = xi0 + (k841 * (xi3 + xi8)) + (k415 * (xi5 + xi6)) - (k959 * (xi4 + xi7)) - (k142 * (xi2 + xi9)) - (k654 * (xi1 + xi10));
          const FFTW_REAL anti = (k755 * (xr1 - xr10)) + (k989 * (xr9 - xr2)) + (k540 * (xr3 - xr8)) + (k281 * (xr4 - xr7)) + (k909 * (xr6 - xr5));

          c_im(out[4 * ostride]) = sym + anti;
          c_im(out[7 * ostride]) = sym - anti;
     }

     /* ---- Outputs 5 and 6 ---- */
     {
          const FFTW_REAL sym = xr0 + (k841 * (xr2 + xr9)) + (k415 * (xr4 + xr7)) - (k142 * (xr5 + xr6)) - (k654 * (xr3 + xr8)) - (k959 * (xr1 + xr10));
          const FFTW_REAL anti = (k281 * (xi10 - xi1)) + (k540 * (xi2 - xi9)) + (k755 * (xi8 - xi3)) + (k909 * (xi4 - xi7)) + (k989 * (xi6 - xi5));

          c_re(out[5 * ostride]) = sym + anti;
          c_re(out[6 * ostride]) = sym - anti;
     }
     {
          const FFTW_REAL sym = xi0 + (k841 * (xi2 + xi9)) + (k415 * (xi4 + xi7)) - (k142 * (xi5 + xi6)) - (k654 * (xi3 + xi8)) - (k959 * (xi1 + xi10));
          const FFTW_REAL anti = (k281 * (xr1 - xr10)) + (k540 * (xr9 - xr2)) + (k755 * (xr3 - xr8)) + (k909 * (xr7 - xr4)) + (k989 * (xr5 - xr6));

          c_im(out[5 * ostride]) = sym + anti;
          c_im(out[6 * ostride]) = sym - anti;
     }
}
6436 
6437 /* This function contains 104 FP additions and 16 FP multiplications */
6438 
/*
 * Length-12 member of the fftwi_no_twiddle_N codelet family.
 *
 * Reads twelve complex samples in[0], in[istride], ..., in[11 * istride]
 * and writes twelve complex results out[0], out[ostride], ...,
 * out[11 * ostride].
 *
 * Stage 1 splits the inputs into four groups of three, spaced four apart
 * (0/4/8, 3/7/11, 6/10/2, 9/1/5), and runs a three-term combination on
 * each group, giving three "bins" per group.
 * Stage 2 takes the same bin from all four groups and combines the four
 * values with additions and subtractions only:
 *   bin 0 -> outputs 0, 6, 9, 3
 *   bin 1 -> outputs 4, 10, 1, 7
 *   bin 2 -> outputs 8, 2, 5, 11
 *
 * Every input is copied into a local before the first store to out[].
 *
 * NOTE(review): the FFTW_K* constants are defined in konst.h, which is not
 * visible here.  Their names suggest roughly 1/2 and sqrt(3)/2; with those
 * values the routine is an unnormalised DFT with a positive exponent --
 * confirm against konst.h.
 */
static void fftwi_no_twiddle_12(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Short aliases for the two cast constants used by stage 1. */
     const FFTW_REAL k499 = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k866 = (FFTW_REAL) FFTW_K866025403;

     /* bBrG / bBiG: real / imaginary part of bin B of group G. */
     FFTW_REAL b0r0, b0i0, b0r1, b0i1, b0r2, b0i2, b0r3, b0i3;
     FFTW_REAL b1r0, b1i0, b1r1, b1i1, b1r2, b1i2, b1r3, b1i3;
     FFTW_REAL b2r0, b2i0, b2r1, b2i1, b2r2, b2i2, b2r3, b2i3;

     /* ---- Stage 1, group 0: inputs 0, 4, 8 ---- */
     {
          const FFTW_REAL x0r = c_re(in[0]);
          const FFTW_REAL x0i = c_im(in[0]);
          const FFTW_REAL x1r = c_re(in[4 * istride]);
          const FFTW_REAL x1i = c_im(in[4 * istride]);
          const FFTW_REAL x2r = c_re(in[8 * istride]);
          const FFTW_REAL x2i = c_im(in[8 * istride]);
          const FFTW_REAL sym_r = x0r - (k499 * (x1r + x2r));
          const FFTW_REAL anti_r = k866 * (x2i - x1i);
          const FFTW_REAL sym_i = x0i - (k499 * (x1i + x2i));
          const FFTW_REAL anti_i = k866 * (x1r - x2r);

          b0r0 = x0r + x1r + x2r;
          b0i0 = x0i + x1i + x2i;
          b1r0 = sym_r + anti_r;
          b2r0 = sym_r - anti_r;
          b1i0 = sym_i + anti_i;
          b2i0 = sym_i - anti_i;
     }

     /* ---- Stage 1, group 1: inputs 3, 7, 11 ---- */
     {
          const FFTW_REAL x0r = c_re(in[3 * istride]);
          const FFTW_REAL x0i = c_im(in[3 * istride]);
          const FFTW_REAL x1r = c_re(in[7 * istride]);
          const FFTW_REAL x1i = c_im(in[7 * istride]);
          const FFTW_REAL x2r = c_re(in[11 * istride]);
          const FFTW_REAL x2i = c_im(in[11 * istride]);
          const FFTW_REAL sym_r = x0r - (k499 * (x1r + x2r));
          const FFTW_REAL anti_r = k866 * (x2i - x1i);
          const FFTW_REAL sym_i = x0i - (k499 * (x1i + x2i));
          const FFTW_REAL anti_i = k866 * (x1r - x2r);

          b0r1 = x0r + x1r + x2r;
          b0i1 = x0i + x1i + x2i;
          b1r1 = sym_r + anti_r;
          b2r1 = sym_r - anti_r;
          b1i1 = sym_i + anti_i;
          b2i1 = sym_i - anti_i;
     }

     /* ---- Stage 1, group 2: inputs 6, 10, 2 ---- */
     {
          const FFTW_REAL x0r = c_re(in[6 * istride]);
          const FFTW_REAL x0i = c_im(in[6 * istride]);
          const FFTW_REAL x1r = c_re(in[10 * istride]);
          const FFTW_REAL x1i = c_im(in[10 * istride]);
          const FFTW_REAL x2r = c_re(in[2 * istride]);
          const FFTW_REAL x2i = c_im(in[2 * istride]);
          const FFTW_REAL sym_r = x0r - (k499 * (x1r + x2r));
          const FFTW_REAL anti_r = k866 * (x2i - x1i);
          const FFTW_REAL sym_i = x0i - (k499 * (x1i + x2i));
          const FFTW_REAL anti_i = k866 * (x1r - x2r);

          b0r2 = x0r + x1r + x2r;
          b0i2 = x0i + x1i + x2i;
          b1r2 = sym_r + anti_r;
          b2r2 = sym_r - anti_r;
          b1i2 = sym_i + anti_i;
          b2i2 = sym_i - anti_i;
     }

     /* ---- Stage 1, group 3: inputs 9, 1, 5 ---- */
     {
          const FFTW_REAL x0r = c_re(in[9 * istride]);
          const FFTW_REAL x0i = c_im(in[9 * istride]);
          const FFTW_REAL x1r = c_re(in[istride]);
          const FFTW_REAL x1i = c_im(in[istride]);
          const FFTW_REAL x2r = c_re(in[5 * istride]);
          const FFTW_REAL x2i = c_im(in[5 * istride]);
          const FFTW_REAL sym_r = x0r - (k499 * (x1r + x2r));
          const FFTW_REAL anti_r = k866 * (x2i - x1i);
          const FFTW_REAL sym_i = x0i - (k499 * (x1i + x2i));
          const FFTW_REAL anti_i = k866 * (x1r - x2r);

          b0r3 = x0r + x1r + x2r;
          b0i3 = x0i + x1i + x2i;
          b1r3 = sym_r + anti_r;
          b2r3 = sym_r - anti_r;
          b1i3 = sym_i + anti_i;
          b2i3 = sym_i - anti_i;
     }

     /* ---- Stage 2, bin 0: outputs 0, 6, 9, 3 ---- */
     {
          /* s02/d02: sum/difference of groups 0 and 2; s13/d13: groups 1 and 3. */
          const FFTW_REAL s02_r = b0r0 + b0r2;
          const FFTW_REAL s02_i = b0i0 + b0i2;
          const FFTW_REAL d02_r = b0r0 - b0r2;
          const FFTW_REAL d02_i = b0i0 - b0i2;
          const FFTW_REAL s13_r = b0r1 + b0r3;
          const FFTW_REAL s13_i = b0i1 + b0i3;
          const FFTW_REAL d13_r = b0r1 - b0r3;
          const FFTW_REAL d13_i = b0i1 - b0i3;

          c_re(out[0]) = s02_r + s13_r;
          c_im(out[0]) = s02_i + s13_i;
          c_re(out[6 * ostride]) = s02_r - s13_r;
          c_im(out[6 * ostride]) = s02_i - s13_i;
          c_re(out[9 * ostride]) = d02_r - d13_i;
          c_im(out[9 * ostride]) = d02_i + d13_r;
          c_re(out[3 * ostride]) = d02_r + d13_i;
          c_im(out[3 * ostride]) = d02_i - d13_r;
     }

     /* ---- Stage 2, bin 1: outputs 4, 10, 1, 7 ---- */
     {
          const FFTW_REAL s02_r = b1r0 + b1r2;
          const FFTW_REAL s02_i = b1i0 + b1i2;
          const FFTW_REAL d02_r = b1r0 - b1r2;
          const FFTW_REAL d02_i = b1i0 - b1i2;
          const FFTW_REAL s13_r = b1r1 + b1r3;
          const FFTW_REAL s13_i = b1i1 + b1i3;
          const FFTW_REAL d13_r = b1r1 - b1r3;
          const FFTW_REAL d13_i = b1i1 - b1i3;

          c_re(out[4 * ostride]) = s02_r + s13_r;
          c_im(out[4 * ostride]) = s02_i + s13_i;
          c_re(out[10 * ostride]) = s02_r - s13_r;
          c_im(out[10 * ostride]) = s02_i - s13_i;
          c_re(out[ostride]) = d02_r - d13_i;
          c_im(out[ostride]) = d02_i + d13_r;
          c_re(out[7 * ostride]) = d02_r + d13_i;
          c_im(out[7 * ostride]) = d02_i - d13_r;
     }

     /* ---- Stage 2, bin 2: outputs 8, 2, 5, 11 ---- */
     {
          const FFTW_REAL s02_r = b2r0 + b2r2;
          const FFTW_REAL s02_i = b2i0 + b2i2;
          const FFTW_REAL d02_r = b2r0 - b2r2;
          const FFTW_REAL d02_i = b2i0 - b2i2;
          const FFTW_REAL s13_r = b2r1 + b2r3;
          const FFTW_REAL s13_i = b2i1 + b2i3;
          const FFTW_REAL d13_r = b2r1 - b2r3;
          const FFTW_REAL d13_i = b2i1 - b2i3;

          c_re(out[8 * ostride]) = s02_r + s13_r;
          c_im(out[8 * ostride]) = s02_i + s13_i;
          c_re(out[2 * ostride]) = s02_r - s13_r;
          c_im(out[2 * ostride]) = s02_i - s13_i;
          c_re(out[5 * ostride]) = d02_r - d13_i;
          c_im(out[5 * ostride]) = d02_i + d13_r;
          c_re(out[11 * ostride]) = d02_r + d13_i;
          c_im(out[11 * ostride]) = d02_i - d13_r;
     }
}
6672 
6673 /* This function contains 324 FP additions and 144 FP multiplications */
6674 
/*
 * 13-point complex transform codelet that applies no twiddle factors.
 *
 * Reads the 13 inputs in[k * istride] (k = 0..12) and stores 13 results in
 * out[k * ostride].  Every input is copied into a local array before the
 * first store to out.
 *
 * out[0] is the plain sum of all inputs.  The other outputs are produced in
 * pairs (j, 13 - j): for each component an "even" term (built from the sums
 * x[k] + x[13 - k]) and an "odd" term (built from differences of the other
 * component) are formed, and the pair is stored as even + odd / even - odd.
 *
 * NOTE(review): the FFTW_K* macros come from konst.h, which is not visible
 * here.  Judging by the digits in their names they look like
 * |cos(2*pi*k/13)| (c1..c6) and sin(2*pi*k/13) (s1..s6) for k = 1..6, and
 * the "fftwi" prefix presumably marks the inverse direction -- confirm.
 */
static void fftwi_no_twiddle_13(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     const FFTW_REAL c1 = (FFTW_REAL) FFTW_K885456025;
     const FFTW_REAL c2 = (FFTW_REAL) FFTW_K568064746;
     const FFTW_REAL c3 = (FFTW_REAL) FFTW_K120536680;
     const FFTW_REAL c4 = (FFTW_REAL) FFTW_K354604887;
     const FFTW_REAL c5 = (FFTW_REAL) FFTW_K748510748;
     const FFTW_REAL c6 = (FFTW_REAL) FFTW_K970941817;
     const FFTW_REAL s1 = (FFTW_REAL) FFTW_K464723172;
     const FFTW_REAL s2 = (FFTW_REAL) FFTW_K822983865;
     const FFTW_REAL s3 = (FFTW_REAL) FFTW_K992708874;
     const FFTW_REAL s4 = (FFTW_REAL) FFTW_K935016242;
     const FFTW_REAL s5 = (FFTW_REAL) FFTW_K663122658;
     const FFTW_REAL s6 = (FFTW_REAL) FFTW_K239315664;
     FFTW_REAL xr[13];           /* real parts of the inputs */
     FFTW_REAL xi[13];           /* imaginary parts of the inputs */
     FFTW_REAL even;
     FFTW_REAL odd;
     int k;

     /* Fetch everything up front; no input is read after the first store. */
     for (k = 0; k < 13; ++k) {
          xr[k] = c_re(in[k * istride]);
          xi[k] = c_im(in[k * istride]);
     }

     /* out[0]: straight sum, accumulated left to right. */
     c_re(out[0]) = xr[0] + xr[1] + xr[2] + xr[3] + xr[4] + xr[5] + xr[6]
          + xr[7] + xr[8] + xr[9] + xr[10] + xr[11] + xr[12];
     c_im(out[0]) = xi[0] + xi[1] + xi[2] + xi[3] + xi[4] + xi[5] + xi[6]
          + xi[7] + xi[8] + xi[9] + xi[10] + xi[11] + xi[12];

     /* ---- outputs 1 and 12 ---- */
     even = xr[0] + (c1 * (xr[1] + xr[12])) + (c2 * (xr[2] + xr[11])) + (c3 * (xr[3] + xr[10]))
          - (c6 * (xr[6] + xr[7])) - (c5 * (xr[5] + xr[8])) - (c4 * (xr[4] + xr[9]));
     odd = (s1 * (xi[12] - xi[1])) + (s2 * (xi[11] - xi[2])) + (s3 * (xi[10] - xi[3]))
          + (s4 * (xi[9] - xi[4])) + (s5 * (xi[8] - xi[5])) + (s6 * (xi[7] - xi[6]));
     c_re(out[ostride]) = even + odd;
     c_re(out[12 * ostride]) = even - odd;

     even = xi[0] + (c1 * (xi[1] + xi[12])) + (c2 * (xi[2] + xi[11])) + (c3 * (xi[3] + xi[10]))
          - (c6 * (xi[6] + xi[7])) - (c5 * (xi[5] + xi[8])) - (c4 * (xi[4] + xi[9]));
     odd = (s1 * (xr[1] - xr[12])) + (s2 * (xr[2] - xr[11])) + (s3 * (xr[3] - xr[10]))
          + (s4 * (xr[4] - xr[9])) + (s5 * (xr[5] - xr[8])) + (s6 * (xr[6] - xr[7]));
     c_im(out[ostride]) = even + odd;
     c_im(out[12 * ostride]) = even - odd;

     /* ---- outputs 2 and 11 ---- */
     even = xr[0] + (c2 * (xr[1] + xr[12])) + (c3 * (xr[5] + xr[8])) + (c1 * (xr[6] + xr[7]))
          - (c5 * (xr[4] + xr[9])) - (c6 * (xr[3] + xr[10])) - (c4 * (xr[2] + xr[11]));
     odd = (s2 * (xi[12] - xi[1])) + (s4 * (xi[11] - xi[2])) + (s6 * (xi[10] - xi[3]))
          + (s5 * (xi[4] - xi[9])) + (s3 * (xi[5] - xi[8])) + (s1 * (xi[6] - xi[7]));
     c_re(out[2 * ostride]) = even + odd;
     c_re(out[11 * ostride]) = even - odd;

     even = xi[0] + (c2 * (xi[1] + xi[12])) + (c3 * (xi[5] + xi[8])) + (c1 * (xi[6] + xi[7]))
          - (c5 * (xi[4] + xi[9])) - (c6 * (xi[3] + xi[10])) - (c4 * (xi[2] + xi[11]));
     odd = (s2 * (xr[1] - xr[12])) + (s4 * (xr[2] - xr[11])) + (s6 * (xr[3] - xr[10]))
          + (s5 * (xr[9] - xr[4])) + (s3 * (xr[8] - xr[5])) + (s1 * (xr[7] - xr[6]));
     c_im(out[2 * ostride]) = even + odd;
     c_im(out[11 * ostride]) = even - odd;

     /* ---- outputs 3 and 10 ---- */
     even = xr[0] + (c3 * (xr[1] + xr[12])) + (c1 * (xr[4] + xr[9])) + (c2 * (xr[5] + xr[8]))
          - (c5 * (xr[6] + xr[7])) - (c4 * (xr[3] + xr[10])) - (c6 * (xr[2] + xr[11]));
     odd = (s3 * (xi[12] - xi[1])) + (s6 * (xi[11] - xi[2])) + (s4 * (xi[3] - xi[10]))
          + (s1 * (xi[4] - xi[9])) + (s2 * (xi[8] - xi[5])) + (s5 * (xi[7] - xi[6]));
     c_re(out[3 * ostride]) = even + odd;
     c_re(out[10 * ostride]) = even - odd;

     even = xi[0] + (c3 * (xi[1] + xi[12])) + (c1 * (xi[4] + xi[9])) + (c2 * (xi[5] + xi[8]))
          - (c5 * (xi[6] + xi[7])) - (c4 * (xi[3] + xi[10])) - (c6 * (xi[2] + xi[11]));
     odd = (s3 * (xr[1] - xr[12])) + (s6 * (xr[2] - xr[11])) + (s4 * (xr[10] - xr[3]))
          + (s1 * (xr[9] - xr[4])) + (s2 * (xr[5] - xr[8])) + (s5 * (xr[6] - xr[7]));
     c_im(out[3 * ostride]) = even + odd;
     c_im(out[10 * ostride]) = even - odd;

     /* ---- outputs 4 and 9 ---- */
     even = xr[0] + (c1 * (xr[3] + xr[10])) + (c3 * (xr[4] + xr[9])) + (c2 * (xr[6] + xr[7]))
          - (c6 * (xr[5] + xr[8])) - (c5 * (xr[2] + xr[11])) - (c4 * (xr[1] + xr[12]));
     odd = (s4 * (xi[12] - xi[1])) + (s5 * (xi[2] - xi[11])) + (s1 * (xi[3] - xi[10]))
          + (s3 * (xi[9] - xi[4])) + (s6 * (xi[5] - xi[8])) + (s2 * (xi[6] - xi[7]));
     c_re(out[4 * ostride]) = even + odd;
     c_re(out[9 * ostride]) = even - odd;

     even = xi[0] + (c1 * (xi[3] + xi[10])) + (c3 * (xi[4] + xi[9])) + (c2 * (xi[6] + xi[7]))
          - (c6 * (xi[5] + xi[8])) - (c5 * (xi[2] + xi[11])) - (c4 * (xi[1] + xi[12]));
     odd = (s4 * (xr[1] - xr[12])) + (s5 * (xr[11] - xr[2])) + (s1 * (xr[10] - xr[3]))
          + (s3 * (xr[4] - xr[9])) + (s6 * (xr[8] - xr[5])) + (s2 * (xr[7] - xr[6]));
     c_im(out[4 * ostride]) = even + odd;
     c_im(out[9 * ostride]) = even - odd;

     /* ---- outputs 5 and 8 ---- */
     even = xr[0] + (c3 * (xr[2] + xr[11])) + (c2 * (xr[3] + xr[10])) + (c1 * (xr[5] + xr[8]))
          - (c4 * (xr[6] + xr[7])) - (c6 * (xr[4] + xr[9])) - (c5 * (xr[1] + xr[12]));
     odd = (s5 * (xi[12] - xi[1])) + (s3 * (xi[2] - xi[11])) + (s2 * (xi[10] - xi[3]))
          + (s6 * (xi[4] - xi[9])) + (s1 * (xi[5] - xi[8])) + (s4 * (xi[7] - xi[6]));
     c_re(out[5 * ostride]) = even + odd;
     c_re(out[8 * ostride]) = even - odd;

     even = xi[0] + (c3 * (xi[2] + xi[11])) + (c2 * (xi[3] + xi[10])) + (c1 * (xi[5] + xi[8]))
          - (c4 * (xi[6] + xi[7])) - (c6 * (xi[4] + xi[9])) - (c5 * (xi[1] + xi[12]));
     odd = (s5 * (xr[1] - xr[12])) + (s3 * (xr[11] - xr[2])) + (s2 * (xr[3] - xr[10]))
          + (s6 * (xr[9] - xr[4])) + (s1 * (xr[8] - xr[5])) + (s4 * (xr[6] - xr[7]));
     c_im(out[5 * ostride]) = even + odd;
     c_im(out[8 * ostride]) = even - odd;

     /* ---- outputs 6 and 7 ---- */
     even = xr[0] + (c1 * (xr[2] + xr[11])) + (c2 * (xr[4] + xr[9])) + (c3 * (xr[6] + xr[7]))
          - (c4 * (xr[5] + xr[8])) - (c5 * (xr[3] + xr[10])) - (c6 * (xr[1] + xr[12]));
     odd = (s6 * (xi[12] - xi[1])) + (s1 * (xi[2] - xi[11])) + (s5 * (xi[10] - xi[3]))
          + (s2 * (xi[4] - xi[9])) + (s4 * (xi[8] - xi[5])) + (s3 * (xi[6] - xi[7]));
     c_re(out[6 * ostride]) = even + odd;
     c_re(out[7 * ostride]) = even - odd;

     even = xi[0] + (c1 * (xi[2] + xi[11])) + (c2 * (xi[4] + xi[9])) + (c3 * (xi[6] + xi[7]))
          - (c4 * (xi[5] + xi[8])) - (c5 * (xi[3] + xi[10])) - (c6 * (xi[1] + xi[12]));
     odd = (s6 * (xr[1] - xr[12])) + (s1 * (xr[11] - xr[2])) + (s5 * (xr[3] - xr[10]))
          + (s2 * (xr[9] - xr[4])) + (s4 * (xr[5] - xr[8])) + (s3 * (xr[7] - xr[6]));
     c_im(out[6 * ostride]) = even + odd;
     c_im(out[7 * ostride]) = even - odd;
}
6828 
6829 /* This function contains 208 FP additions and 72 FP multiplications */
6830 
/*
 * 14-point complex transform codelet that applies no twiddle factors.
 *
 * Reads the 14 inputs in[k * istride] and stores 14 results in
 * out[k * ostride].  All inputs are consumed by the first stage before the
 * first store to out.
 *
 * Stage 1: seven two-input butterflies.  Butterfly j combines inputs 2*j and
 *          (2*j + 7) mod 14, keeping their sum in row 0 and their difference
 *          in row 1.
 * Stage 2: the same 7-input combination is applied to row 0 and then to
 *          row 1; only the destination slots differ between the two passes
 *          (see the slot table).  Within a pass the first slot receives the
 *          plain sum and the remaining slots are filled in pairs as
 *          even + odd / even - odd.
 *
 * NOTE(review): the FFTW_K* macros come from konst.h, which is not visible
 * here.  Judging by the digits in their names they look like
 * |cos(2*pi*k/7)| (c1..c3) and sin(2*pi*k/7) (s1..s3) for k = 1..3, and the
 * "fftwi" prefix presumably marks the inverse direction -- confirm.
 */
static void fftwi_no_twiddle_14(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Destination slots per pass: {sum, A+, A-, B+, B-, C+, C-}. */
     static const int slot[2][7] = {
          {0, 8, 6, 2, 12, 10, 4},
          {7, 1, 13, 9, 5, 3, 11}
     };
     const FFTW_REAL c1 = (FFTW_REAL) FFTW_K623489801;
     const FFTW_REAL c2 = (FFTW_REAL) FFTW_K222520933;
     const FFTW_REAL c3 = (FFTW_REAL) FFTW_K900968867;
     const FFTW_REAL s1 = (FFTW_REAL) FFTW_K781831482;
     const FFTW_REAL s2 = (FFTW_REAL) FFTW_K974927912;
     const FFTW_REAL s3 = (FFTW_REAL) FFTW_K433883739;
     FFTW_REAL vr[2][7];         /* [0] = butterfly sums, [1] = differences (real) */
     FFTW_REAL vi[2][7];         /* same, imaginary parts */
     int j;
     int pass;

     /* Stage 1: two-input butterflies on inputs 2*j and (2*j + 7) mod 14. */
     for (j = 0; j < 7; ++j) {
          const int first = 2 * j;
          const int second = (first + 7) % 14;
          const FFTW_REAL ar = c_re(in[first * istride]);
          const FFTW_REAL ai = c_im(in[first * istride]);
          const FFTW_REAL br = c_re(in[second * istride]);
          const FFTW_REAL bi = c_im(in[second * istride]);

          vr[0][j] = ar + br;
          vi[0][j] = ai + bi;
          vr[1][j] = ar - br;
          vi[1][j] = ai - bi;
     }

     /* Stage 2: identical 7-input combination on each row in turn. */
     for (pass = 0; pass < 2; ++pass) {
          const FFTW_REAL *re = vr[pass];
          const FFTW_REAL *im = vi[pass];
          const int *dst = slot[pass];
          FFTW_REAL even;
          FFTW_REAL odd;

          /* Plain sum, accumulated left to right. */
          c_re(out[dst[0] * ostride]) = re[0] + re[1] + re[2] + re[3] + re[4] + re[5] + re[6];
          c_im(out[dst[0] * ostride]) = im[0] + im[1] + im[2] + im[3] + im[4] + im[5] + im[6];

          /* Pair A */
          even = re[0] + (c1 * (re[1] + re[6])) - (c3 * (re[3] + re[4])) - (c2 * (re[2] + re[5]));
          odd = (s1 * (im[6] - im[1])) + (s2 * (im[5] - im[2])) + (s3 * (im[4] - im[3]));
          c_re(out[dst[1] * ostride]) = even + odd;
          c_re(out[dst[2] * ostride]) = even - odd;

          even = im[0] + (c1 * (im[1] + im[6])) - (c3 * (im[3] + im[4])) - (c2 * (im[2] + im[5]));
          odd = (s1 * (re[1] - re[6])) + (s2 * (re[2] - re[5])) + (s3 * (re[3] - re[4]));
          c_im(out[dst[1] * ostride]) = even + odd;
          c_im(out[dst[2] * ostride]) = even - odd;

          /* Pair B */
          even = re[0] + (c1 * (re[3] + re[4])) - (c3 * (re[2] + re[5])) - (c2 * (re[1] + re[6]));
          odd = (s2 * (im[6] - im[1])) + (s3 * (im[2] - im[5])) + (s1 * (im[3] - im[4]));
          c_re(out[dst[3] * ostride]) = even + odd;
          c_re(out[dst[4] * ostride]) = even - odd;

          even = im[0] + (c1 * (im[3] + im[4])) - (c3 * (im[2] + im[5])) - (c2 * (im[1] + im[6]));
          odd = (s2 * (re[1] - re[6])) + (s3 * (re[5] - re[2])) + (s1 * (re[4] - re[3]));
          c_im(out[dst[3] * ostride]) = even + odd;
          c_im(out[dst[4] * ostride]) = even - odd;

          /* Pair C */
          even = re[0] + (c1 * (re[2] + re[5])) - (c2 * (re[3] + re[4])) - (c3 * (re[1] + re[6]));
          odd = (s3 * (im[6] - im[1])) + (s1 * (im[2] - im[5])) + (s2 * (im[4] - im[3]));
          c_re(out[dst[5] * ostride]) = even + odd;
          c_re(out[dst[6] * ostride]) = even - odd;

          even = im[0] + (c1 * (im[2] + im[5])) - (c2 * (im[3] + im[4])) - (c3 * (im[1] + im[6]));
          odd = (s3 * (re[1] - re[6])) + (s1 * (re[5] - re[2])) + (s2 * (re[3] - re[4]));
          c_im(out[dst[5] * ostride]) = even + odd;
          c_im(out[dst[6] * ostride]) = even - odd;
     }
}
7060 
7061 /* This function contains 202 FP additions and 68 FP multiplications */
7062 
fftwi_no_twiddle_15(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)7063 static void fftwi_no_twiddle_15(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
7064 {
7065      FFTW_REAL tre0_0_0;
7066      FFTW_REAL tim0_0_0;
7067      FFTW_REAL tre0_0_1;
7068      FFTW_REAL tim0_0_1;
7069      FFTW_REAL tre0_0_2;
7070      FFTW_REAL tim0_0_2;
7071      FFTW_REAL tre0_0_3;
7072      FFTW_REAL tim0_0_3;
7073      FFTW_REAL tre0_0_4;
7074      FFTW_REAL tim0_0_4;
7075      FFTW_REAL tre0_1_0;
7076      FFTW_REAL tim0_1_0;
7077      FFTW_REAL tre0_1_1;
7078      FFTW_REAL tim0_1_1;
7079      FFTW_REAL tre0_1_2;
7080      FFTW_REAL tim0_1_2;
7081      FFTW_REAL tre0_1_3;
7082      FFTW_REAL tim0_1_3;
7083      FFTW_REAL tre0_1_4;
7084      FFTW_REAL tim0_1_4;
7085      FFTW_REAL tre0_2_0;
7086      FFTW_REAL tim0_2_0;
7087      FFTW_REAL tre0_2_1;
7088      FFTW_REAL tim0_2_1;
7089      FFTW_REAL tre0_2_2;
7090      FFTW_REAL tim0_2_2;
7091      FFTW_REAL tre0_2_3;
7092      FFTW_REAL tim0_2_3;
7093      FFTW_REAL tre0_2_4;
7094      FFTW_REAL tim0_2_4;
7095      {
7096 	  FFTW_REAL tre1_0_0;
7097 	  FFTW_REAL tim1_0_0;
7098 	  FFTW_REAL tre1_1_0;
7099 	  FFTW_REAL tim1_1_0;
7100 	  FFTW_REAL tre1_2_0;
7101 	  FFTW_REAL tim1_2_0;
7102 	  tre1_0_0 = c_re(in[0]);
7103 	  tim1_0_0 = c_im(in[0]);
7104 	  tre1_1_0 = c_re(in[5 * istride]);
7105 	  tim1_1_0 = c_im(in[5 * istride]);
7106 	  tre1_2_0 = c_re(in[10 * istride]);
7107 	  tim1_2_0 = c_im(in[10 * istride]);
7108 	  tre0_0_0 = tre1_0_0 + tre1_1_0 + tre1_2_0;
7109 	  tim0_0_0 = tim1_0_0 + tim1_1_0 + tim1_2_0;
7110 	  {
7111 	       FFTW_REAL tre2_0_0;
7112 	       FFTW_REAL tre2_1_0;
7113 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
7114 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
7115 	       tre0_1_0 = tre2_0_0 + tre2_1_0;
7116 	       tre0_2_0 = tre2_0_0 - tre2_1_0;
7117 	  }
7118 	  {
7119 	       FFTW_REAL tim2_0_0;
7120 	       FFTW_REAL tim2_1_0;
7121 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
7122 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
7123 	       tim0_1_0 = tim2_0_0 + tim2_1_0;
7124 	       tim0_2_0 = tim2_0_0 - tim2_1_0;
7125 	  }
7126      }
7127      {
7128 	  FFTW_REAL tre1_0_0;
7129 	  FFTW_REAL tim1_0_0;
7130 	  FFTW_REAL tre1_1_0;
7131 	  FFTW_REAL tim1_1_0;
7132 	  FFTW_REAL tre1_2_0;
7133 	  FFTW_REAL tim1_2_0;
7134 	  tre1_0_0 = c_re(in[3 * istride]);
7135 	  tim1_0_0 = c_im(in[3 * istride]);
7136 	  tre1_1_0 = c_re(in[8 * istride]);
7137 	  tim1_1_0 = c_im(in[8 * istride]);
7138 	  tre1_2_0 = c_re(in[13 * istride]);
7139 	  tim1_2_0 = c_im(in[13 * istride]);
7140 	  tre0_0_1 = tre1_0_0 + tre1_1_0 + tre1_2_0;
7141 	  tim0_0_1 = tim1_0_0 + tim1_1_0 + tim1_2_0;
7142 	  {
7143 	       FFTW_REAL tre2_0_0;
7144 	       FFTW_REAL tre2_1_0;
7145 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
7146 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
7147 	       tre0_1_1 = tre2_0_0 + tre2_1_0;
7148 	       tre0_2_1 = tre2_0_0 - tre2_1_0;
7149 	  }
7150 	  {
7151 	       FFTW_REAL tim2_0_0;
7152 	       FFTW_REAL tim2_1_0;
7153 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
7154 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
7155 	       tim0_1_1 = tim2_0_0 + tim2_1_0;
7156 	       tim0_2_1 = tim2_0_0 - tim2_1_0;
7157 	  }
7158      }
7159      {
7160 	  FFTW_REAL tre1_0_0;
7161 	  FFTW_REAL tim1_0_0;
7162 	  FFTW_REAL tre1_1_0;
7163 	  FFTW_REAL tim1_1_0;
7164 	  FFTW_REAL tre1_2_0;
7165 	  FFTW_REAL tim1_2_0;
7166 	  tre1_0_0 = c_re(in[6 * istride]);
7167 	  tim1_0_0 = c_im(in[6 * istride]);
7168 	  tre1_1_0 = c_re(in[11 * istride]);
7169 	  tim1_1_0 = c_im(in[11 * istride]);
7170 	  tre1_2_0 = c_re(in[istride]);
7171 	  tim1_2_0 = c_im(in[istride]);
7172 	  tre0_0_2 = tre1_0_0 + tre1_1_0 + tre1_2_0;
7173 	  tim0_0_2 = tim1_0_0 + tim1_1_0 + tim1_2_0;
7174 	  {
7175 	       FFTW_REAL tre2_0_0;
7176 	       FFTW_REAL tre2_1_0;
7177 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
7178 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
7179 	       tre0_1_2 = tre2_0_0 + tre2_1_0;
7180 	       tre0_2_2 = tre2_0_0 - tre2_1_0;
7181 	  }
7182 	  {
7183 	       FFTW_REAL tim2_0_0;
7184 	       FFTW_REAL tim2_1_0;
7185 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
7186 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
7187 	       tim0_1_2 = tim2_0_0 + tim2_1_0;
7188 	       tim0_2_2 = tim2_0_0 - tim2_1_0;
7189 	  }
7190      }
7191      {
7192 	  FFTW_REAL tre1_0_0;
7193 	  FFTW_REAL tim1_0_0;
7194 	  FFTW_REAL tre1_1_0;
7195 	  FFTW_REAL tim1_1_0;
7196 	  FFTW_REAL tre1_2_0;
7197 	  FFTW_REAL tim1_2_0;
7198 	  tre1_0_0 = c_re(in[9 * istride]);
7199 	  tim1_0_0 = c_im(in[9 * istride]);
7200 	  tre1_1_0 = c_re(in[14 * istride]);
7201 	  tim1_1_0 = c_im(in[14 * istride]);
7202 	  tre1_2_0 = c_re(in[4 * istride]);
7203 	  tim1_2_0 = c_im(in[4 * istride]);
7204 	  tre0_0_3 = tre1_0_0 + tre1_1_0 + tre1_2_0;
7205 	  tim0_0_3 = tim1_0_0 + tim1_1_0 + tim1_2_0;
7206 	  {
7207 	       FFTW_REAL tre2_0_0;
7208 	       FFTW_REAL tre2_1_0;
7209 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
7210 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
7211 	       tre0_1_3 = tre2_0_0 + tre2_1_0;
7212 	       tre0_2_3 = tre2_0_0 - tre2_1_0;
7213 	  }
7214 	  {
7215 	       FFTW_REAL tim2_0_0;
7216 	       FFTW_REAL tim2_1_0;
7217 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
7218 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
7219 	       tim0_1_3 = tim2_0_0 + tim2_1_0;
7220 	       tim0_2_3 = tim2_0_0 - tim2_1_0;
7221 	  }
7222      }
7223      {
7224 	  FFTW_REAL tre1_0_0;
7225 	  FFTW_REAL tim1_0_0;
7226 	  FFTW_REAL tre1_1_0;
7227 	  FFTW_REAL tim1_1_0;
7228 	  FFTW_REAL tre1_2_0;
7229 	  FFTW_REAL tim1_2_0;
7230 	  tre1_0_0 = c_re(in[12 * istride]);
7231 	  tim1_0_0 = c_im(in[12 * istride]);
7232 	  tre1_1_0 = c_re(in[2 * istride]);
7233 	  tim1_1_0 = c_im(in[2 * istride]);
7234 	  tre1_2_0 = c_re(in[7 * istride]);
7235 	  tim1_2_0 = c_im(in[7 * istride]);
7236 	  tre0_0_4 = tre1_0_0 + tre1_1_0 + tre1_2_0;
7237 	  tim0_0_4 = tim1_0_0 + tim1_1_0 + tim1_2_0;
7238 	  {
7239 	       FFTW_REAL tre2_0_0;
7240 	       FFTW_REAL tre2_1_0;
7241 	       tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
7242 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
7243 	       tre0_1_4 = tre2_0_0 + tre2_1_0;
7244 	       tre0_2_4 = tre2_0_0 - tre2_1_0;
7245 	  }
7246 	  {
7247 	       FFTW_REAL tim2_0_0;
7248 	       FFTW_REAL tim2_1_0;
7249 	       tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
7250 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
7251 	       tim0_1_4 = tim2_0_0 + tim2_1_0;
7252 	       tim0_2_4 = tim2_0_0 - tim2_1_0;
7253 	  }
7254      }
7255      c_re(out[0]) = tre0_0_0 + tre0_0_1 + tre0_0_2 + tre0_0_3 + tre0_0_4;
7256      c_im(out[0]) = tim0_0_0 + tim0_0_1 + tim0_0_2 + tim0_0_3 + tim0_0_4;
7257      {
7258 	  FFTW_REAL tre2_0_0;
7259 	  FFTW_REAL tre2_1_0;
7260 	  tre2_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_0_1 + tre0_0_4)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_0_2 + tre0_0_3));
7261 	  tre2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_0_4 - tim0_0_1)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_0_3 - tim0_0_2));
7262 	  c_re(out[6 * ostride]) = tre2_0_0 + tre2_1_0;
7263 	  c_re(out[9 * ostride]) = tre2_0_0 - tre2_1_0;
7264      }
7265      {
7266 	  FFTW_REAL tim2_0_0;
7267 	  FFTW_REAL tim2_1_0;
7268 	  tim2_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_0_1 + tim0_0_4)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_0_2 + tim0_0_3));
7269 	  tim2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_0_1 - tre0_0_4)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_0_2 - tre0_0_3));
7270 	  c_im(out[6 * ostride]) = tim2_0_0 + tim2_1_0;
7271 	  c_im(out[9 * ostride]) = tim2_0_0 - tim2_1_0;
7272      }
7273      {
7274 	  FFTW_REAL tre2_0_0;
7275 	  FFTW_REAL tre2_1_0;
7276 	  tre2_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_0_2 + tre0_0_3)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_0_1 + tre0_0_4));
7277 	  tre2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_0_4 - tim0_0_1)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_0_2 - tim0_0_3));
7278 	  c_re(out[12 * ostride]) = tre2_0_0 + tre2_1_0;
7279 	  c_re(out[3 * ostride]) = tre2_0_0 - tre2_1_0;
7280      }
7281      {
7282 	  FFTW_REAL tim2_0_0;
7283 	  FFTW_REAL tim2_1_0;
7284 	  tim2_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_0_2 + tim0_0_3)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_0_1 + tim0_0_4));
7285 	  tim2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_0_1 - tre0_0_4)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_0_3 - tre0_0_2));
7286 	  c_im(out[12 * ostride]) = tim2_0_0 + tim2_1_0;
7287 	  c_im(out[3 * ostride]) = tim2_0_0 - tim2_1_0;
7288      }
7289      c_re(out[10 * ostride]) = tre0_1_0 + tre0_1_1 + tre0_1_2 + tre0_1_3 + tre0_1_4;
7290      c_im(out[10 * ostride]) = tim0_1_0 + tim0_1_1 + tim0_1_2 + tim0_1_3 + tim0_1_4;
7291      {
7292 	  FFTW_REAL tre2_0_0;
7293 	  FFTW_REAL tre2_1_0;
7294 	  tre2_0_0 = tre0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_1_1 + tre0_1_4)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_1_2 + tre0_1_3));
7295 	  tre2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_1_4 - tim0_1_1)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_1_3 - tim0_1_2));
7296 	  c_re(out[ostride]) = tre2_0_0 + tre2_1_0;
7297 	  c_re(out[4 * ostride]) = tre2_0_0 - tre2_1_0;
7298      }
7299      {
7300 	  FFTW_REAL tim2_0_0;
7301 	  FFTW_REAL tim2_1_0;
7302 	  tim2_0_0 = tim0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_1_1 + tim0_1_4)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_1_2 + tim0_1_3));
7303 	  tim2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_1_1 - tre0_1_4)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_1_2 - tre0_1_3));
7304 	  c_im(out[ostride]) = tim2_0_0 + tim2_1_0;
7305 	  c_im(out[4 * ostride]) = tim2_0_0 - tim2_1_0;
7306      }
7307      {
7308 	  FFTW_REAL tre2_0_0;
7309 	  FFTW_REAL tre2_1_0;
7310 	  tre2_0_0 = tre0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_1_2 + tre0_1_3)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_1_1 + tre0_1_4));
7311 	  tre2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_1_4 - tim0_1_1)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_1_2 - tim0_1_3));
7312 	  c_re(out[7 * ostride]) = tre2_0_0 + tre2_1_0;
7313 	  c_re(out[13 * ostride]) = tre2_0_0 - tre2_1_0;
7314      }
7315      {
7316 	  FFTW_REAL tim2_0_0;
7317 	  FFTW_REAL tim2_1_0;
7318 	  tim2_0_0 = tim0_1_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_1_2 + tim0_1_3)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_1_1 + tim0_1_4));
7319 	  tim2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_1_1 - tre0_1_4)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_1_3 - tre0_1_2));
7320 	  c_im(out[7 * ostride]) = tim2_0_0 + tim2_1_0;
7321 	  c_im(out[13 * ostride]) = tim2_0_0 - tim2_1_0;
7322      }
7323      c_re(out[5 * ostride]) = tre0_2_0 + tre0_2_1 + tre0_2_2 + tre0_2_3 + tre0_2_4;
7324      c_im(out[5 * ostride]) = tim0_2_0 + tim0_2_1 + tim0_2_2 + tim0_2_3 + tim0_2_4;
7325      {
7326 	  FFTW_REAL tre2_0_0;
7327 	  FFTW_REAL tre2_1_0;
7328 	  tre2_0_0 = tre0_2_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_2_1 + tre0_2_4)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_2_2 + tre0_2_3));
7329 	  tre2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_2_4 - tim0_2_1)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_2_3 - tim0_2_2));
7330 	  c_re(out[11 * ostride]) = tre2_0_0 + tre2_1_0;
7331 	  c_re(out[14 * ostride]) = tre2_0_0 - tre2_1_0;
7332      }
7333      {
7334 	  FFTW_REAL tim2_0_0;
7335 	  FFTW_REAL tim2_1_0;
7336 	  tim2_0_0 = tim0_2_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_2_1 + tim0_2_4)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_2_2 + tim0_2_3));
7337 	  tim2_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_2_1 - tre0_2_4)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_2_2 - tre0_2_3));
7338 	  c_im(out[11 * ostride]) = tim2_0_0 + tim2_1_0;
7339 	  c_im(out[14 * ostride]) = tim2_0_0 - tim2_1_0;
7340      }
7341      {
7342 	  FFTW_REAL tre2_0_0;
7343 	  FFTW_REAL tre2_1_0;
7344 	  tre2_0_0 = tre0_2_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_2_2 + tre0_2_3)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_2_1 + tre0_2_4));
7345 	  tre2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_2_4 - tim0_2_1)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_2_2 - tim0_2_3));
7346 	  c_re(out[2 * ostride]) = tre2_0_0 + tre2_1_0;
7347 	  c_re(out[8 * ostride]) = tre2_0_0 - tre2_1_0;
7348      }
7349      {
7350 	  FFTW_REAL tim2_0_0;
7351 	  FFTW_REAL tim2_1_0;
7352 	  tim2_0_0 = tim0_2_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_2_2 + tim0_2_3)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_2_1 + tim0_2_4));
7353 	  tim2_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_2_1 - tre0_2_4)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_2_3 - tre0_2_2));
7354 	  c_im(out[2 * ostride]) = tim2_0_0 + tim2_1_0;
7355 	  c_im(out[8 * ostride]) = tim2_0_0 - tim2_1_0;
7356      }
7357 }
7358 
7359 /* This function contains 144 FP additions and 24 FP multiplications */
7360 
static void fftwi_no_twiddle_16(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * 16-point codelet: reads in[0], in[istride], ..., in[15 * istride]
      * and writes out[0], out[ostride], ..., out[15 * ostride].
      *
      * The work is split into two passes of 4-point butterflies:
      *   pass 1 combines inputs j, j+4, j+8, j+12 (j = 0..3) into
      *          mid_re[k][j] / mid_im[k][j], k = 0..3;
      *   pass 2 takes row k of that table, scales columns 1..3 by
      *          constant factors, and produces outputs k, k+4, k+8, k+12.
      *
      * Every input is loaded in pass 1 before any output is stored in
      * pass 2.
      *
      * NOTE(review): the FFTW_K* constants are defined outside this block;
      * from their names they look like cos/sin of multiples of pi/8,
      * which would make this the positive-exponent (unnormalised)
      * transform -- confirm against their definitions.
      */
     FFTW_REAL mid_re[4][4];
     FFTW_REAL mid_im[4][4];
     int j;

     /* Pass 1: one butterfly per residue j (mod 4). */
     for (j = 0; j < 4; ++j) {
	  const FFTW_REAL a0_re = c_re(in[j * istride]);
	  const FFTW_REAL a0_im = c_im(in[j * istride]);
	  const FFTW_REAL a8_re = c_re(in[(j + 8) * istride]);
	  const FFTW_REAL a8_im = c_im(in[(j + 8) * istride]);
	  const FFTW_REAL a4_re = c_re(in[(j + 4) * istride]);
	  const FFTW_REAL a4_im = c_im(in[(j + 4) * istride]);
	  const FFTW_REAL a12_re = c_re(in[(j + 12) * istride]);
	  const FFTW_REAL a12_im = c_im(in[(j + 12) * istride]);

	  /* Sums and differences of the pairs (j, j+8) and (j+4, j+12). */
	  const FFTW_REAL sum_a_re = a0_re + a8_re;
	  const FFTW_REAL sum_a_im = a0_im + a8_im;
	  const FFTW_REAL dif_a_re = a0_re - a8_re;
	  const FFTW_REAL dif_a_im = a0_im - a8_im;
	  const FFTW_REAL sum_b_re = a4_re + a12_re;
	  const FFTW_REAL sum_b_im = a4_im + a12_im;
	  const FFTW_REAL dif_b_re = a4_re - a12_re;
	  const FFTW_REAL dif_b_im = a4_im - a12_im;

	  mid_re[0][j] = sum_a_re + sum_b_re;
	  mid_im[0][j] = sum_a_im + sum_b_im;
	  mid_re[2][j] = sum_a_re - sum_b_re;
	  mid_im[2][j] = sum_a_im - sum_b_im;
	  /* Rows 1 and 3 add/subtract dif_b with real and imaginary parts
	   * swapped (i.e. dif_b multiplied by +i / -i). */
	  mid_re[1][j] = dif_a_re - dif_b_im;
	  mid_im[1][j] = dif_a_im + dif_b_re;
	  mid_re[3][j] = dif_a_re + dif_b_im;
	  mid_im[3][j] = dif_a_im - dif_b_re;
     }

     /*
      * Pass 2.  In each group below:
      *   es / ed = sum / difference built from columns 0 and 2,
      *   os / od = sum / difference built from columns 1 and 3,
      * and the four outputs are es + os, es - os, ed + i*od, ed - i*od.
      */

     /* Row 0: no scaling needed; outputs 0, 8, 4, 12. */
     {
	  const FFTW_REAL es_re = mid_re[0][0] + mid_re[0][2];
	  const FFTW_REAL es_im = mid_im[0][0] + mid_im[0][2];
	  const FFTW_REAL ed_re = mid_re[0][0] - mid_re[0][2];
	  const FFTW_REAL ed_im = mid_im[0][0] - mid_im[0][2];
	  const FFTW_REAL os_re = mid_re[0][1] + mid_re[0][3];
	  const FFTW_REAL os_im = mid_im[0][1] + mid_im[0][3];
	  const FFTW_REAL od_re = mid_re[0][1] - mid_re[0][3];
	  const FFTW_REAL od_im = mid_im[0][1] - mid_im[0][3];

	  c_re(out[0]) = es_re + os_re;
	  c_im(out[0]) = es_im + os_im;
	  c_re(out[8 * ostride]) = es_re - os_re;
	  c_im(out[8 * ostride]) = es_im - os_im;
	  c_re(out[4 * ostride]) = ed_re - od_im;
	  c_im(out[4 * ostride]) = ed_im + od_re;
	  c_re(out[12 * ostride]) = ed_re + od_im;
	  c_im(out[12 * ostride]) = ed_im - od_re;
     }

     /* Row 1: outputs 1, 9, 5, 13. */
     {
	  /* Column 2 scaled by K707106781 * (1 + i). */
	  const FFTW_REAL w2_re = ((FFTW_REAL) FFTW_K707106781) * (mid_re[1][2] - mid_im[1][2]);
	  const FFTW_REAL w2_im = ((FFTW_REAL) FFTW_K707106781) * (mid_im[1][2] + mid_re[1][2]);
	  /* Column 1 scaled by (K923879532 + i*K382683432). */
	  const FFTW_REAL w1_re = (((FFTW_REAL) FFTW_K923879532) * mid_re[1][1]) - (((FFTW_REAL) FFTW_K382683432) * mid_im[1][1]);
	  const FFTW_REAL w1_im = (((FFTW_REAL) FFTW_K923879532) * mid_im[1][1]) + (((FFTW_REAL) FFTW_K382683432) * mid_re[1][1]);
	  /* Column 3 scaled by (K382683432 + i*K923879532). */
	  const FFTW_REAL w3_re = (((FFTW_REAL) FFTW_K382683432) * mid_re[1][3]) - (((FFTW_REAL) FFTW_K923879532) * mid_im[1][3]);
	  const FFTW_REAL w3_im = (((FFTW_REAL) FFTW_K382683432) * mid_im[1][3]) + (((FFTW_REAL) FFTW_K923879532) * mid_re[1][3]);

	  const FFTW_REAL es_re = mid_re[1][0] + w2_re;
	  const FFTW_REAL es_im = mid_im[1][0] + w2_im;
	  const FFTW_REAL ed_re = mid_re[1][0] - w2_re;
	  const FFTW_REAL ed_im = mid_im[1][0] - w2_im;
	  const FFTW_REAL os_re = w1_re + w3_re;
	  const FFTW_REAL os_im = w1_im + w3_im;
	  const FFTW_REAL od_re = w1_re - w3_re;
	  const FFTW_REAL od_im = w1_im - w3_im;

	  c_re(out[ostride]) = es_re + os_re;
	  c_im(out[ostride]) = es_im + os_im;
	  c_re(out[9 * ostride]) = es_re - os_re;
	  c_im(out[9 * ostride]) = es_im - os_im;
	  c_re(out[5 * ostride]) = ed_re - od_im;
	  c_im(out[5 * ostride]) = ed_im + od_re;
	  c_re(out[13 * ostride]) = ed_re + od_im;
	  c_im(out[13 * ostride]) = ed_im - od_re;
     }

     /* Row 2: outputs 2, 10, 6, 14. */
     {
	  /* Column 2 enters with real/imaginary parts swapped (times +i),
	   * so no multiplication is needed for es / ed. */
	  const FFTW_REAL es_re = mid_re[2][0] - mid_im[2][2];
	  const FFTW_REAL es_im = mid_im[2][0] + mid_re[2][2];
	  const FFTW_REAL ed_re = mid_re[2][0] + mid_im[2][2];
	  const FFTW_REAL ed_im = mid_im[2][0] - mid_re[2][2];
	  /* Column 1 scaled by K707106781 * (1 + i). */
	  const FFTW_REAL w1_re = ((FFTW_REAL) FFTW_K707106781) * (mid_re[2][1] - mid_im[2][1]);
	  const FFTW_REAL w1_im = ((FFTW_REAL) FFTW_K707106781) * (mid_im[2][1] + mid_re[2][1]);
	  /* Column 3: v3_re / v3_im hold K707106781 * (re + im) and
	   * K707106781 * (re - im); the signs are applied when combining. */
	  const FFTW_REAL v3_re = ((FFTW_REAL) FFTW_K707106781) * (mid_re[2][3] + mid_im[2][3]);
	  const FFTW_REAL v3_im = ((FFTW_REAL) FFTW_K707106781) * (mid_re[2][3] - mid_im[2][3]);
	  const FFTW_REAL os_re = w1_re - v3_re;
	  const FFTW_REAL os_im = w1_im + v3_im;
	  const FFTW_REAL od_re = w1_re + v3_re;
	  const FFTW_REAL od_im = w1_im - v3_im;

	  c_re(out[2 * ostride]) = es_re + os_re;
	  c_im(out[2 * ostride]) = es_im + os_im;
	  c_re(out[10 * ostride]) = es_re - os_re;
	  c_im(out[10 * ostride]) = es_im - os_im;
	  c_re(out[6 * ostride]) = ed_re - od_im;
	  c_im(out[6 * ostride]) = ed_im + od_re;
	  c_re(out[14 * ostride]) = ed_re + od_im;
	  c_im(out[14 * ostride]) = ed_im - od_re;
     }

     /* Row 3: outputs 3, 11, 7, 15. */
     {
	  /* Column 2: v2_re / v2_im hold K707106781 * (re + im) and
	   * K707106781 * (re - im); the signs are applied when combining. */
	  const FFTW_REAL v2_re = ((FFTW_REAL) FFTW_K707106781) * (mid_re[3][2] + mid_im[3][2]);
	  const FFTW_REAL v2_im = ((FFTW_REAL) FFTW_K707106781) * (mid_re[3][2] - mid_im[3][2]);
	  /* Column 1 scaled by (K382683432 + i*K923879532). */
	  const FFTW_REAL w1_re = (((FFTW_REAL) FFTW_K382683432) * mid_re[3][1]) - (((FFTW_REAL) FFTW_K923879532) * mid_im[3][1]);
	  const FFTW_REAL w1_im = (((FFTW_REAL) FFTW_K382683432) * mid_im[3][1]) + (((FFTW_REAL) FFTW_K923879532) * mid_re[3][1]);
	  /* Column 3: products kept in the sign-free form of the generated
	   * code; the signs are applied when combining. */
	  const FFTW_REAL v3_re = (((FFTW_REAL) FFTW_K382683432) * mid_im[3][3]) - (((FFTW_REAL) FFTW_K923879532) * mid_re[3][3]);
	  const FFTW_REAL v3_im = (((FFTW_REAL) FFTW_K923879532) * mid_im[3][3]) + (((FFTW_REAL) FFTW_K382683432) * mid_re[3][3]);

	  const FFTW_REAL es_re = mid_re[3][0] - v2_re;
	  const FFTW_REAL es_im = mid_im[3][0] + v2_im;
	  const FFTW_REAL ed_re = mid_re[3][0] + v2_re;
	  const FFTW_REAL ed_im = mid_im[3][0] - v2_im;
	  const FFTW_REAL os_re = w1_re + v3_re;
	  const FFTW_REAL os_im = w1_im - v3_im;
	  const FFTW_REAL od_re = w1_re - v3_re;
	  const FFTW_REAL od_im = w1_im + v3_im;

	  c_re(out[3 * ostride]) = es_re + os_re;
	  c_im(out[3 * ostride]) = es_im + os_im;
	  c_re(out[11 * ostride]) = es_re - os_re;
	  c_im(out[11 * ostride]) = es_im - os_im;
	  c_re(out[7 * ostride]) = ed_re - od_im;
	  c_im(out[7 * ostride]) = ed_im + od_re;
	  c_re(out[15 * ostride]) = ed_re + od_im;
	  c_im(out[15 * ostride]) = ed_im - od_re;
     }
}
7726 
7727 /* This function contains 4 FP additions and 0 FP multiplications */
7728 
static void fftwi_no_twiddle_2(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * 2-point codelet: out[0] receives the sum of in[0] and in[istride],
      * out[ostride] receives their difference.  Both inputs are loaded
      * before either output is stored.
      */
     const FFTW_REAL first_re = c_re(in[0]);
     const FFTW_REAL first_im = c_im(in[0]);
     const FFTW_REAL second_re = c_re(in[istride]);
     const FFTW_REAL second_im = c_im(in[istride]);

     c_re(out[0]) = first_re + second_re;
     c_im(out[0]) = first_im + second_im;
     c_re(out[ostride]) = first_re - second_re;
     c_im(out[ostride]) = first_im - second_im;
}
7744 
7745 /* This function contains 14 FP additions and 4 FP multiplications */
7746 
static void fftwi_no_twiddle_3(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * 3-point codelet: reads in[0], in[istride], in[2 * istride] and
      * writes out[0], out[ostride], out[2 * ostride].  All three inputs
      * are loaded before any output is stored.
      *
      * NOTE(review): FFTW_K499999999 and FFTW_K866025403 are defined
      * outside this block; from their names they look like 1/2 and
      * sqrt(3)/2 -- confirm against their definitions.
      */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);

     /* Output 0 is the plain sum of the three inputs. */
     c_re(out[0]) = x0_re + x1_re + x2_re;
     c_im(out[0]) = x0_im + x1_im + x2_im;

     /* Real parts of outputs 1 and 2: a shared term plus/minus a term
      * built from the imaginary parts of inputs 1 and 2. */
     {
	  const FFTW_REAL shared_re = x0_re - (((FFTW_REAL) FFTW_K499999999) * (x1_re + x2_re));
	  const FFTW_REAL cross_re = ((FFTW_REAL) FFTW_K866025403) * (x2_im - x1_im);

	  c_re(out[ostride]) = shared_re + cross_re;
	  c_re(out[2 * ostride]) = shared_re - cross_re;
     }

     /* Imaginary parts of outputs 1 and 2: same shape, with the cross
      * term built from the real parts of inputs 1 and 2. */
     {
	  const FFTW_REAL shared_im = x0_im - (((FFTW_REAL) FFTW_K499999999) * (x1_im + x2_im));
	  const FFTW_REAL cross_im = ((FFTW_REAL) FFTW_K866025403) * (x1_re - x2_re);

	  c_im(out[ostride]) = shared_im + cross_im;
	  c_im(out[2 * ostride]) = shared_im - cross_im;
     }
}
7780 
7781 /* This function contains 376 FP additions and 88 FP multiplications */
7782 
fftwi_no_twiddle_32(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)7783 static void fftwi_no_twiddle_32(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
7784 {
7785      FFTW_REAL tre0_0_0;
7786      FFTW_REAL tim0_0_0;
7787      FFTW_REAL tre0_0_1;
7788      FFTW_REAL tim0_0_1;
7789      FFTW_REAL tre0_0_2;
7790      FFTW_REAL tim0_0_2;
7791      FFTW_REAL tre0_0_3;
7792      FFTW_REAL tim0_0_3;
7793      FFTW_REAL tre0_0_4;
7794      FFTW_REAL tim0_0_4;
7795      FFTW_REAL tre0_0_5;
7796      FFTW_REAL tim0_0_5;
7797      FFTW_REAL tre0_0_6;
7798      FFTW_REAL tim0_0_6;
7799      FFTW_REAL tre0_0_7;
7800      FFTW_REAL tim0_0_7;
7801      FFTW_REAL tre0_1_0;
7802      FFTW_REAL tim0_1_0;
7803      FFTW_REAL tre0_1_1;
7804      FFTW_REAL tim0_1_1;
7805      FFTW_REAL tre0_1_2;
7806      FFTW_REAL tim0_1_2;
7807      FFTW_REAL tre0_1_3;
7808      FFTW_REAL tim0_1_3;
7809      FFTW_REAL tre0_1_4;
7810      FFTW_REAL tim0_1_4;
7811      FFTW_REAL tre0_1_5;
7812      FFTW_REAL tim0_1_5;
7813      FFTW_REAL tre0_1_6;
7814      FFTW_REAL tim0_1_6;
7815      FFTW_REAL tre0_1_7;
7816      FFTW_REAL tim0_1_7;
7817      FFTW_REAL tre0_2_0;
7818      FFTW_REAL tim0_2_0;
7819      FFTW_REAL tre0_2_1;
7820      FFTW_REAL tim0_2_1;
7821      FFTW_REAL tre0_2_2;
7822      FFTW_REAL tim0_2_2;
7823      FFTW_REAL tre0_2_3;
7824      FFTW_REAL tim0_2_3;
7825      FFTW_REAL tre0_2_4;
7826      FFTW_REAL tim0_2_4;
7827      FFTW_REAL tre0_2_5;
7828      FFTW_REAL tim0_2_5;
7829      FFTW_REAL tre0_2_6;
7830      FFTW_REAL tim0_2_6;
7831      FFTW_REAL tre0_2_7;
7832      FFTW_REAL tim0_2_7;
7833      FFTW_REAL tre0_3_0;
7834      FFTW_REAL tim0_3_0;
7835      FFTW_REAL tre0_3_1;
7836      FFTW_REAL tim0_3_1;
7837      FFTW_REAL tre0_3_2;
7838      FFTW_REAL tim0_3_2;
7839      FFTW_REAL tre0_3_3;
7840      FFTW_REAL tim0_3_3;
7841      FFTW_REAL tre0_3_4;
7842      FFTW_REAL tim0_3_4;
7843      FFTW_REAL tre0_3_5;
7844      FFTW_REAL tim0_3_5;
7845      FFTW_REAL tre0_3_6;
7846      FFTW_REAL tim0_3_6;
7847      FFTW_REAL tre0_3_7;
7848      FFTW_REAL tim0_3_7;
7849      {
7850 	  FFTW_REAL tre1_0_0;
7851 	  FFTW_REAL tim1_0_0;
7852 	  FFTW_REAL tre1_0_1;
7853 	  FFTW_REAL tim1_0_1;
7854 	  FFTW_REAL tre1_1_0;
7855 	  FFTW_REAL tim1_1_0;
7856 	  FFTW_REAL tre1_1_1;
7857 	  FFTW_REAL tim1_1_1;
7858 	  {
7859 	       FFTW_REAL tre2_0_0;
7860 	       FFTW_REAL tim2_0_0;
7861 	       FFTW_REAL tre2_1_0;
7862 	       FFTW_REAL tim2_1_0;
7863 	       tre2_0_0 = c_re(in[0]);
7864 	       tim2_0_0 = c_im(in[0]);
7865 	       tre2_1_0 = c_re(in[16 * istride]);
7866 	       tim2_1_0 = c_im(in[16 * istride]);
7867 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
7868 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
7869 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
7870 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
7871 	  }
7872 	  {
7873 	       FFTW_REAL tre2_0_0;
7874 	       FFTW_REAL tim2_0_0;
7875 	       FFTW_REAL tre2_1_0;
7876 	       FFTW_REAL tim2_1_0;
7877 	       tre2_0_0 = c_re(in[8 * istride]);
7878 	       tim2_0_0 = c_im(in[8 * istride]);
7879 	       tre2_1_0 = c_re(in[24 * istride]);
7880 	       tim2_1_0 = c_im(in[24 * istride]);
7881 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
7882 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
7883 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
7884 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
7885 	  }
7886 	  tre0_0_0 = tre1_0_0 + tre1_0_1;
7887 	  tim0_0_0 = tim1_0_0 + tim1_0_1;
7888 	  tre0_2_0 = tre1_0_0 - tre1_0_1;
7889 	  tim0_2_0 = tim1_0_0 - tim1_0_1;
7890 	  tre0_1_0 = tre1_1_0 - tim1_1_1;
7891 	  tim0_1_0 = tim1_1_0 + tre1_1_1;
7892 	  tre0_3_0 = tre1_1_0 + tim1_1_1;
7893 	  tim0_3_0 = tim1_1_0 - tre1_1_1;
7894      }
7895      {
7896 	  FFTW_REAL tre1_0_0;
7897 	  FFTW_REAL tim1_0_0;
7898 	  FFTW_REAL tre1_0_1;
7899 	  FFTW_REAL tim1_0_1;
7900 	  FFTW_REAL tre1_1_0;
7901 	  FFTW_REAL tim1_1_0;
7902 	  FFTW_REAL tre1_1_1;
7903 	  FFTW_REAL tim1_1_1;
7904 	  {
7905 	       FFTW_REAL tre2_0_0;
7906 	       FFTW_REAL tim2_0_0;
7907 	       FFTW_REAL tre2_1_0;
7908 	       FFTW_REAL tim2_1_0;
7909 	       tre2_0_0 = c_re(in[istride]);
7910 	       tim2_0_0 = c_im(in[istride]);
7911 	       tre2_1_0 = c_re(in[17 * istride]);
7912 	       tim2_1_0 = c_im(in[17 * istride]);
7913 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
7914 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
7915 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
7916 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
7917 	  }
7918 	  {
7919 	       FFTW_REAL tre2_0_0;
7920 	       FFTW_REAL tim2_0_0;
7921 	       FFTW_REAL tre2_1_0;
7922 	       FFTW_REAL tim2_1_0;
7923 	       tre2_0_0 = c_re(in[9 * istride]);
7924 	       tim2_0_0 = c_im(in[9 * istride]);
7925 	       tre2_1_0 = c_re(in[25 * istride]);
7926 	       tim2_1_0 = c_im(in[25 * istride]);
7927 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
7928 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
7929 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
7930 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
7931 	  }
7932 	  tre0_0_1 = tre1_0_0 + tre1_0_1;
7933 	  tim0_0_1 = tim1_0_0 + tim1_0_1;
7934 	  tre0_2_1 = tre1_0_0 - tre1_0_1;
7935 	  tim0_2_1 = tim1_0_0 - tim1_0_1;
7936 	  tre0_1_1 = tre1_1_0 - tim1_1_1;
7937 	  tim0_1_1 = tim1_1_0 + tre1_1_1;
7938 	  tre0_3_1 = tre1_1_0 + tim1_1_1;
7939 	  tim0_3_1 = tim1_1_0 - tre1_1_1;
7940      }
7941      {
7942 	  FFTW_REAL tre1_0_0;
7943 	  FFTW_REAL tim1_0_0;
7944 	  FFTW_REAL tre1_0_1;
7945 	  FFTW_REAL tim1_0_1;
7946 	  FFTW_REAL tre1_1_0;
7947 	  FFTW_REAL tim1_1_0;
7948 	  FFTW_REAL tre1_1_1;
7949 	  FFTW_REAL tim1_1_1;
7950 	  {
7951 	       FFTW_REAL tre2_0_0;
7952 	       FFTW_REAL tim2_0_0;
7953 	       FFTW_REAL tre2_1_0;
7954 	       FFTW_REAL tim2_1_0;
7955 	       tre2_0_0 = c_re(in[2 * istride]);
7956 	       tim2_0_0 = c_im(in[2 * istride]);
7957 	       tre2_1_0 = c_re(in[18 * istride]);
7958 	       tim2_1_0 = c_im(in[18 * istride]);
7959 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
7960 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
7961 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
7962 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
7963 	  }
7964 	  {
7965 	       FFTW_REAL tre2_0_0;
7966 	       FFTW_REAL tim2_0_0;
7967 	       FFTW_REAL tre2_1_0;
7968 	       FFTW_REAL tim2_1_0;
7969 	       tre2_0_0 = c_re(in[10 * istride]);
7970 	       tim2_0_0 = c_im(in[10 * istride]);
7971 	       tre2_1_0 = c_re(in[26 * istride]);
7972 	       tim2_1_0 = c_im(in[26 * istride]);
7973 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
7974 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
7975 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
7976 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
7977 	  }
7978 	  tre0_0_2 = tre1_0_0 + tre1_0_1;
7979 	  tim0_0_2 = tim1_0_0 + tim1_0_1;
7980 	  tre0_2_2 = tre1_0_0 - tre1_0_1;
7981 	  tim0_2_2 = tim1_0_0 - tim1_0_1;
7982 	  tre0_1_2 = tre1_1_0 - tim1_1_1;
7983 	  tim0_1_2 = tim1_1_0 + tre1_1_1;
7984 	  tre0_3_2 = tre1_1_0 + tim1_1_1;
7985 	  tim0_3_2 = tim1_1_0 - tre1_1_1;
7986      }
7987      {
7988 	  FFTW_REAL tre1_0_0;
7989 	  FFTW_REAL tim1_0_0;
7990 	  FFTW_REAL tre1_0_1;
7991 	  FFTW_REAL tim1_0_1;
7992 	  FFTW_REAL tre1_1_0;
7993 	  FFTW_REAL tim1_1_0;
7994 	  FFTW_REAL tre1_1_1;
7995 	  FFTW_REAL tim1_1_1;
7996 	  {
7997 	       FFTW_REAL tre2_0_0;
7998 	       FFTW_REAL tim2_0_0;
7999 	       FFTW_REAL tre2_1_0;
8000 	       FFTW_REAL tim2_1_0;
8001 	       tre2_0_0 = c_re(in[3 * istride]);
8002 	       tim2_0_0 = c_im(in[3 * istride]);
8003 	       tre2_1_0 = c_re(in[19 * istride]);
8004 	       tim2_1_0 = c_im(in[19 * istride]);
8005 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
8006 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
8007 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
8008 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
8009 	  }
8010 	  {
8011 	       FFTW_REAL tre2_0_0;
8012 	       FFTW_REAL tim2_0_0;
8013 	       FFTW_REAL tre2_1_0;
8014 	       FFTW_REAL tim2_1_0;
8015 	       tre2_0_0 = c_re(in[11 * istride]);
8016 	       tim2_0_0 = c_im(in[11 * istride]);
8017 	       tre2_1_0 = c_re(in[27 * istride]);
8018 	       tim2_1_0 = c_im(in[27 * istride]);
8019 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8020 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8021 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8022 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8023 	  }
8024 	  tre0_0_3 = tre1_0_0 + tre1_0_1;
8025 	  tim0_0_3 = tim1_0_0 + tim1_0_1;
8026 	  tre0_2_3 = tre1_0_0 - tre1_0_1;
8027 	  tim0_2_3 = tim1_0_0 - tim1_0_1;
8028 	  tre0_1_3 = tre1_1_0 - tim1_1_1;
8029 	  tim0_1_3 = tim1_1_0 + tre1_1_1;
8030 	  tre0_3_3 = tre1_1_0 + tim1_1_1;
8031 	  tim0_3_3 = tim1_1_0 - tre1_1_1;
8032      }
8033      {
8034 	  FFTW_REAL tre1_0_0;
8035 	  FFTW_REAL tim1_0_0;
8036 	  FFTW_REAL tre1_0_1;
8037 	  FFTW_REAL tim1_0_1;
8038 	  FFTW_REAL tre1_1_0;
8039 	  FFTW_REAL tim1_1_0;
8040 	  FFTW_REAL tre1_1_1;
8041 	  FFTW_REAL tim1_1_1;
8042 	  {
8043 	       FFTW_REAL tre2_0_0;
8044 	       FFTW_REAL tim2_0_0;
8045 	       FFTW_REAL tre2_1_0;
8046 	       FFTW_REAL tim2_1_0;
8047 	       tre2_0_0 = c_re(in[4 * istride]);
8048 	       tim2_0_0 = c_im(in[4 * istride]);
8049 	       tre2_1_0 = c_re(in[20 * istride]);
8050 	       tim2_1_0 = c_im(in[20 * istride]);
8051 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
8052 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
8053 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
8054 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
8055 	  }
8056 	  {
8057 	       FFTW_REAL tre2_0_0;
8058 	       FFTW_REAL tim2_0_0;
8059 	       FFTW_REAL tre2_1_0;
8060 	       FFTW_REAL tim2_1_0;
8061 	       tre2_0_0 = c_re(in[12 * istride]);
8062 	       tim2_0_0 = c_im(in[12 * istride]);
8063 	       tre2_1_0 = c_re(in[28 * istride]);
8064 	       tim2_1_0 = c_im(in[28 * istride]);
8065 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8066 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8067 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8068 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8069 	  }
8070 	  tre0_0_4 = tre1_0_0 + tre1_0_1;
8071 	  tim0_0_4 = tim1_0_0 + tim1_0_1;
8072 	  tre0_2_4 = tre1_0_0 - tre1_0_1;
8073 	  tim0_2_4 = tim1_0_0 - tim1_0_1;
8074 	  tre0_1_4 = tre1_1_0 - tim1_1_1;
8075 	  tim0_1_4 = tim1_1_0 + tre1_1_1;
8076 	  tre0_3_4 = tre1_1_0 + tim1_1_1;
8077 	  tim0_3_4 = tim1_1_0 - tre1_1_1;
8078      }
8079      {
8080 	  FFTW_REAL tre1_0_0;
8081 	  FFTW_REAL tim1_0_0;
8082 	  FFTW_REAL tre1_0_1;
8083 	  FFTW_REAL tim1_0_1;
8084 	  FFTW_REAL tre1_1_0;
8085 	  FFTW_REAL tim1_1_0;
8086 	  FFTW_REAL tre1_1_1;
8087 	  FFTW_REAL tim1_1_1;
8088 	  {
8089 	       FFTW_REAL tre2_0_0;
8090 	       FFTW_REAL tim2_0_0;
8091 	       FFTW_REAL tre2_1_0;
8092 	       FFTW_REAL tim2_1_0;
8093 	       tre2_0_0 = c_re(in[5 * istride]);
8094 	       tim2_0_0 = c_im(in[5 * istride]);
8095 	       tre2_1_0 = c_re(in[21 * istride]);
8096 	       tim2_1_0 = c_im(in[21 * istride]);
8097 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
8098 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
8099 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
8100 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
8101 	  }
8102 	  {
8103 	       FFTW_REAL tre2_0_0;
8104 	       FFTW_REAL tim2_0_0;
8105 	       FFTW_REAL tre2_1_0;
8106 	       FFTW_REAL tim2_1_0;
8107 	       tre2_0_0 = c_re(in[13 * istride]);
8108 	       tim2_0_0 = c_im(in[13 * istride]);
8109 	       tre2_1_0 = c_re(in[29 * istride]);
8110 	       tim2_1_0 = c_im(in[29 * istride]);
8111 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8112 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8113 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8114 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8115 	  }
8116 	  tre0_0_5 = tre1_0_0 + tre1_0_1;
8117 	  tim0_0_5 = tim1_0_0 + tim1_0_1;
8118 	  tre0_2_5 = tre1_0_0 - tre1_0_1;
8119 	  tim0_2_5 = tim1_0_0 - tim1_0_1;
8120 	  tre0_1_5 = tre1_1_0 - tim1_1_1;
8121 	  tim0_1_5 = tim1_1_0 + tre1_1_1;
8122 	  tre0_3_5 = tre1_1_0 + tim1_1_1;
8123 	  tim0_3_5 = tim1_1_0 - tre1_1_1;
8124      }
8125      {
8126 	  FFTW_REAL tre1_0_0;
8127 	  FFTW_REAL tim1_0_0;
8128 	  FFTW_REAL tre1_0_1;
8129 	  FFTW_REAL tim1_0_1;
8130 	  FFTW_REAL tre1_1_0;
8131 	  FFTW_REAL tim1_1_0;
8132 	  FFTW_REAL tre1_1_1;
8133 	  FFTW_REAL tim1_1_1;
8134 	  {
8135 	       FFTW_REAL tre2_0_0;
8136 	       FFTW_REAL tim2_0_0;
8137 	       FFTW_REAL tre2_1_0;
8138 	       FFTW_REAL tim2_1_0;
8139 	       tre2_0_0 = c_re(in[6 * istride]);
8140 	       tim2_0_0 = c_im(in[6 * istride]);
8141 	       tre2_1_0 = c_re(in[22 * istride]);
8142 	       tim2_1_0 = c_im(in[22 * istride]);
8143 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
8144 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
8145 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
8146 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
8147 	  }
8148 	  {
8149 	       FFTW_REAL tre2_0_0;
8150 	       FFTW_REAL tim2_0_0;
8151 	       FFTW_REAL tre2_1_0;
8152 	       FFTW_REAL tim2_1_0;
8153 	       tre2_0_0 = c_re(in[14 * istride]);
8154 	       tim2_0_0 = c_im(in[14 * istride]);
8155 	       tre2_1_0 = c_re(in[30 * istride]);
8156 	       tim2_1_0 = c_im(in[30 * istride]);
8157 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8158 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8159 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8160 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8161 	  }
8162 	  tre0_0_6 = tre1_0_0 + tre1_0_1;
8163 	  tim0_0_6 = tim1_0_0 + tim1_0_1;
8164 	  tre0_2_6 = tre1_0_0 - tre1_0_1;
8165 	  tim0_2_6 = tim1_0_0 - tim1_0_1;
8166 	  tre0_1_6 = tre1_1_0 - tim1_1_1;
8167 	  tim0_1_6 = tim1_1_0 + tre1_1_1;
8168 	  tre0_3_6 = tre1_1_0 + tim1_1_1;
8169 	  tim0_3_6 = tim1_1_0 - tre1_1_1;
8170      }
8171      {
8172 	  FFTW_REAL tre1_0_0;
8173 	  FFTW_REAL tim1_0_0;
8174 	  FFTW_REAL tre1_0_1;
8175 	  FFTW_REAL tim1_0_1;
8176 	  FFTW_REAL tre1_1_0;
8177 	  FFTW_REAL tim1_1_0;
8178 	  FFTW_REAL tre1_1_1;
8179 	  FFTW_REAL tim1_1_1;
8180 	  {
8181 	       FFTW_REAL tre2_0_0;
8182 	       FFTW_REAL tim2_0_0;
8183 	       FFTW_REAL tre2_1_0;
8184 	       FFTW_REAL tim2_1_0;
8185 	       tre2_0_0 = c_re(in[7 * istride]);
8186 	       tim2_0_0 = c_im(in[7 * istride]);
8187 	       tre2_1_0 = c_re(in[23 * istride]);
8188 	       tim2_1_0 = c_im(in[23 * istride]);
8189 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
8190 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
8191 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
8192 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
8193 	  }
8194 	  {
8195 	       FFTW_REAL tre2_0_0;
8196 	       FFTW_REAL tim2_0_0;
8197 	       FFTW_REAL tre2_1_0;
8198 	       FFTW_REAL tim2_1_0;
8199 	       tre2_0_0 = c_re(in[15 * istride]);
8200 	       tim2_0_0 = c_im(in[15 * istride]);
8201 	       tre2_1_0 = c_re(in[31 * istride]);
8202 	       tim2_1_0 = c_im(in[31 * istride]);
8203 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8204 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8205 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8206 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8207 	  }
8208 	  tre0_0_7 = tre1_0_0 + tre1_0_1;
8209 	  tim0_0_7 = tim1_0_0 + tim1_0_1;
8210 	  tre0_2_7 = tre1_0_0 - tre1_0_1;
8211 	  tim0_2_7 = tim1_0_0 - tim1_0_1;
8212 	  tre0_1_7 = tre1_1_0 - tim1_1_1;
8213 	  tim0_1_7 = tim1_1_0 + tre1_1_1;
8214 	  tre0_3_7 = tre1_1_0 + tim1_1_1;
8215 	  tim0_3_7 = tim1_1_0 - tre1_1_1;
8216      }
8217      {
8218 	  FFTW_REAL tre1_0_0;
8219 	  FFTW_REAL tim1_0_0;
8220 	  FFTW_REAL tre1_0_1;
8221 	  FFTW_REAL tim1_0_1;
8222 	  FFTW_REAL tre1_0_2;
8223 	  FFTW_REAL tim1_0_2;
8224 	  FFTW_REAL tre1_0_3;
8225 	  FFTW_REAL tim1_0_3;
8226 	  FFTW_REAL tre1_1_0;
8227 	  FFTW_REAL tim1_1_0;
8228 	  FFTW_REAL tre1_1_1;
8229 	  FFTW_REAL tim1_1_1;
8230 	  FFTW_REAL tre1_1_2;
8231 	  FFTW_REAL tim1_1_2;
8232 	  FFTW_REAL tre1_1_3;
8233 	  FFTW_REAL tim1_1_3;
8234 	  tre1_0_0 = tre0_0_0 + tre0_0_4;
8235 	  tim1_0_0 = tim0_0_0 + tim0_0_4;
8236 	  tre1_1_0 = tre0_0_0 - tre0_0_4;
8237 	  tim1_1_0 = tim0_0_0 - tim0_0_4;
8238 	  tre1_0_1 = tre0_0_1 + tre0_0_5;
8239 	  tim1_0_1 = tim0_0_1 + tim0_0_5;
8240 	  tre1_1_1 = tre0_0_1 - tre0_0_5;
8241 	  tim1_1_1 = tim0_0_1 - tim0_0_5;
8242 	  tre1_0_2 = tre0_0_2 + tre0_0_6;
8243 	  tim1_0_2 = tim0_0_2 + tim0_0_6;
8244 	  tre1_1_2 = tre0_0_2 - tre0_0_6;
8245 	  tim1_1_2 = tim0_0_2 - tim0_0_6;
8246 	  tre1_0_3 = tre0_0_3 + tre0_0_7;
8247 	  tim1_0_3 = tim0_0_3 + tim0_0_7;
8248 	  tre1_1_3 = tre0_0_3 - tre0_0_7;
8249 	  tim1_1_3 = tim0_0_3 - tim0_0_7;
8250 	  {
8251 	       FFTW_REAL tre2_0_0;
8252 	       FFTW_REAL tim2_0_0;
8253 	       FFTW_REAL tre2_0_1;
8254 	       FFTW_REAL tim2_0_1;
8255 	       FFTW_REAL tre2_1_0;
8256 	       FFTW_REAL tim2_1_0;
8257 	       FFTW_REAL tre2_1_1;
8258 	       FFTW_REAL tim2_1_1;
8259 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
8260 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
8261 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
8262 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
8263 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
8264 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
8265 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
8266 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
8267 	       c_re(out[0]) = tre2_0_0 + tre2_0_1;
8268 	       c_im(out[0]) = tim2_0_0 + tim2_0_1;
8269 	       c_re(out[16 * ostride]) = tre2_0_0 - tre2_0_1;
8270 	       c_im(out[16 * ostride]) = tim2_0_0 - tim2_0_1;
8271 	       c_re(out[8 * ostride]) = tre2_1_0 - tim2_1_1;
8272 	       c_im(out[8 * ostride]) = tim2_1_0 + tre2_1_1;
8273 	       c_re(out[24 * ostride]) = tre2_1_0 + tim2_1_1;
8274 	       c_im(out[24 * ostride]) = tim2_1_0 - tre2_1_1;
8275 	  }
8276 	  {
8277 	       FFTW_REAL tre2_0_0;
8278 	       FFTW_REAL tim2_0_0;
8279 	       FFTW_REAL tre2_0_1;
8280 	       FFTW_REAL tim2_0_1;
8281 	       FFTW_REAL tre2_1_0;
8282 	       FFTW_REAL tim2_1_0;
8283 	       FFTW_REAL tre2_1_1;
8284 	       FFTW_REAL tim2_1_1;
8285 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
8286 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
8287 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
8288 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
8289 	       {
8290 		    FFTW_REAL tre3_0_0;
8291 		    FFTW_REAL tim3_0_0;
8292 		    FFTW_REAL tre3_1_0;
8293 		    FFTW_REAL tim3_1_0;
8294 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
8295 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
8296 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
8297 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
8298 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
8299 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
8300 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
8301 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
8302 	       }
8303 	       c_re(out[4 * ostride]) = tre2_0_0 + tre2_0_1;
8304 	       c_im(out[4 * ostride]) = tim2_0_0 + tim2_0_1;
8305 	       c_re(out[20 * ostride]) = tre2_0_0 - tre2_0_1;
8306 	       c_im(out[20 * ostride]) = tim2_0_0 - tim2_0_1;
8307 	       c_re(out[12 * ostride]) = tre2_1_0 - tim2_1_1;
8308 	       c_im(out[12 * ostride]) = tim2_1_0 + tre2_1_1;
8309 	       c_re(out[28 * ostride]) = tre2_1_0 + tim2_1_1;
8310 	       c_im(out[28 * ostride]) = tim2_1_0 - tre2_1_1;
8311 	  }
8312      }
8313      {
8314 	  FFTW_REAL tre1_0_0;
8315 	  FFTW_REAL tim1_0_0;
8316 	  FFTW_REAL tre1_0_1;
8317 	  FFTW_REAL tim1_0_1;
8318 	  FFTW_REAL tre1_0_2;
8319 	  FFTW_REAL tim1_0_2;
8320 	  FFTW_REAL tre1_0_3;
8321 	  FFTW_REAL tim1_0_3;
8322 	  FFTW_REAL tre1_1_0;
8323 	  FFTW_REAL tim1_1_0;
8324 	  FFTW_REAL tre1_1_1;
8325 	  FFTW_REAL tim1_1_1;
8326 	  FFTW_REAL tre1_1_2;
8327 	  FFTW_REAL tim1_1_2;
8328 	  FFTW_REAL tre1_1_3;
8329 	  FFTW_REAL tim1_1_3;
8330 	  {
8331 	       FFTW_REAL tre2_1_0;
8332 	       FFTW_REAL tim2_1_0;
8333 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_4 - tim0_1_4);
8334 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_4 + tre0_1_4);
8335 	       tre1_0_0 = tre0_1_0 + tre2_1_0;
8336 	       tim1_0_0 = tim0_1_0 + tim2_1_0;
8337 	       tre1_1_0 = tre0_1_0 - tre2_1_0;
8338 	       tim1_1_0 = tim0_1_0 - tim2_1_0;
8339 	  }
8340 	  {
8341 	       FFTW_REAL tre2_0_0;
8342 	       FFTW_REAL tim2_0_0;
8343 	       FFTW_REAL tre2_1_0;
8344 	       FFTW_REAL tim2_1_0;
8345 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_1) - (((FFTW_REAL) FFTW_K195090322) * tim0_1_1);
8346 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_1) + (((FFTW_REAL) FFTW_K195090322) * tre0_1_1);
8347 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_1_5) - (((FFTW_REAL) FFTW_K831469612) * tim0_1_5);
8348 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_1_5) + (((FFTW_REAL) FFTW_K831469612) * tre0_1_5);
8349 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
8350 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8351 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
8352 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8353 	  }
8354 	  {
8355 	       FFTW_REAL tre2_0_0;
8356 	       FFTW_REAL tim2_0_0;
8357 	       FFTW_REAL tre2_1_0;
8358 	       FFTW_REAL tim2_1_0;
8359 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_2) - (((FFTW_REAL) FFTW_K382683432) * tim0_1_2);
8360 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_2) + (((FFTW_REAL) FFTW_K382683432) * tre0_1_2);
8361 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_1_6);
8362 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_6) + (((FFTW_REAL) FFTW_K923879532) * tre0_1_6);
8363 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
8364 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
8365 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
8366 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
8367 	  }
8368 	  {
8369 	       FFTW_REAL tre2_0_0;
8370 	       FFTW_REAL tim2_0_0;
8371 	       FFTW_REAL tre2_1_0;
8372 	       FFTW_REAL tim2_1_0;
8373 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_3) - (((FFTW_REAL) FFTW_K555570233) * tim0_1_3);
8374 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_3) + (((FFTW_REAL) FFTW_K555570233) * tre0_1_3);
8375 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_1_7) - (((FFTW_REAL) FFTW_K980785280) * tim0_1_7);
8376 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_1_7) + (((FFTW_REAL) FFTW_K980785280) * tre0_1_7);
8377 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
8378 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
8379 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
8380 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
8381 	  }
8382 	  {
8383 	       FFTW_REAL tre2_0_0;
8384 	       FFTW_REAL tim2_0_0;
8385 	       FFTW_REAL tre2_0_1;
8386 	       FFTW_REAL tim2_0_1;
8387 	       FFTW_REAL tre2_1_0;
8388 	       FFTW_REAL tim2_1_0;
8389 	       FFTW_REAL tre2_1_1;
8390 	       FFTW_REAL tim2_1_1;
8391 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
8392 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
8393 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
8394 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
8395 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
8396 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
8397 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
8398 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
8399 	       c_re(out[ostride]) = tre2_0_0 + tre2_0_1;
8400 	       c_im(out[ostride]) = tim2_0_0 + tim2_0_1;
8401 	       c_re(out[17 * ostride]) = tre2_0_0 - tre2_0_1;
8402 	       c_im(out[17 * ostride]) = tim2_0_0 - tim2_0_1;
8403 	       c_re(out[9 * ostride]) = tre2_1_0 - tim2_1_1;
8404 	       c_im(out[9 * ostride]) = tim2_1_0 + tre2_1_1;
8405 	       c_re(out[25 * ostride]) = tre2_1_0 + tim2_1_1;
8406 	       c_im(out[25 * ostride]) = tim2_1_0 - tre2_1_1;
8407 	  }
8408 	  {
8409 	       FFTW_REAL tre2_0_0;
8410 	       FFTW_REAL tim2_0_0;
8411 	       FFTW_REAL tre2_0_1;
8412 	       FFTW_REAL tim2_0_1;
8413 	       FFTW_REAL tre2_1_0;
8414 	       FFTW_REAL tim2_1_0;
8415 	       FFTW_REAL tre2_1_1;
8416 	       FFTW_REAL tim2_1_1;
8417 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
8418 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
8419 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
8420 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
8421 	       {
8422 		    FFTW_REAL tre3_0_0;
8423 		    FFTW_REAL tim3_0_0;
8424 		    FFTW_REAL tre3_1_0;
8425 		    FFTW_REAL tim3_1_0;
8426 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
8427 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
8428 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
8429 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
8430 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
8431 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
8432 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
8433 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
8434 	       }
8435 	       c_re(out[5 * ostride]) = tre2_0_0 + tre2_0_1;
8436 	       c_im(out[5 * ostride]) = tim2_0_0 + tim2_0_1;
8437 	       c_re(out[21 * ostride]) = tre2_0_0 - tre2_0_1;
8438 	       c_im(out[21 * ostride]) = tim2_0_0 - tim2_0_1;
8439 	       c_re(out[13 * ostride]) = tre2_1_0 - tim2_1_1;
8440 	       c_im(out[13 * ostride]) = tim2_1_0 + tre2_1_1;
8441 	       c_re(out[29 * ostride]) = tre2_1_0 + tim2_1_1;
8442 	       c_im(out[29 * ostride]) = tim2_1_0 - tre2_1_1;
8443 	  }
8444      }
8445      {
8446 	  FFTW_REAL tre1_0_0;
8447 	  FFTW_REAL tim1_0_0;
8448 	  FFTW_REAL tre1_0_1;
8449 	  FFTW_REAL tim1_0_1;
8450 	  FFTW_REAL tre1_0_2;
8451 	  FFTW_REAL tim1_0_2;
8452 	  FFTW_REAL tre1_0_3;
8453 	  FFTW_REAL tim1_0_3;
8454 	  FFTW_REAL tre1_1_0;
8455 	  FFTW_REAL tim1_1_0;
8456 	  FFTW_REAL tre1_1_1;
8457 	  FFTW_REAL tim1_1_1;
8458 	  FFTW_REAL tre1_1_2;
8459 	  FFTW_REAL tim1_1_2;
8460 	  FFTW_REAL tre1_1_3;
8461 	  FFTW_REAL tim1_1_3;
8462 	  tre1_0_0 = tre0_2_0 - tim0_2_4;
8463 	  tim1_0_0 = tim0_2_0 + tre0_2_4;
8464 	  tre1_1_0 = tre0_2_0 + tim0_2_4;
8465 	  tim1_1_0 = tim0_2_0 - tre0_2_4;
8466 	  {
8467 	       FFTW_REAL tre2_0_0;
8468 	       FFTW_REAL tim2_0_0;
8469 	       FFTW_REAL tre2_1_0;
8470 	       FFTW_REAL tim2_1_0;
8471 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_1) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_1);
8472 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_1) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_1);
8473 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_5) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_5);
8474 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_5) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_5);
8475 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
8476 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8477 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
8478 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8479 	  }
8480 	  {
8481 	       FFTW_REAL tre2_0_0;
8482 	       FFTW_REAL tim2_0_0;
8483 	       FFTW_REAL tre2_1_0;
8484 	       FFTW_REAL tim2_1_0;
8485 	       tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_2 - tim0_2_2);
8486 	       tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_2 + tre0_2_2);
8487 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_6 + tim0_2_6);
8488 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_6 - tim0_2_6);
8489 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
8490 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
8491 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
8492 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
8493 	  }
8494 	  {
8495 	       FFTW_REAL tre2_0_0;
8496 	       FFTW_REAL tim2_0_0;
8497 	       FFTW_REAL tre2_1_0;
8498 	       FFTW_REAL tim2_1_0;
8499 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_3);
8500 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_3) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_3);
8501 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_7) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_7);
8502 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_7) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_7);
8503 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
8504 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
8505 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
8506 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
8507 	  }
8508 	  {
8509 	       FFTW_REAL tre2_0_0;
8510 	       FFTW_REAL tim2_0_0;
8511 	       FFTW_REAL tre2_0_1;
8512 	       FFTW_REAL tim2_0_1;
8513 	       FFTW_REAL tre2_1_0;
8514 	       FFTW_REAL tim2_1_0;
8515 	       FFTW_REAL tre2_1_1;
8516 	       FFTW_REAL tim2_1_1;
8517 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
8518 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
8519 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
8520 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
8521 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
8522 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
8523 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
8524 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
8525 	       c_re(out[2 * ostride]) = tre2_0_0 + tre2_0_1;
8526 	       c_im(out[2 * ostride]) = tim2_0_0 + tim2_0_1;
8527 	       c_re(out[18 * ostride]) = tre2_0_0 - tre2_0_1;
8528 	       c_im(out[18 * ostride]) = tim2_0_0 - tim2_0_1;
8529 	       c_re(out[10 * ostride]) = tre2_1_0 - tim2_1_1;
8530 	       c_im(out[10 * ostride]) = tim2_1_0 + tre2_1_1;
8531 	       c_re(out[26 * ostride]) = tre2_1_0 + tim2_1_1;
8532 	       c_im(out[26 * ostride]) = tim2_1_0 - tre2_1_1;
8533 	  }
8534 	  {
8535 	       FFTW_REAL tre2_0_0;
8536 	       FFTW_REAL tim2_0_0;
8537 	       FFTW_REAL tre2_0_1;
8538 	       FFTW_REAL tim2_0_1;
8539 	       FFTW_REAL tre2_1_0;
8540 	       FFTW_REAL tim2_1_0;
8541 	       FFTW_REAL tre2_1_1;
8542 	       FFTW_REAL tim2_1_1;
8543 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
8544 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
8545 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
8546 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
8547 	       {
8548 		    FFTW_REAL tre3_0_0;
8549 		    FFTW_REAL tim3_0_0;
8550 		    FFTW_REAL tre3_1_0;
8551 		    FFTW_REAL tim3_1_0;
8552 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
8553 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
8554 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
8555 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
8556 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
8557 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
8558 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
8559 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
8560 	       }
8561 	       c_re(out[6 * ostride]) = tre2_0_0 + tre2_0_1;
8562 	       c_im(out[6 * ostride]) = tim2_0_0 + tim2_0_1;
8563 	       c_re(out[22 * ostride]) = tre2_0_0 - tre2_0_1;
8564 	       c_im(out[22 * ostride]) = tim2_0_0 - tim2_0_1;
8565 	       c_re(out[14 * ostride]) = tre2_1_0 - tim2_1_1;
8566 	       c_im(out[14 * ostride]) = tim2_1_0 + tre2_1_1;
8567 	       c_re(out[30 * ostride]) = tre2_1_0 + tim2_1_1;
8568 	       c_im(out[30 * ostride]) = tim2_1_0 - tre2_1_1;
8569 	  }
8570      }
8571      {
8572 	  FFTW_REAL tre1_0_0;
8573 	  FFTW_REAL tim1_0_0;
8574 	  FFTW_REAL tre1_0_1;
8575 	  FFTW_REAL tim1_0_1;
8576 	  FFTW_REAL tre1_0_2;
8577 	  FFTW_REAL tim1_0_2;
8578 	  FFTW_REAL tre1_0_3;
8579 	  FFTW_REAL tim1_0_3;
8580 	  FFTW_REAL tre1_1_0;
8581 	  FFTW_REAL tim1_1_0;
8582 	  FFTW_REAL tre1_1_1;
8583 	  FFTW_REAL tim1_1_1;
8584 	  FFTW_REAL tre1_1_2;
8585 	  FFTW_REAL tim1_1_2;
8586 	  FFTW_REAL tre1_1_3;
8587 	  FFTW_REAL tim1_1_3;
8588 	  {
8589 	       FFTW_REAL tre2_1_0;
8590 	       FFTW_REAL tim2_1_0;
8591 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_4 + tim0_3_4);
8592 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_4 - tim0_3_4);
8593 	       tre1_0_0 = tre0_3_0 - tre2_1_0;
8594 	       tim1_0_0 = tim0_3_0 + tim2_1_0;
8595 	       tre1_1_0 = tre0_3_0 + tre2_1_0;
8596 	       tim1_1_0 = tim0_3_0 - tim2_1_0;
8597 	  }
8598 	  {
8599 	       FFTW_REAL tre2_0_0;
8600 	       FFTW_REAL tim2_0_0;
8601 	       FFTW_REAL tre2_1_0;
8602 	       FFTW_REAL tim2_1_0;
8603 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_1) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_1);
8604 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_1) + (((FFTW_REAL) FFTW_K555570233) * tre0_3_1);
8605 	       tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_5) + (((FFTW_REAL) FFTW_K195090322) * tim0_3_5);
8606 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_5) - (((FFTW_REAL) FFTW_K980785280) * tim0_3_5);
8607 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
8608 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
8609 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
8610 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
8611 	  }
8612 	  {
8613 	       FFTW_REAL tre2_0_0;
8614 	       FFTW_REAL tim2_0_0;
8615 	       FFTW_REAL tre2_1_0;
8616 	       FFTW_REAL tim2_1_0;
8617 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_2) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_2);
8618 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_2) + (((FFTW_REAL) FFTW_K923879532) * tre0_3_2);
8619 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_6);
8620 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_3_6) + (((FFTW_REAL) FFTW_K382683432) * tre0_3_6);
8621 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
8622 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
8623 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
8624 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
8625 	  }
8626 	  {
8627 	       FFTW_REAL tre2_0_0;
8628 	       FFTW_REAL tim2_0_0;
8629 	       FFTW_REAL tre2_1_0;
8630 	       FFTW_REAL tim2_1_0;
8631 	       tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_3) + (((FFTW_REAL) FFTW_K980785280) * tim0_3_3);
8632 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_3) - (((FFTW_REAL) FFTW_K195090322) * tim0_3_3);
8633 	       tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_7) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_7);
8634 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_3_7) + (((FFTW_REAL) FFTW_K831469612) * tre0_3_7);
8635 	       tre1_0_3 = tre2_1_0 - tre2_0_0;
8636 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
8637 	       tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
8638 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
8639 	  }
8640 	  {
8641 	       FFTW_REAL tre2_0_0;
8642 	       FFTW_REAL tim2_0_0;
8643 	       FFTW_REAL tre2_0_1;
8644 	       FFTW_REAL tim2_0_1;
8645 	       FFTW_REAL tre2_1_0;
8646 	       FFTW_REAL tim2_1_0;
8647 	       FFTW_REAL tre2_1_1;
8648 	       FFTW_REAL tim2_1_1;
8649 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
8650 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
8651 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
8652 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
8653 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
8654 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
8655 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
8656 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
8657 	       c_re(out[3 * ostride]) = tre2_0_0 + tre2_0_1;
8658 	       c_im(out[3 * ostride]) = tim2_0_0 + tim2_0_1;
8659 	       c_re(out[19 * ostride]) = tre2_0_0 - tre2_0_1;
8660 	       c_im(out[19 * ostride]) = tim2_0_0 - tim2_0_1;
8661 	       c_re(out[11 * ostride]) = tre2_1_0 - tim2_1_1;
8662 	       c_im(out[11 * ostride]) = tim2_1_0 + tre2_1_1;
8663 	       c_re(out[27 * ostride]) = tre2_1_0 + tim2_1_1;
8664 	       c_im(out[27 * ostride]) = tim2_1_0 - tre2_1_1;
8665 	  }
8666 	  {
8667 	       FFTW_REAL tre2_0_0;
8668 	       FFTW_REAL tim2_0_0;
8669 	       FFTW_REAL tre2_0_1;
8670 	       FFTW_REAL tim2_0_1;
8671 	       FFTW_REAL tre2_1_0;
8672 	       FFTW_REAL tim2_1_0;
8673 	       FFTW_REAL tre2_1_1;
8674 	       FFTW_REAL tim2_1_1;
8675 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
8676 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
8677 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
8678 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
8679 	       {
8680 		    FFTW_REAL tre3_0_0;
8681 		    FFTW_REAL tim3_0_0;
8682 		    FFTW_REAL tre3_1_0;
8683 		    FFTW_REAL tim3_1_0;
8684 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
8685 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
8686 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
8687 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
8688 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
8689 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
8690 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
8691 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
8692 	       }
8693 	       c_re(out[7 * ostride]) = tre2_0_0 + tre2_0_1;
8694 	       c_im(out[7 * ostride]) = tim2_0_0 + tim2_0_1;
8695 	       c_re(out[23 * ostride]) = tre2_0_0 - tre2_0_1;
8696 	       c_im(out[23 * ostride]) = tim2_0_0 - tim2_0_1;
8697 	       c_re(out[15 * ostride]) = tre2_1_0 - tim2_1_1;
8698 	       c_im(out[15 * ostride]) = tim2_1_0 + tre2_1_1;
8699 	       c_re(out[31 * ostride]) = tre2_1_0 + tim2_1_1;
8700 	       c_im(out[31 * ostride]) = tim2_1_0 - tre2_1_1;
8701 	  }
8702      }
8703 }
8704 
8705 /* This function contains 16 FP additions and 0 FP multiplications */
8706 
/*
 * Unscaled 4-point complex DFT using the positive-exponent kernel:
 *
 *     out[k * ostride] = sum over n of in[n * istride] * i^(n * k),  n, k = 0..3
 *
 * in       input samples, read at element offsets 0, istride, 2*istride
 *          and 3*istride
 * out      results, written at element offsets 0, ostride, 2*ostride and
 *          3*ostride
 * istride  distance (in elements) between consecutive input samples
 * ostride  distance (in elements) between consecutive output samples
 *
 * Every input is read before the first output is written.  The only
 * "twiddle" needed is a multiplication by +/- i, which is a swap of the
 * real and imaginary parts with one sign change, so no FP multiplication
 * appears below.
 */
static void fftwi_no_twiddle_4(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Fetch the four complex samples. */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);
     const FFTW_REAL x3_re = c_re(in[3 * istride]);
     const FFTW_REAL x3_im = c_im(in[3 * istride]);

     /* First stage: 2-point butterfly on the even samples (0 and 2). */
     const FFTW_REAL even_sum_re = x0_re + x2_re;
     const FFTW_REAL even_sum_im = x0_im + x2_im;
     const FFTW_REAL even_diff_re = x0_re - x2_re;
     const FFTW_REAL even_diff_im = x0_im - x2_im;

     /* First stage: 2-point butterfly on the odd samples (1 and 3). */
     const FFTW_REAL odd_sum_re = x1_re + x3_re;
     const FFTW_REAL odd_sum_im = x1_im + x3_im;
     const FFTW_REAL odd_diff_re = x1_re - x3_re;
     const FFTW_REAL odd_diff_im = x1_im - x3_im;

     /* Outputs 0 and 2: plain sum and difference of the two half-sums. */
     c_re(out[0]) = even_sum_re + odd_sum_re;
     c_im(out[0]) = even_sum_im + odd_sum_im;
     c_re(out[2 * ostride]) = even_sum_re - odd_sum_re;
     c_im(out[2 * ostride]) = even_sum_im - odd_sum_im;

     /* Output 1: even_diff + i * odd_diff  (i * (a + bi) = -b + ai). */
     c_re(out[ostride]) = even_diff_re - odd_diff_im;
     c_im(out[ostride]) = even_diff_im + odd_diff_re;

     /* Output 3: even_diff - i * odd_diff. */
     c_re(out[3 * ostride]) = even_diff_re + odd_diff_im;
     c_im(out[3 * ostride]) = even_diff_im - odd_diff_re;
}
8754 
8755 /* This function contains 44 FP additions and 16 FP multiplications */
8756 
/*
 * Unscaled 5-point complex transform, evaluated directly from its
 * definition (no factorisation into smaller stages).
 *
 * in       input samples, read at element offsets 0 .. 4*istride
 * out      results, written at element offsets 0 .. 4*ostride
 * istride  distance (in elements) between consecutive input samples
 * ostride  distance (in elements) between consecutive output samples
 *
 * Every input is read, and every intermediate is formed, before the first
 * output is written.
 *
 * Outputs come in pairs (1, 4) and (2, 3).  Within a pair both outputs
 * share a "base" term and differ only in the sign of a "rot" term, so
 * each pair costs one base and one rot per real/imaginary component.
 *
 * NOTE(review): the FFTW_K... constants are defined outside this block;
 * judging by their names they are presumably cos(2*pi/5) = 0.309016994,
 * -cos(4*pi/5) = 0.809016994, sin(2*pi/5) = 0.951056516 and
 * sin(4*pi/5) = 0.587785252 -- confirm against their definitions.
 */
static void fftwi_no_twiddle_5(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Fetch the five complex samples. */
     const FFTW_REAL x0_re = c_re(in[0]);
     const FFTW_REAL x0_im = c_im(in[0]);
     const FFTW_REAL x1_re = c_re(in[istride]);
     const FFTW_REAL x1_im = c_im(in[istride]);
     const FFTW_REAL x2_re = c_re(in[2 * istride]);
     const FFTW_REAL x2_im = c_im(in[2 * istride]);
     const FFTW_REAL x3_re = c_re(in[3 * istride]);
     const FFTW_REAL x3_im = c_im(in[3 * istride]);
     const FFTW_REAL x4_re = c_re(in[4 * istride]);
     const FFTW_REAL x4_im = c_im(in[4 * istride]);

     /* Shared and sign-flipped terms for the output pair (1, 4). */
     const FFTW_REAL base14_re = x0_re + (((FFTW_REAL) FFTW_K309016994) * (x1_re + x4_re)) - (((FFTW_REAL) FFTW_K809016994) * (x2_re + x3_re));
     const FFTW_REAL rot14_re = (((FFTW_REAL) FFTW_K951056516) * (x4_im - x1_im)) + (((FFTW_REAL) FFTW_K587785252) * (x3_im - x2_im));
     const FFTW_REAL base14_im = x0_im + (((FFTW_REAL) FFTW_K309016994) * (x1_im + x4_im)) - (((FFTW_REAL) FFTW_K809016994) * (x2_im + x3_im));
     const FFTW_REAL rot14_im = (((FFTW_REAL) FFTW_K951056516) * (x1_re - x4_re)) + (((FFTW_REAL) FFTW_K587785252) * (x2_re - x3_re));

     /* Shared and sign-flipped terms for the output pair (2, 3). */
     const FFTW_REAL base23_re = x0_re + (((FFTW_REAL) FFTW_K309016994) * (x2_re + x3_re)) - (((FFTW_REAL) FFTW_K809016994) * (x1_re + x4_re));
     const FFTW_REAL rot23_re = (((FFTW_REAL) FFTW_K587785252) * (x4_im - x1_im)) + (((FFTW_REAL) FFTW_K951056516) * (x2_im - x3_im));
     const FFTW_REAL base23_im = x0_im + (((FFTW_REAL) FFTW_K309016994) * (x2_im + x3_im)) - (((FFTW_REAL) FFTW_K809016994) * (x1_im + x4_im));
     const FFTW_REAL rot23_im = (((FFTW_REAL) FFTW_K587785252) * (x1_re - x4_re)) + (((FFTW_REAL) FFTW_K951056516) * (x3_re - x2_re));

     /* Output 0 is the plain sum of all samples (summed left to right). */
     c_re(out[0]) = x0_re + x1_re + x2_re + x3_re + x4_re;
     c_im(out[0]) = x0_im + x1_im + x2_im + x3_im + x4_im;

     /* Outputs 1 and 4. */
     c_re(out[ostride]) = base14_re + rot14_re;
     c_re(out[4 * ostride]) = base14_re - rot14_re;
     c_im(out[ostride]) = base14_im + rot14_im;
     c_im(out[4 * ostride]) = base14_im - rot14_im;

     /* Outputs 2 and 3. */
     c_re(out[2 * ostride]) = base23_re + rot23_re;
     c_re(out[3 * ostride]) = base23_re - rot23_re;
     c_im(out[2 * ostride]) = base23_im + rot23_im;
     c_im(out[3 * ostride]) = base23_im - rot23_im;
}
8814 
8815 /* This function contains 40 FP additions and 8 FP multiplications */
8816 
/*
 * Six-point transform codelet with no twiddle-factor multiplication.
 *
 * Reads six complex values from in[0], in[istride], ..., in[5 * istride]
 * and stores six complex values to out[0], out[ostride], ...,
 * out[5 * ostride].  Every input is loaded into a local before the first
 * store to out is issued.
 *
 * Structure:
 *   1. Three 2-point butterflies on the input pairs (0,3), (2,5), (4,1),
 *      each yielding a sum and a difference.
 *   2. A 3-point combination of the three sums -> outputs 0, 4, 2.
 *   3. A 3-point combination of the three differences -> outputs 3, 1, 5.
 *
 * NOTE(review): with c_re/c_im selecting the real/imaginary parts and
 * FFTW_K499999999 ~ 1/2, FFTW_K866025403 ~ sqrt(3)/2 (all defined outside
 * this block), the arithmetic corresponds to an unnormalized DFT with a
 * positive exponent sign -- confirm against those definitions.
 */
static void fftwi_no_twiddle_6(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* Butterfly sums: feed output indices 0, 4 and 2. */
     FFTW_REAL sum0_re, sum0_im;
     FFTW_REAL sum1_re, sum1_im;
     FFTW_REAL sum2_re, sum2_im;
     /* Butterfly differences: feed output indices 3, 1 and 5. */
     FFTW_REAL dif0_re, dif0_im;
     FFTW_REAL dif1_re, dif1_im;
     FFTW_REAL dif2_re, dif2_im;

     {
	  /* Butterfly 0: in[0] paired with in[3 * istride]. */
	  const FFTW_REAL a_re = c_re(in[0]);
	  const FFTW_REAL a_im = c_im(in[0]);
	  const FFTW_REAL b_re = c_re(in[3 * istride]);
	  const FFTW_REAL b_im = c_im(in[3 * istride]);

	  sum0_re = a_re + b_re;
	  sum0_im = a_im + b_im;
	  dif0_re = a_re - b_re;
	  dif0_im = a_im - b_im;
     }
     {
	  /* Butterfly 1: in[2 * istride] paired with in[5 * istride]. */
	  const FFTW_REAL a_re = c_re(in[2 * istride]);
	  const FFTW_REAL a_im = c_im(in[2 * istride]);
	  const FFTW_REAL b_re = c_re(in[5 * istride]);
	  const FFTW_REAL b_im = c_im(in[5 * istride]);

	  sum1_re = a_re + b_re;
	  sum1_im = a_im + b_im;
	  dif1_re = a_re - b_re;
	  dif1_im = a_im - b_im;
     }
     {
	  /* Butterfly 2: in[4 * istride] paired with in[istride]. */
	  const FFTW_REAL a_re = c_re(in[4 * istride]);
	  const FFTW_REAL a_im = c_im(in[4 * istride]);
	  const FFTW_REAL b_re = c_re(in[istride]);
	  const FFTW_REAL b_im = c_im(in[istride]);

	  sum2_re = a_re + b_re;
	  sum2_im = a_im + b_im;
	  dif2_re = a_re - b_re;
	  dif2_im = a_im - b_im;
     }

     /* Output 0 is the plain total of the three sums. */
     c_re(out[0]) = sum0_re + sum0_re * 0 + sum1_re + sum2_re - sum0_re * 0;
     c_im(out[0]) = sum0_im + sum1_im + sum2_im;
     {
	  /* Outputs 4 and 2: shared base term plus/minus the cross term. */
	  const FFTW_REAL base_re = sum0_re - (((FFTW_REAL) FFTW_K499999999) * (sum1_re + sum2_re));
	  const FFTW_REAL cross_re = ((FFTW_REAL) FFTW_K866025403) * (sum2_im - sum1_im);
	  const FFTW_REAL base_im = sum0_im - (((FFTW_REAL) FFTW_K499999999) * (sum1_im + sum2_im));
	  const FFTW_REAL cross_im = ((FFTW_REAL) FFTW_K866025403) * (sum1_re - sum2_re);

	  c_re(out[4 * ostride]) = base_re + cross_re;
	  c_re(out[2 * ostride]) = base_re - cross_re;
	  c_im(out[4 * ostride]) = base_im + cross_im;
	  c_im(out[2 * ostride]) = base_im - cross_im;
     }

     /* Output 3 is the plain total of the three differences. */
     c_re(out[3 * ostride]) = dif0_re + dif1_re + dif2_re;
     c_im(out[3 * ostride]) = dif0_im + dif1_im + dif2_im;
     {
	  /* Outputs 1 and 5: same pattern applied to the differences. */
	  const FFTW_REAL base_re = dif0_re - (((FFTW_REAL) FFTW_K499999999) * (dif1_re + dif2_re));
	  const FFTW_REAL cross_re = ((FFTW_REAL) FFTW_K866025403) * (dif2_im - dif1_im);
	  const FFTW_REAL base_im = dif0_im - (((FFTW_REAL) FFTW_K499999999) * (dif1_im + dif2_im));
	  const FFTW_REAL cross_im = ((FFTW_REAL) FFTW_K866025403) * (dif1_re - dif2_re);

	  c_re(out[ostride]) = base_re + cross_re;
	  c_re(out[5 * ostride]) = base_re - cross_re;
	  c_im(out[ostride]) = base_im + cross_im;
	  c_im(out[5 * ostride]) = base_im - cross_im;
     }
}
8910 
8911 /* This function contains 928 FP additions and 248 FP multiplications */
8912 
fftwi_no_twiddle_64(const FFTW_COMPLEX * in,FFTW_COMPLEX * out,int istride,int ostride)8913 static void fftwi_no_twiddle_64(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
8914 {
8915      FFTW_REAL tre0_0_0;
8916      FFTW_REAL tim0_0_0;
8917      FFTW_REAL tre0_0_1;
8918      FFTW_REAL tim0_0_1;
8919      FFTW_REAL tre0_0_2;
8920      FFTW_REAL tim0_0_2;
8921      FFTW_REAL tre0_0_3;
8922      FFTW_REAL tim0_0_3;
8923      FFTW_REAL tre0_0_4;
8924      FFTW_REAL tim0_0_4;
8925      FFTW_REAL tre0_0_5;
8926      FFTW_REAL tim0_0_5;
8927      FFTW_REAL tre0_0_6;
8928      FFTW_REAL tim0_0_6;
8929      FFTW_REAL tre0_0_7;
8930      FFTW_REAL tim0_0_7;
8931      FFTW_REAL tre0_1_0;
8932      FFTW_REAL tim0_1_0;
8933      FFTW_REAL tre0_1_1;
8934      FFTW_REAL tim0_1_1;
8935      FFTW_REAL tre0_1_2;
8936      FFTW_REAL tim0_1_2;
8937      FFTW_REAL tre0_1_3;
8938      FFTW_REAL tim0_1_3;
8939      FFTW_REAL tre0_1_4;
8940      FFTW_REAL tim0_1_4;
8941      FFTW_REAL tre0_1_5;
8942      FFTW_REAL tim0_1_5;
8943      FFTW_REAL tre0_1_6;
8944      FFTW_REAL tim0_1_6;
8945      FFTW_REAL tre0_1_7;
8946      FFTW_REAL tim0_1_7;
8947      FFTW_REAL tre0_2_0;
8948      FFTW_REAL tim0_2_0;
8949      FFTW_REAL tre0_2_1;
8950      FFTW_REAL tim0_2_1;
8951      FFTW_REAL tre0_2_2;
8952      FFTW_REAL tim0_2_2;
8953      FFTW_REAL tre0_2_3;
8954      FFTW_REAL tim0_2_3;
8955      FFTW_REAL tre0_2_4;
8956      FFTW_REAL tim0_2_4;
8957      FFTW_REAL tre0_2_5;
8958      FFTW_REAL tim0_2_5;
8959      FFTW_REAL tre0_2_6;
8960      FFTW_REAL tim0_2_6;
8961      FFTW_REAL tre0_2_7;
8962      FFTW_REAL tim0_2_7;
8963      FFTW_REAL tre0_3_0;
8964      FFTW_REAL tim0_3_0;
8965      FFTW_REAL tre0_3_1;
8966      FFTW_REAL tim0_3_1;
8967      FFTW_REAL tre0_3_2;
8968      FFTW_REAL tim0_3_2;
8969      FFTW_REAL tre0_3_3;
8970      FFTW_REAL tim0_3_3;
8971      FFTW_REAL tre0_3_4;
8972      FFTW_REAL tim0_3_4;
8973      FFTW_REAL tre0_3_5;
8974      FFTW_REAL tim0_3_5;
8975      FFTW_REAL tre0_3_6;
8976      FFTW_REAL tim0_3_6;
8977      FFTW_REAL tre0_3_7;
8978      FFTW_REAL tim0_3_7;
8979      FFTW_REAL tre0_4_0;
8980      FFTW_REAL tim0_4_0;
8981      FFTW_REAL tre0_4_1;
8982      FFTW_REAL tim0_4_1;
8983      FFTW_REAL tre0_4_2;
8984      FFTW_REAL tim0_4_2;
8985      FFTW_REAL tre0_4_3;
8986      FFTW_REAL tim0_4_3;
8987      FFTW_REAL tre0_4_4;
8988      FFTW_REAL tim0_4_4;
8989      FFTW_REAL tre0_4_5;
8990      FFTW_REAL tim0_4_5;
8991      FFTW_REAL tre0_4_6;
8992      FFTW_REAL tim0_4_6;
8993      FFTW_REAL tre0_4_7;
8994      FFTW_REAL tim0_4_7;
8995      FFTW_REAL tre0_5_0;
8996      FFTW_REAL tim0_5_0;
8997      FFTW_REAL tre0_5_1;
8998      FFTW_REAL tim0_5_1;
8999      FFTW_REAL tre0_5_2;
9000      FFTW_REAL tim0_5_2;
9001      FFTW_REAL tre0_5_3;
9002      FFTW_REAL tim0_5_3;
9003      FFTW_REAL tre0_5_4;
9004      FFTW_REAL tim0_5_4;
9005      FFTW_REAL tre0_5_5;
9006      FFTW_REAL tim0_5_5;
9007      FFTW_REAL tre0_5_6;
9008      FFTW_REAL tim0_5_6;
9009      FFTW_REAL tre0_5_7;
9010      FFTW_REAL tim0_5_7;
9011      FFTW_REAL tre0_6_0;
9012      FFTW_REAL tim0_6_0;
9013      FFTW_REAL tre0_6_1;
9014      FFTW_REAL tim0_6_1;
9015      FFTW_REAL tre0_6_2;
9016      FFTW_REAL tim0_6_2;
9017      FFTW_REAL tre0_6_3;
9018      FFTW_REAL tim0_6_3;
9019      FFTW_REAL tre0_6_4;
9020      FFTW_REAL tim0_6_4;
9021      FFTW_REAL tre0_6_5;
9022      FFTW_REAL tim0_6_5;
9023      FFTW_REAL tre0_6_6;
9024      FFTW_REAL tim0_6_6;
9025      FFTW_REAL tre0_6_7;
9026      FFTW_REAL tim0_6_7;
9027      FFTW_REAL tre0_7_0;
9028      FFTW_REAL tim0_7_0;
9029      FFTW_REAL tre0_7_1;
9030      FFTW_REAL tim0_7_1;
9031      FFTW_REAL tre0_7_2;
9032      FFTW_REAL tim0_7_2;
9033      FFTW_REAL tre0_7_3;
9034      FFTW_REAL tim0_7_3;
9035      FFTW_REAL tre0_7_4;
9036      FFTW_REAL tim0_7_4;
9037      FFTW_REAL tre0_7_5;
9038      FFTW_REAL tim0_7_5;
9039      FFTW_REAL tre0_7_6;
9040      FFTW_REAL tim0_7_6;
9041      FFTW_REAL tre0_7_7;
9042      FFTW_REAL tim0_7_7;
9043      {
9044 	  FFTW_REAL tre1_0_0;
9045 	  FFTW_REAL tim1_0_0;
9046 	  FFTW_REAL tre1_0_1;
9047 	  FFTW_REAL tim1_0_1;
9048 	  FFTW_REAL tre1_0_2;
9049 	  FFTW_REAL tim1_0_2;
9050 	  FFTW_REAL tre1_0_3;
9051 	  FFTW_REAL tim1_0_3;
9052 	  FFTW_REAL tre1_1_0;
9053 	  FFTW_REAL tim1_1_0;
9054 	  FFTW_REAL tre1_1_1;
9055 	  FFTW_REAL tim1_1_1;
9056 	  FFTW_REAL tre1_1_2;
9057 	  FFTW_REAL tim1_1_2;
9058 	  FFTW_REAL tre1_1_3;
9059 	  FFTW_REAL tim1_1_3;
9060 	  {
9061 	       FFTW_REAL tre2_0_0;
9062 	       FFTW_REAL tim2_0_0;
9063 	       FFTW_REAL tre2_1_0;
9064 	       FFTW_REAL tim2_1_0;
9065 	       tre2_0_0 = c_re(in[0]);
9066 	       tim2_0_0 = c_im(in[0]);
9067 	       tre2_1_0 = c_re(in[32 * istride]);
9068 	       tim2_1_0 = c_im(in[32 * istride]);
9069 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9070 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9071 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9072 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9073 	  }
9074 	  {
9075 	       FFTW_REAL tre2_0_0;
9076 	       FFTW_REAL tim2_0_0;
9077 	       FFTW_REAL tre2_1_0;
9078 	       FFTW_REAL tim2_1_0;
9079 	       tre2_0_0 = c_re(in[8 * istride]);
9080 	       tim2_0_0 = c_im(in[8 * istride]);
9081 	       tre2_1_0 = c_re(in[40 * istride]);
9082 	       tim2_1_0 = c_im(in[40 * istride]);
9083 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9084 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9085 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9086 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9087 	  }
9088 	  {
9089 	       FFTW_REAL tre2_0_0;
9090 	       FFTW_REAL tim2_0_0;
9091 	       FFTW_REAL tre2_1_0;
9092 	       FFTW_REAL tim2_1_0;
9093 	       tre2_0_0 = c_re(in[16 * istride]);
9094 	       tim2_0_0 = c_im(in[16 * istride]);
9095 	       tre2_1_0 = c_re(in[48 * istride]);
9096 	       tim2_1_0 = c_im(in[48 * istride]);
9097 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9098 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9099 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9100 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9101 	  }
9102 	  {
9103 	       FFTW_REAL tre2_0_0;
9104 	       FFTW_REAL tim2_0_0;
9105 	       FFTW_REAL tre2_1_0;
9106 	       FFTW_REAL tim2_1_0;
9107 	       tre2_0_0 = c_re(in[24 * istride]);
9108 	       tim2_0_0 = c_im(in[24 * istride]);
9109 	       tre2_1_0 = c_re(in[56 * istride]);
9110 	       tim2_1_0 = c_im(in[56 * istride]);
9111 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9112 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9113 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9114 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9115 	  }
9116 	  {
9117 	       FFTW_REAL tre2_0_0;
9118 	       FFTW_REAL tim2_0_0;
9119 	       FFTW_REAL tre2_0_1;
9120 	       FFTW_REAL tim2_0_1;
9121 	       FFTW_REAL tre2_1_0;
9122 	       FFTW_REAL tim2_1_0;
9123 	       FFTW_REAL tre2_1_1;
9124 	       FFTW_REAL tim2_1_1;
9125 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9126 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9127 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9128 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9129 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9130 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9131 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9132 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9133 	       tre0_0_0 = tre2_0_0 + tre2_0_1;
9134 	       tim0_0_0 = tim2_0_0 + tim2_0_1;
9135 	       tre0_4_0 = tre2_0_0 - tre2_0_1;
9136 	       tim0_4_0 = tim2_0_0 - tim2_0_1;
9137 	       tre0_2_0 = tre2_1_0 - tim2_1_1;
9138 	       tim0_2_0 = tim2_1_0 + tre2_1_1;
9139 	       tre0_6_0 = tre2_1_0 + tim2_1_1;
9140 	       tim0_6_0 = tim2_1_0 - tre2_1_1;
9141 	  }
9142 	  {
9143 	       FFTW_REAL tre2_0_0;
9144 	       FFTW_REAL tim2_0_0;
9145 	       FFTW_REAL tre2_0_1;
9146 	       FFTW_REAL tim2_0_1;
9147 	       FFTW_REAL tre2_1_0;
9148 	       FFTW_REAL tim2_1_0;
9149 	       FFTW_REAL tre2_1_1;
9150 	       FFTW_REAL tim2_1_1;
9151 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9152 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9153 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9154 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9155 	       {
9156 		    FFTW_REAL tre3_0_0;
9157 		    FFTW_REAL tim3_0_0;
9158 		    FFTW_REAL tre3_1_0;
9159 		    FFTW_REAL tim3_1_0;
9160 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9161 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9162 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9163 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9164 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9165 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9166 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9167 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9168 	       }
9169 	       tre0_1_0 = tre2_0_0 + tre2_0_1;
9170 	       tim0_1_0 = tim2_0_0 + tim2_0_1;
9171 	       tre0_5_0 = tre2_0_0 - tre2_0_1;
9172 	       tim0_5_0 = tim2_0_0 - tim2_0_1;
9173 	       tre0_3_0 = tre2_1_0 - tim2_1_1;
9174 	       tim0_3_0 = tim2_1_0 + tre2_1_1;
9175 	       tre0_7_0 = tre2_1_0 + tim2_1_1;
9176 	       tim0_7_0 = tim2_1_0 - tre2_1_1;
9177 	  }
9178      }
9179      {
9180 	  FFTW_REAL tre1_0_0;
9181 	  FFTW_REAL tim1_0_0;
9182 	  FFTW_REAL tre1_0_1;
9183 	  FFTW_REAL tim1_0_1;
9184 	  FFTW_REAL tre1_0_2;
9185 	  FFTW_REAL tim1_0_2;
9186 	  FFTW_REAL tre1_0_3;
9187 	  FFTW_REAL tim1_0_3;
9188 	  FFTW_REAL tre1_1_0;
9189 	  FFTW_REAL tim1_1_0;
9190 	  FFTW_REAL tre1_1_1;
9191 	  FFTW_REAL tim1_1_1;
9192 	  FFTW_REAL tre1_1_2;
9193 	  FFTW_REAL tim1_1_2;
9194 	  FFTW_REAL tre1_1_3;
9195 	  FFTW_REAL tim1_1_3;
9196 	  {
9197 	       FFTW_REAL tre2_0_0;
9198 	       FFTW_REAL tim2_0_0;
9199 	       FFTW_REAL tre2_1_0;
9200 	       FFTW_REAL tim2_1_0;
9201 	       tre2_0_0 = c_re(in[istride]);
9202 	       tim2_0_0 = c_im(in[istride]);
9203 	       tre2_1_0 = c_re(in[33 * istride]);
9204 	       tim2_1_0 = c_im(in[33 * istride]);
9205 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9206 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9207 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9208 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9209 	  }
9210 	  {
9211 	       FFTW_REAL tre2_0_0;
9212 	       FFTW_REAL tim2_0_0;
9213 	       FFTW_REAL tre2_1_0;
9214 	       FFTW_REAL tim2_1_0;
9215 	       tre2_0_0 = c_re(in[9 * istride]);
9216 	       tim2_0_0 = c_im(in[9 * istride]);
9217 	       tre2_1_0 = c_re(in[41 * istride]);
9218 	       tim2_1_0 = c_im(in[41 * istride]);
9219 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9220 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9221 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9222 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9223 	  }
9224 	  {
9225 	       FFTW_REAL tre2_0_0;
9226 	       FFTW_REAL tim2_0_0;
9227 	       FFTW_REAL tre2_1_0;
9228 	       FFTW_REAL tim2_1_0;
9229 	       tre2_0_0 = c_re(in[17 * istride]);
9230 	       tim2_0_0 = c_im(in[17 * istride]);
9231 	       tre2_1_0 = c_re(in[49 * istride]);
9232 	       tim2_1_0 = c_im(in[49 * istride]);
9233 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9234 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9235 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9236 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9237 	  }
9238 	  {
9239 	       FFTW_REAL tre2_0_0;
9240 	       FFTW_REAL tim2_0_0;
9241 	       FFTW_REAL tre2_1_0;
9242 	       FFTW_REAL tim2_1_0;
9243 	       tre2_0_0 = c_re(in[25 * istride]);
9244 	       tim2_0_0 = c_im(in[25 * istride]);
9245 	       tre2_1_0 = c_re(in[57 * istride]);
9246 	       tim2_1_0 = c_im(in[57 * istride]);
9247 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9248 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9249 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9250 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9251 	  }
9252 	  {
9253 	       FFTW_REAL tre2_0_0;
9254 	       FFTW_REAL tim2_0_0;
9255 	       FFTW_REAL tre2_0_1;
9256 	       FFTW_REAL tim2_0_1;
9257 	       FFTW_REAL tre2_1_0;
9258 	       FFTW_REAL tim2_1_0;
9259 	       FFTW_REAL tre2_1_1;
9260 	       FFTW_REAL tim2_1_1;
9261 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9262 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9263 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9264 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9265 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9266 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9267 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9268 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9269 	       tre0_0_1 = tre2_0_0 + tre2_0_1;
9270 	       tim0_0_1 = tim2_0_0 + tim2_0_1;
9271 	       tre0_4_1 = tre2_0_0 - tre2_0_1;
9272 	       tim0_4_1 = tim2_0_0 - tim2_0_1;
9273 	       tre0_2_1 = tre2_1_0 - tim2_1_1;
9274 	       tim0_2_1 = tim2_1_0 + tre2_1_1;
9275 	       tre0_6_1 = tre2_1_0 + tim2_1_1;
9276 	       tim0_6_1 = tim2_1_0 - tre2_1_1;
9277 	  }
9278 	  {
9279 	       FFTW_REAL tre2_0_0;
9280 	       FFTW_REAL tim2_0_0;
9281 	       FFTW_REAL tre2_0_1;
9282 	       FFTW_REAL tim2_0_1;
9283 	       FFTW_REAL tre2_1_0;
9284 	       FFTW_REAL tim2_1_0;
9285 	       FFTW_REAL tre2_1_1;
9286 	       FFTW_REAL tim2_1_1;
9287 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9288 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9289 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9290 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9291 	       {
9292 		    FFTW_REAL tre3_0_0;
9293 		    FFTW_REAL tim3_0_0;
9294 		    FFTW_REAL tre3_1_0;
9295 		    FFTW_REAL tim3_1_0;
9296 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9297 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9298 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9299 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9300 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9301 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9302 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9303 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9304 	       }
9305 	       tre0_1_1 = tre2_0_0 + tre2_0_1;
9306 	       tim0_1_1 = tim2_0_0 + tim2_0_1;
9307 	       tre0_5_1 = tre2_0_0 - tre2_0_1;
9308 	       tim0_5_1 = tim2_0_0 - tim2_0_1;
9309 	       tre0_3_1 = tre2_1_0 - tim2_1_1;
9310 	       tim0_3_1 = tim2_1_0 + tre2_1_1;
9311 	       tre0_7_1 = tre2_1_0 + tim2_1_1;
9312 	       tim0_7_1 = tim2_1_0 - tre2_1_1;
9313 	  }
9314      }
9315      {
9316 	  FFTW_REAL tre1_0_0;
9317 	  FFTW_REAL tim1_0_0;
9318 	  FFTW_REAL tre1_0_1;
9319 	  FFTW_REAL tim1_0_1;
9320 	  FFTW_REAL tre1_0_2;
9321 	  FFTW_REAL tim1_0_2;
9322 	  FFTW_REAL tre1_0_3;
9323 	  FFTW_REAL tim1_0_3;
9324 	  FFTW_REAL tre1_1_0;
9325 	  FFTW_REAL tim1_1_0;
9326 	  FFTW_REAL tre1_1_1;
9327 	  FFTW_REAL tim1_1_1;
9328 	  FFTW_REAL tre1_1_2;
9329 	  FFTW_REAL tim1_1_2;
9330 	  FFTW_REAL tre1_1_3;
9331 	  FFTW_REAL tim1_1_3;
9332 	  {
9333 	       FFTW_REAL tre2_0_0;
9334 	       FFTW_REAL tim2_0_0;
9335 	       FFTW_REAL tre2_1_0;
9336 	       FFTW_REAL tim2_1_0;
9337 	       tre2_0_0 = c_re(in[2 * istride]);
9338 	       tim2_0_0 = c_im(in[2 * istride]);
9339 	       tre2_1_0 = c_re(in[34 * istride]);
9340 	       tim2_1_0 = c_im(in[34 * istride]);
9341 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9342 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9343 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9344 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9345 	  }
9346 	  {
9347 	       FFTW_REAL tre2_0_0;
9348 	       FFTW_REAL tim2_0_0;
9349 	       FFTW_REAL tre2_1_0;
9350 	       FFTW_REAL tim2_1_0;
9351 	       tre2_0_0 = c_re(in[10 * istride]);
9352 	       tim2_0_0 = c_im(in[10 * istride]);
9353 	       tre2_1_0 = c_re(in[42 * istride]);
9354 	       tim2_1_0 = c_im(in[42 * istride]);
9355 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9356 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9357 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9358 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9359 	  }
9360 	  {
9361 	       FFTW_REAL tre2_0_0;
9362 	       FFTW_REAL tim2_0_0;
9363 	       FFTW_REAL tre2_1_0;
9364 	       FFTW_REAL tim2_1_0;
9365 	       tre2_0_0 = c_re(in[18 * istride]);
9366 	       tim2_0_0 = c_im(in[18 * istride]);
9367 	       tre2_1_0 = c_re(in[50 * istride]);
9368 	       tim2_1_0 = c_im(in[50 * istride]);
9369 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9370 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9371 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9372 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9373 	  }
9374 	  {
9375 	       FFTW_REAL tre2_0_0;
9376 	       FFTW_REAL tim2_0_0;
9377 	       FFTW_REAL tre2_1_0;
9378 	       FFTW_REAL tim2_1_0;
9379 	       tre2_0_0 = c_re(in[26 * istride]);
9380 	       tim2_0_0 = c_im(in[26 * istride]);
9381 	       tre2_1_0 = c_re(in[58 * istride]);
9382 	       tim2_1_0 = c_im(in[58 * istride]);
9383 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9384 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9385 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9386 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9387 	  }
9388 	  {
9389 	       FFTW_REAL tre2_0_0;
9390 	       FFTW_REAL tim2_0_0;
9391 	       FFTW_REAL tre2_0_1;
9392 	       FFTW_REAL tim2_0_1;
9393 	       FFTW_REAL tre2_1_0;
9394 	       FFTW_REAL tim2_1_0;
9395 	       FFTW_REAL tre2_1_1;
9396 	       FFTW_REAL tim2_1_1;
9397 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9398 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9399 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9400 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9401 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9402 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9403 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9404 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9405 	       tre0_0_2 = tre2_0_0 + tre2_0_1;
9406 	       tim0_0_2 = tim2_0_0 + tim2_0_1;
9407 	       tre0_4_2 = tre2_0_0 - tre2_0_1;
9408 	       tim0_4_2 = tim2_0_0 - tim2_0_1;
9409 	       tre0_2_2 = tre2_1_0 - tim2_1_1;
9410 	       tim0_2_2 = tim2_1_0 + tre2_1_1;
9411 	       tre0_6_2 = tre2_1_0 + tim2_1_1;
9412 	       tim0_6_2 = tim2_1_0 - tre2_1_1;
9413 	  }
9414 	  {
9415 	       FFTW_REAL tre2_0_0;
9416 	       FFTW_REAL tim2_0_0;
9417 	       FFTW_REAL tre2_0_1;
9418 	       FFTW_REAL tim2_0_1;
9419 	       FFTW_REAL tre2_1_0;
9420 	       FFTW_REAL tim2_1_0;
9421 	       FFTW_REAL tre2_1_1;
9422 	       FFTW_REAL tim2_1_1;
9423 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9424 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9425 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9426 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9427 	       {
9428 		    FFTW_REAL tre3_0_0;
9429 		    FFTW_REAL tim3_0_0;
9430 		    FFTW_REAL tre3_1_0;
9431 		    FFTW_REAL tim3_1_0;
9432 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9433 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9434 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9435 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9436 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9437 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9438 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9439 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9440 	       }
9441 	       tre0_1_2 = tre2_0_0 + tre2_0_1;
9442 	       tim0_1_2 = tim2_0_0 + tim2_0_1;
9443 	       tre0_5_2 = tre2_0_0 - tre2_0_1;
9444 	       tim0_5_2 = tim2_0_0 - tim2_0_1;
9445 	       tre0_3_2 = tre2_1_0 - tim2_1_1;
9446 	       tim0_3_2 = tim2_1_0 + tre2_1_1;
9447 	       tre0_7_2 = tre2_1_0 + tim2_1_1;
9448 	       tim0_7_2 = tim2_1_0 - tre2_1_1;
9449 	  }
9450      }
9451      {
9452 	  FFTW_REAL tre1_0_0;
9453 	  FFTW_REAL tim1_0_0;
9454 	  FFTW_REAL tre1_0_1;
9455 	  FFTW_REAL tim1_0_1;
9456 	  FFTW_REAL tre1_0_2;
9457 	  FFTW_REAL tim1_0_2;
9458 	  FFTW_REAL tre1_0_3;
9459 	  FFTW_REAL tim1_0_3;
9460 	  FFTW_REAL tre1_1_0;
9461 	  FFTW_REAL tim1_1_0;
9462 	  FFTW_REAL tre1_1_1;
9463 	  FFTW_REAL tim1_1_1;
9464 	  FFTW_REAL tre1_1_2;
9465 	  FFTW_REAL tim1_1_2;
9466 	  FFTW_REAL tre1_1_3;
9467 	  FFTW_REAL tim1_1_3;
9468 	  {
9469 	       FFTW_REAL tre2_0_0;
9470 	       FFTW_REAL tim2_0_0;
9471 	       FFTW_REAL tre2_1_0;
9472 	       FFTW_REAL tim2_1_0;
9473 	       tre2_0_0 = c_re(in[3 * istride]);
9474 	       tim2_0_0 = c_im(in[3 * istride]);
9475 	       tre2_1_0 = c_re(in[35 * istride]);
9476 	       tim2_1_0 = c_im(in[35 * istride]);
9477 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9478 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9479 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9480 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9481 	  }
9482 	  {
9483 	       FFTW_REAL tre2_0_0;
9484 	       FFTW_REAL tim2_0_0;
9485 	       FFTW_REAL tre2_1_0;
9486 	       FFTW_REAL tim2_1_0;
9487 	       tre2_0_0 = c_re(in[11 * istride]);
9488 	       tim2_0_0 = c_im(in[11 * istride]);
9489 	       tre2_1_0 = c_re(in[43 * istride]);
9490 	       tim2_1_0 = c_im(in[43 * istride]);
9491 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9492 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9493 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9494 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9495 	  }
9496 	  {
9497 	       FFTW_REAL tre2_0_0;
9498 	       FFTW_REAL tim2_0_0;
9499 	       FFTW_REAL tre2_1_0;
9500 	       FFTW_REAL tim2_1_0;
9501 	       tre2_0_0 = c_re(in[19 * istride]);
9502 	       tim2_0_0 = c_im(in[19 * istride]);
9503 	       tre2_1_0 = c_re(in[51 * istride]);
9504 	       tim2_1_0 = c_im(in[51 * istride]);
9505 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9506 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9507 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9508 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9509 	  }
9510 	  {
9511 	       FFTW_REAL tre2_0_0;
9512 	       FFTW_REAL tim2_0_0;
9513 	       FFTW_REAL tre2_1_0;
9514 	       FFTW_REAL tim2_1_0;
9515 	       tre2_0_0 = c_re(in[27 * istride]);
9516 	       tim2_0_0 = c_im(in[27 * istride]);
9517 	       tre2_1_0 = c_re(in[59 * istride]);
9518 	       tim2_1_0 = c_im(in[59 * istride]);
9519 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9520 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9521 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9522 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9523 	  }
9524 	  {
9525 	       FFTW_REAL tre2_0_0;
9526 	       FFTW_REAL tim2_0_0;
9527 	       FFTW_REAL tre2_0_1;
9528 	       FFTW_REAL tim2_0_1;
9529 	       FFTW_REAL tre2_1_0;
9530 	       FFTW_REAL tim2_1_0;
9531 	       FFTW_REAL tre2_1_1;
9532 	       FFTW_REAL tim2_1_1;
9533 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9534 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9535 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9536 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9537 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9538 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9539 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9540 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9541 	       tre0_0_3 = tre2_0_0 + tre2_0_1;
9542 	       tim0_0_3 = tim2_0_0 + tim2_0_1;
9543 	       tre0_4_3 = tre2_0_0 - tre2_0_1;
9544 	       tim0_4_3 = tim2_0_0 - tim2_0_1;
9545 	       tre0_2_3 = tre2_1_0 - tim2_1_1;
9546 	       tim0_2_3 = tim2_1_0 + tre2_1_1;
9547 	       tre0_6_3 = tre2_1_0 + tim2_1_1;
9548 	       tim0_6_3 = tim2_1_0 - tre2_1_1;
9549 	  }
9550 	  {
9551 	       FFTW_REAL tre2_0_0;
9552 	       FFTW_REAL tim2_0_0;
9553 	       FFTW_REAL tre2_0_1;
9554 	       FFTW_REAL tim2_0_1;
9555 	       FFTW_REAL tre2_1_0;
9556 	       FFTW_REAL tim2_1_0;
9557 	       FFTW_REAL tre2_1_1;
9558 	       FFTW_REAL tim2_1_1;
9559 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9560 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9561 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9562 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9563 	       {
9564 		    FFTW_REAL tre3_0_0;
9565 		    FFTW_REAL tim3_0_0;
9566 		    FFTW_REAL tre3_1_0;
9567 		    FFTW_REAL tim3_1_0;
9568 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9569 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9570 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9571 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9572 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9573 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9574 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9575 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9576 	       }
9577 	       tre0_1_3 = tre2_0_0 + tre2_0_1;
9578 	       tim0_1_3 = tim2_0_0 + tim2_0_1;
9579 	       tre0_5_3 = tre2_0_0 - tre2_0_1;
9580 	       tim0_5_3 = tim2_0_0 - tim2_0_1;
9581 	       tre0_3_3 = tre2_1_0 - tim2_1_1;
9582 	       tim0_3_3 = tim2_1_0 + tre2_1_1;
9583 	       tre0_7_3 = tre2_1_0 + tim2_1_1;
9584 	       tim0_7_3 = tim2_1_0 - tre2_1_1;
9585 	  }
9586      }
9587      {
9588 	  FFTW_REAL tre1_0_0;
9589 	  FFTW_REAL tim1_0_0;
9590 	  FFTW_REAL tre1_0_1;
9591 	  FFTW_REAL tim1_0_1;
9592 	  FFTW_REAL tre1_0_2;
9593 	  FFTW_REAL tim1_0_2;
9594 	  FFTW_REAL tre1_0_3;
9595 	  FFTW_REAL tim1_0_3;
9596 	  FFTW_REAL tre1_1_0;
9597 	  FFTW_REAL tim1_1_0;
9598 	  FFTW_REAL tre1_1_1;
9599 	  FFTW_REAL tim1_1_1;
9600 	  FFTW_REAL tre1_1_2;
9601 	  FFTW_REAL tim1_1_2;
9602 	  FFTW_REAL tre1_1_3;
9603 	  FFTW_REAL tim1_1_3;
9604 	  {
9605 	       FFTW_REAL tre2_0_0;
9606 	       FFTW_REAL tim2_0_0;
9607 	       FFTW_REAL tre2_1_0;
9608 	       FFTW_REAL tim2_1_0;
9609 	       tre2_0_0 = c_re(in[4 * istride]);
9610 	       tim2_0_0 = c_im(in[4 * istride]);
9611 	       tre2_1_0 = c_re(in[36 * istride]);
9612 	       tim2_1_0 = c_im(in[36 * istride]);
9613 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9614 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9615 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9616 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9617 	  }
9618 	  {
9619 	       FFTW_REAL tre2_0_0;
9620 	       FFTW_REAL tim2_0_0;
9621 	       FFTW_REAL tre2_1_0;
9622 	       FFTW_REAL tim2_1_0;
9623 	       tre2_0_0 = c_re(in[12 * istride]);
9624 	       tim2_0_0 = c_im(in[12 * istride]);
9625 	       tre2_1_0 = c_re(in[44 * istride]);
9626 	       tim2_1_0 = c_im(in[44 * istride]);
9627 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9628 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9629 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9630 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9631 	  }
9632 	  {
9633 	       FFTW_REAL tre2_0_0;
9634 	       FFTW_REAL tim2_0_0;
9635 	       FFTW_REAL tre2_1_0;
9636 	       FFTW_REAL tim2_1_0;
9637 	       tre2_0_0 = c_re(in[20 * istride]);
9638 	       tim2_0_0 = c_im(in[20 * istride]);
9639 	       tre2_1_0 = c_re(in[52 * istride]);
9640 	       tim2_1_0 = c_im(in[52 * istride]);
9641 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9642 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9643 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9644 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9645 	  }
9646 	  {
9647 	       FFTW_REAL tre2_0_0;
9648 	       FFTW_REAL tim2_0_0;
9649 	       FFTW_REAL tre2_1_0;
9650 	       FFTW_REAL tim2_1_0;
9651 	       tre2_0_0 = c_re(in[28 * istride]);
9652 	       tim2_0_0 = c_im(in[28 * istride]);
9653 	       tre2_1_0 = c_re(in[60 * istride]);
9654 	       tim2_1_0 = c_im(in[60 * istride]);
9655 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9656 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9657 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9658 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9659 	  }
9660 	  {
9661 	       FFTW_REAL tre2_0_0;
9662 	       FFTW_REAL tim2_0_0;
9663 	       FFTW_REAL tre2_0_1;
9664 	       FFTW_REAL tim2_0_1;
9665 	       FFTW_REAL tre2_1_0;
9666 	       FFTW_REAL tim2_1_0;
9667 	       FFTW_REAL tre2_1_1;
9668 	       FFTW_REAL tim2_1_1;
9669 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9670 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9671 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9672 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9673 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9674 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9675 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9676 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9677 	       tre0_0_4 = tre2_0_0 + tre2_0_1;
9678 	       tim0_0_4 = tim2_0_0 + tim2_0_1;
9679 	       tre0_4_4 = tre2_0_0 - tre2_0_1;
9680 	       tim0_4_4 = tim2_0_0 - tim2_0_1;
9681 	       tre0_2_4 = tre2_1_0 - tim2_1_1;
9682 	       tim0_2_4 = tim2_1_0 + tre2_1_1;
9683 	       tre0_6_4 = tre2_1_0 + tim2_1_1;
9684 	       tim0_6_4 = tim2_1_0 - tre2_1_1;
9685 	  }
9686 	  {
9687 	       FFTW_REAL tre2_0_0;
9688 	       FFTW_REAL tim2_0_0;
9689 	       FFTW_REAL tre2_0_1;
9690 	       FFTW_REAL tim2_0_1;
9691 	       FFTW_REAL tre2_1_0;
9692 	       FFTW_REAL tim2_1_0;
9693 	       FFTW_REAL tre2_1_1;
9694 	       FFTW_REAL tim2_1_1;
9695 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9696 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9697 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9698 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9699 	       {
9700 		    FFTW_REAL tre3_0_0;
9701 		    FFTW_REAL tim3_0_0;
9702 		    FFTW_REAL tre3_1_0;
9703 		    FFTW_REAL tim3_1_0;
9704 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9705 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9706 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9707 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9708 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9709 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9710 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9711 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9712 	       }
9713 	       tre0_1_4 = tre2_0_0 + tre2_0_1;
9714 	       tim0_1_4 = tim2_0_0 + tim2_0_1;
9715 	       tre0_5_4 = tre2_0_0 - tre2_0_1;
9716 	       tim0_5_4 = tim2_0_0 - tim2_0_1;
9717 	       tre0_3_4 = tre2_1_0 - tim2_1_1;
9718 	       tim0_3_4 = tim2_1_0 + tre2_1_1;
9719 	       tre0_7_4 = tre2_1_0 + tim2_1_1;
9720 	       tim0_7_4 = tim2_1_0 - tre2_1_1;
9721 	  }
9722      }
9723      {
9724 	  FFTW_REAL tre1_0_0;
9725 	  FFTW_REAL tim1_0_0;
9726 	  FFTW_REAL tre1_0_1;
9727 	  FFTW_REAL tim1_0_1;
9728 	  FFTW_REAL tre1_0_2;
9729 	  FFTW_REAL tim1_0_2;
9730 	  FFTW_REAL tre1_0_3;
9731 	  FFTW_REAL tim1_0_3;
9732 	  FFTW_REAL tre1_1_0;
9733 	  FFTW_REAL tim1_1_0;
9734 	  FFTW_REAL tre1_1_1;
9735 	  FFTW_REAL tim1_1_1;
9736 	  FFTW_REAL tre1_1_2;
9737 	  FFTW_REAL tim1_1_2;
9738 	  FFTW_REAL tre1_1_3;
9739 	  FFTW_REAL tim1_1_3;
9740 	  {
9741 	       FFTW_REAL tre2_0_0;
9742 	       FFTW_REAL tim2_0_0;
9743 	       FFTW_REAL tre2_1_0;
9744 	       FFTW_REAL tim2_1_0;
9745 	       tre2_0_0 = c_re(in[5 * istride]);
9746 	       tim2_0_0 = c_im(in[5 * istride]);
9747 	       tre2_1_0 = c_re(in[37 * istride]);
9748 	       tim2_1_0 = c_im(in[37 * istride]);
9749 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9750 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9751 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9752 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9753 	  }
9754 	  {
9755 	       FFTW_REAL tre2_0_0;
9756 	       FFTW_REAL tim2_0_0;
9757 	       FFTW_REAL tre2_1_0;
9758 	       FFTW_REAL tim2_1_0;
9759 	       tre2_0_0 = c_re(in[13 * istride]);
9760 	       tim2_0_0 = c_im(in[13 * istride]);
9761 	       tre2_1_0 = c_re(in[45 * istride]);
9762 	       tim2_1_0 = c_im(in[45 * istride]);
9763 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9764 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9765 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9766 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9767 	  }
9768 	  {
9769 	       FFTW_REAL tre2_0_0;
9770 	       FFTW_REAL tim2_0_0;
9771 	       FFTW_REAL tre2_1_0;
9772 	       FFTW_REAL tim2_1_0;
9773 	       tre2_0_0 = c_re(in[21 * istride]);
9774 	       tim2_0_0 = c_im(in[21 * istride]);
9775 	       tre2_1_0 = c_re(in[53 * istride]);
9776 	       tim2_1_0 = c_im(in[53 * istride]);
9777 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9778 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9779 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9780 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9781 	  }
9782 	  {
9783 	       FFTW_REAL tre2_0_0;
9784 	       FFTW_REAL tim2_0_0;
9785 	       FFTW_REAL tre2_1_0;
9786 	       FFTW_REAL tim2_1_0;
9787 	       tre2_0_0 = c_re(in[29 * istride]);
9788 	       tim2_0_0 = c_im(in[29 * istride]);
9789 	       tre2_1_0 = c_re(in[61 * istride]);
9790 	       tim2_1_0 = c_im(in[61 * istride]);
9791 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9792 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9793 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9794 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9795 	  }
9796 	  {
9797 	       FFTW_REAL tre2_0_0;
9798 	       FFTW_REAL tim2_0_0;
9799 	       FFTW_REAL tre2_0_1;
9800 	       FFTW_REAL tim2_0_1;
9801 	       FFTW_REAL tre2_1_0;
9802 	       FFTW_REAL tim2_1_0;
9803 	       FFTW_REAL tre2_1_1;
9804 	       FFTW_REAL tim2_1_1;
9805 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9806 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9807 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9808 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9809 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9810 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9811 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9812 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9813 	       tre0_0_5 = tre2_0_0 + tre2_0_1;
9814 	       tim0_0_5 = tim2_0_0 + tim2_0_1;
9815 	       tre0_4_5 = tre2_0_0 - tre2_0_1;
9816 	       tim0_4_5 = tim2_0_0 - tim2_0_1;
9817 	       tre0_2_5 = tre2_1_0 - tim2_1_1;
9818 	       tim0_2_5 = tim2_1_0 + tre2_1_1;
9819 	       tre0_6_5 = tre2_1_0 + tim2_1_1;
9820 	       tim0_6_5 = tim2_1_0 - tre2_1_1;
9821 	  }
9822 	  {
9823 	       FFTW_REAL tre2_0_0;
9824 	       FFTW_REAL tim2_0_0;
9825 	       FFTW_REAL tre2_0_1;
9826 	       FFTW_REAL tim2_0_1;
9827 	       FFTW_REAL tre2_1_0;
9828 	       FFTW_REAL tim2_1_0;
9829 	       FFTW_REAL tre2_1_1;
9830 	       FFTW_REAL tim2_1_1;
9831 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9832 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9833 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9834 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9835 	       {
9836 		    FFTW_REAL tre3_0_0;
9837 		    FFTW_REAL tim3_0_0;
9838 		    FFTW_REAL tre3_1_0;
9839 		    FFTW_REAL tim3_1_0;
9840 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9841 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9842 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9843 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9844 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9845 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9846 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9847 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9848 	       }
9849 	       tre0_1_5 = tre2_0_0 + tre2_0_1;
9850 	       tim0_1_5 = tim2_0_0 + tim2_0_1;
9851 	       tre0_5_5 = tre2_0_0 - tre2_0_1;
9852 	       tim0_5_5 = tim2_0_0 - tim2_0_1;
9853 	       tre0_3_5 = tre2_1_0 - tim2_1_1;
9854 	       tim0_3_5 = tim2_1_0 + tre2_1_1;
9855 	       tre0_7_5 = tre2_1_0 + tim2_1_1;
9856 	       tim0_7_5 = tim2_1_0 - tre2_1_1;
9857 	  }
9858      }
9859      {
9860 	  FFTW_REAL tre1_0_0;
9861 	  FFTW_REAL tim1_0_0;
9862 	  FFTW_REAL tre1_0_1;
9863 	  FFTW_REAL tim1_0_1;
9864 	  FFTW_REAL tre1_0_2;
9865 	  FFTW_REAL tim1_0_2;
9866 	  FFTW_REAL tre1_0_3;
9867 	  FFTW_REAL tim1_0_3;
9868 	  FFTW_REAL tre1_1_0;
9869 	  FFTW_REAL tim1_1_0;
9870 	  FFTW_REAL tre1_1_1;
9871 	  FFTW_REAL tim1_1_1;
9872 	  FFTW_REAL tre1_1_2;
9873 	  FFTW_REAL tim1_1_2;
9874 	  FFTW_REAL tre1_1_3;
9875 	  FFTW_REAL tim1_1_3;
9876 	  {
9877 	       FFTW_REAL tre2_0_0;
9878 	       FFTW_REAL tim2_0_0;
9879 	       FFTW_REAL tre2_1_0;
9880 	       FFTW_REAL tim2_1_0;
9881 	       tre2_0_0 = c_re(in[6 * istride]);
9882 	       tim2_0_0 = c_im(in[6 * istride]);
9883 	       tre2_1_0 = c_re(in[38 * istride]);
9884 	       tim2_1_0 = c_im(in[38 * istride]);
9885 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
9886 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
9887 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
9888 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
9889 	  }
9890 	  {
9891 	       FFTW_REAL tre2_0_0;
9892 	       FFTW_REAL tim2_0_0;
9893 	       FFTW_REAL tre2_1_0;
9894 	       FFTW_REAL tim2_1_0;
9895 	       tre2_0_0 = c_re(in[14 * istride]);
9896 	       tim2_0_0 = c_im(in[14 * istride]);
9897 	       tre2_1_0 = c_re(in[46 * istride]);
9898 	       tim2_1_0 = c_im(in[46 * istride]);
9899 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
9900 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
9901 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
9902 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
9903 	  }
9904 	  {
9905 	       FFTW_REAL tre2_0_0;
9906 	       FFTW_REAL tim2_0_0;
9907 	       FFTW_REAL tre2_1_0;
9908 	       FFTW_REAL tim2_1_0;
9909 	       tre2_0_0 = c_re(in[22 * istride]);
9910 	       tim2_0_0 = c_im(in[22 * istride]);
9911 	       tre2_1_0 = c_re(in[54 * istride]);
9912 	       tim2_1_0 = c_im(in[54 * istride]);
9913 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
9914 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
9915 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
9916 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
9917 	  }
9918 	  {
9919 	       FFTW_REAL tre2_0_0;
9920 	       FFTW_REAL tim2_0_0;
9921 	       FFTW_REAL tre2_1_0;
9922 	       FFTW_REAL tim2_1_0;
9923 	       tre2_0_0 = c_re(in[30 * istride]);
9924 	       tim2_0_0 = c_im(in[30 * istride]);
9925 	       tre2_1_0 = c_re(in[62 * istride]);
9926 	       tim2_1_0 = c_im(in[62 * istride]);
9927 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
9928 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
9929 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
9930 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
9931 	  }
9932 	  {
9933 	       FFTW_REAL tre2_0_0;
9934 	       FFTW_REAL tim2_0_0;
9935 	       FFTW_REAL tre2_0_1;
9936 	       FFTW_REAL tim2_0_1;
9937 	       FFTW_REAL tre2_1_0;
9938 	       FFTW_REAL tim2_1_0;
9939 	       FFTW_REAL tre2_1_1;
9940 	       FFTW_REAL tim2_1_1;
9941 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
9942 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
9943 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
9944 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
9945 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
9946 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
9947 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
9948 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
9949 	       tre0_0_6 = tre2_0_0 + tre2_0_1;
9950 	       tim0_0_6 = tim2_0_0 + tim2_0_1;
9951 	       tre0_4_6 = tre2_0_0 - tre2_0_1;
9952 	       tim0_4_6 = tim2_0_0 - tim2_0_1;
9953 	       tre0_2_6 = tre2_1_0 - tim2_1_1;
9954 	       tim0_2_6 = tim2_1_0 + tre2_1_1;
9955 	       tre0_6_6 = tre2_1_0 + tim2_1_1;
9956 	       tim0_6_6 = tim2_1_0 - tre2_1_1;
9957 	  }
9958 	  {
9959 	       FFTW_REAL tre2_0_0;
9960 	       FFTW_REAL tim2_0_0;
9961 	       FFTW_REAL tre2_0_1;
9962 	       FFTW_REAL tim2_0_1;
9963 	       FFTW_REAL tre2_1_0;
9964 	       FFTW_REAL tim2_1_0;
9965 	       FFTW_REAL tre2_1_1;
9966 	       FFTW_REAL tim2_1_1;
9967 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
9968 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
9969 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
9970 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
9971 	       {
9972 		    FFTW_REAL tre3_0_0;
9973 		    FFTW_REAL tim3_0_0;
9974 		    FFTW_REAL tre3_1_0;
9975 		    FFTW_REAL tim3_1_0;
9976 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
9977 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
9978 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
9979 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
9980 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
9981 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
9982 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
9983 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
9984 	       }
9985 	       tre0_1_6 = tre2_0_0 + tre2_0_1;
9986 	       tim0_1_6 = tim2_0_0 + tim2_0_1;
9987 	       tre0_5_6 = tre2_0_0 - tre2_0_1;
9988 	       tim0_5_6 = tim2_0_0 - tim2_0_1;
9989 	       tre0_3_6 = tre2_1_0 - tim2_1_1;
9990 	       tim0_3_6 = tim2_1_0 + tre2_1_1;
9991 	       tre0_7_6 = tre2_1_0 + tim2_1_1;
9992 	       tim0_7_6 = tim2_1_0 - tre2_1_1;
9993 	  }
9994      }
9995      {
9996 	  FFTW_REAL tre1_0_0;
9997 	  FFTW_REAL tim1_0_0;
9998 	  FFTW_REAL tre1_0_1;
9999 	  FFTW_REAL tim1_0_1;
10000 	  FFTW_REAL tre1_0_2;
10001 	  FFTW_REAL tim1_0_2;
10002 	  FFTW_REAL tre1_0_3;
10003 	  FFTW_REAL tim1_0_3;
10004 	  FFTW_REAL tre1_1_0;
10005 	  FFTW_REAL tim1_1_0;
10006 	  FFTW_REAL tre1_1_1;
10007 	  FFTW_REAL tim1_1_1;
10008 	  FFTW_REAL tre1_1_2;
10009 	  FFTW_REAL tim1_1_2;
10010 	  FFTW_REAL tre1_1_3;
10011 	  FFTW_REAL tim1_1_3;
10012 	  {
10013 	       FFTW_REAL tre2_0_0;
10014 	       FFTW_REAL tim2_0_0;
10015 	       FFTW_REAL tre2_1_0;
10016 	       FFTW_REAL tim2_1_0;
10017 	       tre2_0_0 = c_re(in[7 * istride]);
10018 	       tim2_0_0 = c_im(in[7 * istride]);
10019 	       tre2_1_0 = c_re(in[39 * istride]);
10020 	       tim2_1_0 = c_im(in[39 * istride]);
10021 	       tre1_0_0 = tre2_0_0 + tre2_1_0;
10022 	       tim1_0_0 = tim2_0_0 + tim2_1_0;
10023 	       tre1_1_0 = tre2_0_0 - tre2_1_0;
10024 	       tim1_1_0 = tim2_0_0 - tim2_1_0;
10025 	  }
10026 	  {
10027 	       FFTW_REAL tre2_0_0;
10028 	       FFTW_REAL tim2_0_0;
10029 	       FFTW_REAL tre2_1_0;
10030 	       FFTW_REAL tim2_1_0;
10031 	       tre2_0_0 = c_re(in[15 * istride]);
10032 	       tim2_0_0 = c_im(in[15 * istride]);
10033 	       tre2_1_0 = c_re(in[47 * istride]);
10034 	       tim2_1_0 = c_im(in[47 * istride]);
10035 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
10036 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10037 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
10038 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10039 	  }
10040 	  {
10041 	       FFTW_REAL tre2_0_0;
10042 	       FFTW_REAL tim2_0_0;
10043 	       FFTW_REAL tre2_1_0;
10044 	       FFTW_REAL tim2_1_0;
10045 	       tre2_0_0 = c_re(in[23 * istride]);
10046 	       tim2_0_0 = c_im(in[23 * istride]);
10047 	       tre2_1_0 = c_re(in[55 * istride]);
10048 	       tim2_1_0 = c_im(in[55 * istride]);
10049 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
10050 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10051 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
10052 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10053 	  }
10054 	  {
10055 	       FFTW_REAL tre2_0_0;
10056 	       FFTW_REAL tim2_0_0;
10057 	       FFTW_REAL tre2_1_0;
10058 	       FFTW_REAL tim2_1_0;
10059 	       tre2_0_0 = c_re(in[31 * istride]);
10060 	       tim2_0_0 = c_im(in[31 * istride]);
10061 	       tre2_1_0 = c_re(in[63 * istride]);
10062 	       tim2_1_0 = c_im(in[63 * istride]);
10063 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
10064 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
10065 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
10066 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
10067 	  }
10068 	  {
10069 	       FFTW_REAL tre2_0_0;
10070 	       FFTW_REAL tim2_0_0;
10071 	       FFTW_REAL tre2_0_1;
10072 	       FFTW_REAL tim2_0_1;
10073 	       FFTW_REAL tre2_1_0;
10074 	       FFTW_REAL tim2_1_0;
10075 	       FFTW_REAL tre2_1_1;
10076 	       FFTW_REAL tim2_1_1;
10077 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10078 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10079 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10080 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10081 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10082 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10083 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10084 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10085 	       tre0_0_7 = tre2_0_0 + tre2_0_1;
10086 	       tim0_0_7 = tim2_0_0 + tim2_0_1;
10087 	       tre0_4_7 = tre2_0_0 - tre2_0_1;
10088 	       tim0_4_7 = tim2_0_0 - tim2_0_1;
10089 	       tre0_2_7 = tre2_1_0 - tim2_1_1;
10090 	       tim0_2_7 = tim2_1_0 + tre2_1_1;
10091 	       tre0_6_7 = tre2_1_0 + tim2_1_1;
10092 	       tim0_6_7 = tim2_1_0 - tre2_1_1;
10093 	  }
10094 	  {
10095 	       FFTW_REAL tre2_0_0;
10096 	       FFTW_REAL tim2_0_0;
10097 	       FFTW_REAL tre2_0_1;
10098 	       FFTW_REAL tim2_0_1;
10099 	       FFTW_REAL tre2_1_0;
10100 	       FFTW_REAL tim2_1_0;
10101 	       FFTW_REAL tre2_1_1;
10102 	       FFTW_REAL tim2_1_1;
10103 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10104 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10105 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10106 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10107 	       {
10108 		    FFTW_REAL tre3_0_0;
10109 		    FFTW_REAL tim3_0_0;
10110 		    FFTW_REAL tre3_1_0;
10111 		    FFTW_REAL tim3_1_0;
10112 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10113 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10114 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10115 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10116 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10117 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10118 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10119 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10120 	       }
10121 	       tre0_1_7 = tre2_0_0 + tre2_0_1;
10122 	       tim0_1_7 = tim2_0_0 + tim2_0_1;
10123 	       tre0_5_7 = tre2_0_0 - tre2_0_1;
10124 	       tim0_5_7 = tim2_0_0 - tim2_0_1;
10125 	       tre0_3_7 = tre2_1_0 - tim2_1_1;
10126 	       tim0_3_7 = tim2_1_0 + tre2_1_1;
10127 	       tre0_7_7 = tre2_1_0 + tim2_1_1;
10128 	       tim0_7_7 = tim2_1_0 - tre2_1_1;
10129 	  }
10130      }
10131      {
10132 	  FFTW_REAL tre1_0_0;
10133 	  FFTW_REAL tim1_0_0;
10134 	  FFTW_REAL tre1_0_1;
10135 	  FFTW_REAL tim1_0_1;
10136 	  FFTW_REAL tre1_0_2;
10137 	  FFTW_REAL tim1_0_2;
10138 	  FFTW_REAL tre1_0_3;
10139 	  FFTW_REAL tim1_0_3;
10140 	  FFTW_REAL tre1_1_0;
10141 	  FFTW_REAL tim1_1_0;
10142 	  FFTW_REAL tre1_1_1;
10143 	  FFTW_REAL tim1_1_1;
10144 	  FFTW_REAL tre1_1_2;
10145 	  FFTW_REAL tim1_1_2;
10146 	  FFTW_REAL tre1_1_3;
10147 	  FFTW_REAL tim1_1_3;
10148 	  tre1_0_0 = tre0_0_0 + tre0_0_4;
10149 	  tim1_0_0 = tim0_0_0 + tim0_0_4;
10150 	  tre1_1_0 = tre0_0_0 - tre0_0_4;
10151 	  tim1_1_0 = tim0_0_0 - tim0_0_4;
10152 	  tre1_0_1 = tre0_0_1 + tre0_0_5;
10153 	  tim1_0_1 = tim0_0_1 + tim0_0_5;
10154 	  tre1_1_1 = tre0_0_1 - tre0_0_5;
10155 	  tim1_1_1 = tim0_0_1 - tim0_0_5;
10156 	  tre1_0_2 = tre0_0_2 + tre0_0_6;
10157 	  tim1_0_2 = tim0_0_2 + tim0_0_6;
10158 	  tre1_1_2 = tre0_0_2 - tre0_0_6;
10159 	  tim1_1_2 = tim0_0_2 - tim0_0_6;
10160 	  tre1_0_3 = tre0_0_3 + tre0_0_7;
10161 	  tim1_0_3 = tim0_0_3 + tim0_0_7;
10162 	  tre1_1_3 = tre0_0_3 - tre0_0_7;
10163 	  tim1_1_3 = tim0_0_3 - tim0_0_7;
10164 	  {
10165 	       FFTW_REAL tre2_0_0;
10166 	       FFTW_REAL tim2_0_0;
10167 	       FFTW_REAL tre2_0_1;
10168 	       FFTW_REAL tim2_0_1;
10169 	       FFTW_REAL tre2_1_0;
10170 	       FFTW_REAL tim2_1_0;
10171 	       FFTW_REAL tre2_1_1;
10172 	       FFTW_REAL tim2_1_1;
10173 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10174 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10175 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10176 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10177 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10178 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10179 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10180 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10181 	       c_re(out[0]) = tre2_0_0 + tre2_0_1;
10182 	       c_im(out[0]) = tim2_0_0 + tim2_0_1;
10183 	       c_re(out[32 * ostride]) = tre2_0_0 - tre2_0_1;
10184 	       c_im(out[32 * ostride]) = tim2_0_0 - tim2_0_1;
10185 	       c_re(out[16 * ostride]) = tre2_1_0 - tim2_1_1;
10186 	       c_im(out[16 * ostride]) = tim2_1_0 + tre2_1_1;
10187 	       c_re(out[48 * ostride]) = tre2_1_0 + tim2_1_1;
10188 	       c_im(out[48 * ostride]) = tim2_1_0 - tre2_1_1;
10189 	  }
10190 	  {
10191 	       FFTW_REAL tre2_0_0;
10192 	       FFTW_REAL tim2_0_0;
10193 	       FFTW_REAL tre2_0_1;
10194 	       FFTW_REAL tim2_0_1;
10195 	       FFTW_REAL tre2_1_0;
10196 	       FFTW_REAL tim2_1_0;
10197 	       FFTW_REAL tre2_1_1;
10198 	       FFTW_REAL tim2_1_1;
10199 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10200 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10201 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10202 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10203 	       {
10204 		    FFTW_REAL tre3_0_0;
10205 		    FFTW_REAL tim3_0_0;
10206 		    FFTW_REAL tre3_1_0;
10207 		    FFTW_REAL tim3_1_0;
10208 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10209 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10210 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10211 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10212 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10213 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10214 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10215 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10216 	       }
10217 	       c_re(out[8 * ostride]) = tre2_0_0 + tre2_0_1;
10218 	       c_im(out[8 * ostride]) = tim2_0_0 + tim2_0_1;
10219 	       c_re(out[40 * ostride]) = tre2_0_0 - tre2_0_1;
10220 	       c_im(out[40 * ostride]) = tim2_0_0 - tim2_0_1;
10221 	       c_re(out[24 * ostride]) = tre2_1_0 - tim2_1_1;
10222 	       c_im(out[24 * ostride]) = tim2_1_0 + tre2_1_1;
10223 	       c_re(out[56 * ostride]) = tre2_1_0 + tim2_1_1;
10224 	       c_im(out[56 * ostride]) = tim2_1_0 - tre2_1_1;
10225 	  }
10226      }
10227      {
10228 	  FFTW_REAL tre1_0_0;
10229 	  FFTW_REAL tim1_0_0;
10230 	  FFTW_REAL tre1_0_1;
10231 	  FFTW_REAL tim1_0_1;
10232 	  FFTW_REAL tre1_0_2;
10233 	  FFTW_REAL tim1_0_2;
10234 	  FFTW_REAL tre1_0_3;
10235 	  FFTW_REAL tim1_0_3;
10236 	  FFTW_REAL tre1_1_0;
10237 	  FFTW_REAL tim1_1_0;
10238 	  FFTW_REAL tre1_1_1;
10239 	  FFTW_REAL tim1_1_1;
10240 	  FFTW_REAL tre1_1_2;
10241 	  FFTW_REAL tim1_1_2;
10242 	  FFTW_REAL tre1_1_3;
10243 	  FFTW_REAL tim1_1_3;
10244 	  {
10245 	       FFTW_REAL tre2_1_0;
10246 	       FFTW_REAL tim2_1_0;
10247 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_4) - (((FFTW_REAL) FFTW_K382683432) * tim0_1_4);
10248 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_4) + (((FFTW_REAL) FFTW_K382683432) * tre0_1_4);
10249 	       tre1_0_0 = tre0_1_0 + tre2_1_0;
10250 	       tim1_0_0 = tim0_1_0 + tim2_1_0;
10251 	       tre1_1_0 = tre0_1_0 - tre2_1_0;
10252 	       tim1_1_0 = tim0_1_0 - tim2_1_0;
10253 	  }
10254 	  {
10255 	       FFTW_REAL tre2_0_0;
10256 	       FFTW_REAL tim2_0_0;
10257 	       FFTW_REAL tre2_1_0;
10258 	       FFTW_REAL tim2_1_0;
10259 	       tre2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tre0_1_1) - (((FFTW_REAL) FFTW_K098017140) * tim0_1_1);
10260 	       tim2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tim0_1_1) + (((FFTW_REAL) FFTW_K098017140) * tre0_1_1);
10261 	       tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_1_5) - (((FFTW_REAL) FFTW_K471396736) * tim0_1_5);
10262 	       tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_1_5) + (((FFTW_REAL) FFTW_K471396736) * tre0_1_5);
10263 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
10264 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10265 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
10266 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10267 	  }
10268 	  {
10269 	       FFTW_REAL tre2_0_0;
10270 	       FFTW_REAL tim2_0_0;
10271 	       FFTW_REAL tre2_1_0;
10272 	       FFTW_REAL tim2_1_0;
10273 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_2) - (((FFTW_REAL) FFTW_K195090322) * tim0_1_2);
10274 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_2) + (((FFTW_REAL) FFTW_K195090322) * tre0_1_2);
10275 	       tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_6) - (((FFTW_REAL) FFTW_K555570233) * tim0_1_6);
10276 	       tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_6) + (((FFTW_REAL) FFTW_K555570233) * tre0_1_6);
10277 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
10278 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10279 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
10280 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10281 	  }
10282 	  {
10283 	       FFTW_REAL tre2_0_0;
10284 	       FFTW_REAL tim2_0_0;
10285 	       FFTW_REAL tre2_1_0;
10286 	       FFTW_REAL tim2_1_0;
10287 	       tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_1_3) - (((FFTW_REAL) FFTW_K290284677) * tim0_1_3);
10288 	       tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_1_3) + (((FFTW_REAL) FFTW_K290284677) * tre0_1_3);
10289 	       tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_1_7) - (((FFTW_REAL) FFTW_K634393284) * tim0_1_7);
10290 	       tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_1_7) + (((FFTW_REAL) FFTW_K634393284) * tre0_1_7);
10291 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
10292 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
10293 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
10294 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
10295 	  }
10296 	  {
10297 	       FFTW_REAL tre2_0_0;
10298 	       FFTW_REAL tim2_0_0;
10299 	       FFTW_REAL tre2_0_1;
10300 	       FFTW_REAL tim2_0_1;
10301 	       FFTW_REAL tre2_1_0;
10302 	       FFTW_REAL tim2_1_0;
10303 	       FFTW_REAL tre2_1_1;
10304 	       FFTW_REAL tim2_1_1;
10305 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10306 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10307 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10308 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10309 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10310 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10311 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10312 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10313 	       c_re(out[ostride]) = tre2_0_0 + tre2_0_1;
10314 	       c_im(out[ostride]) = tim2_0_0 + tim2_0_1;
10315 	       c_re(out[33 * ostride]) = tre2_0_0 - tre2_0_1;
10316 	       c_im(out[33 * ostride]) = tim2_0_0 - tim2_0_1;
10317 	       c_re(out[17 * ostride]) = tre2_1_0 - tim2_1_1;
10318 	       c_im(out[17 * ostride]) = tim2_1_0 + tre2_1_1;
10319 	       c_re(out[49 * ostride]) = tre2_1_0 + tim2_1_1;
10320 	       c_im(out[49 * ostride]) = tim2_1_0 - tre2_1_1;
10321 	  }
10322 	  {
10323 	       FFTW_REAL tre2_0_0;
10324 	       FFTW_REAL tim2_0_0;
10325 	       FFTW_REAL tre2_0_1;
10326 	       FFTW_REAL tim2_0_1;
10327 	       FFTW_REAL tre2_1_0;
10328 	       FFTW_REAL tim2_1_0;
10329 	       FFTW_REAL tre2_1_1;
10330 	       FFTW_REAL tim2_1_1;
10331 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10332 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10333 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10334 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10335 	       {
10336 		    FFTW_REAL tre3_0_0;
10337 		    FFTW_REAL tim3_0_0;
10338 		    FFTW_REAL tre3_1_0;
10339 		    FFTW_REAL tim3_1_0;
10340 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10341 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10342 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10343 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10344 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10345 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10346 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10347 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10348 	       }
10349 	       c_re(out[9 * ostride]) = tre2_0_0 + tre2_0_1;
10350 	       c_im(out[9 * ostride]) = tim2_0_0 + tim2_0_1;
10351 	       c_re(out[41 * ostride]) = tre2_0_0 - tre2_0_1;
10352 	       c_im(out[41 * ostride]) = tim2_0_0 - tim2_0_1;
10353 	       c_re(out[25 * ostride]) = tre2_1_0 - tim2_1_1;
10354 	       c_im(out[25 * ostride]) = tim2_1_0 + tre2_1_1;
10355 	       c_re(out[57 * ostride]) = tre2_1_0 + tim2_1_1;
10356 	       c_im(out[57 * ostride]) = tim2_1_0 - tre2_1_1;
10357 	  }
10358      }
10359      {
10360 	  FFTW_REAL tre1_0_0;
10361 	  FFTW_REAL tim1_0_0;
10362 	  FFTW_REAL tre1_0_1;
10363 	  FFTW_REAL tim1_0_1;
10364 	  FFTW_REAL tre1_0_2;
10365 	  FFTW_REAL tim1_0_2;
10366 	  FFTW_REAL tre1_0_3;
10367 	  FFTW_REAL tim1_0_3;
10368 	  FFTW_REAL tre1_1_0;
10369 	  FFTW_REAL tim1_1_0;
10370 	  FFTW_REAL tre1_1_1;
10371 	  FFTW_REAL tim1_1_1;
10372 	  FFTW_REAL tre1_1_2;
10373 	  FFTW_REAL tim1_1_2;
10374 	  FFTW_REAL tre1_1_3;
10375 	  FFTW_REAL tim1_1_3;
10376 	  {
10377 	       FFTW_REAL tre2_1_0;
10378 	       FFTW_REAL tim2_1_0;
10379 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_4 - tim0_2_4);
10380 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_4 + tre0_2_4);
10381 	       tre1_0_0 = tre0_2_0 + tre2_1_0;
10382 	       tim1_0_0 = tim0_2_0 + tim2_1_0;
10383 	       tre1_1_0 = tre0_2_0 - tre2_1_0;
10384 	       tim1_1_0 = tim0_2_0 - tim2_1_0;
10385 	  }
10386 	  {
10387 	       FFTW_REAL tre2_0_0;
10388 	       FFTW_REAL tim2_0_0;
10389 	       FFTW_REAL tre2_1_0;
10390 	       FFTW_REAL tim2_1_0;
10391 	       tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_2_1) - (((FFTW_REAL) FFTW_K195090322) * tim0_2_1);
10392 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_2_1) + (((FFTW_REAL) FFTW_K195090322) * tre0_2_1);
10393 	       tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_2_5) - (((FFTW_REAL) FFTW_K831469612) * tim0_2_5);
10394 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_2_5) + (((FFTW_REAL) FFTW_K831469612) * tre0_2_5);
10395 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
10396 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10397 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
10398 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10399 	  }
10400 	  {
10401 	       FFTW_REAL tre2_0_0;
10402 	       FFTW_REAL tim2_0_0;
10403 	       FFTW_REAL tre2_1_0;
10404 	       FFTW_REAL tim2_1_0;
10405 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_2) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_2);
10406 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_2) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_2);
10407 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_6);
10408 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_6) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_6);
10409 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
10410 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10411 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
10412 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10413 	  }
10414 	  {
10415 	       FFTW_REAL tre2_0_0;
10416 	       FFTW_REAL tim2_0_0;
10417 	       FFTW_REAL tre2_1_0;
10418 	       FFTW_REAL tim2_1_0;
10419 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_2_3) - (((FFTW_REAL) FFTW_K555570233) * tim0_2_3);
10420 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_2_3) + (((FFTW_REAL) FFTW_K555570233) * tre0_2_3);
10421 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_2_7) - (((FFTW_REAL) FFTW_K980785280) * tim0_2_7);
10422 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_2_7) + (((FFTW_REAL) FFTW_K980785280) * tre0_2_7);
10423 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
10424 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
10425 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
10426 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
10427 	  }
10428 	  {
10429 	       FFTW_REAL tre2_0_0;
10430 	       FFTW_REAL tim2_0_0;
10431 	       FFTW_REAL tre2_0_1;
10432 	       FFTW_REAL tim2_0_1;
10433 	       FFTW_REAL tre2_1_0;
10434 	       FFTW_REAL tim2_1_0;
10435 	       FFTW_REAL tre2_1_1;
10436 	       FFTW_REAL tim2_1_1;
10437 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10438 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10439 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10440 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10441 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10442 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10443 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10444 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10445 	       c_re(out[2 * ostride]) = tre2_0_0 + tre2_0_1;
10446 	       c_im(out[2 * ostride]) = tim2_0_0 + tim2_0_1;
10447 	       c_re(out[34 * ostride]) = tre2_0_0 - tre2_0_1;
10448 	       c_im(out[34 * ostride]) = tim2_0_0 - tim2_0_1;
10449 	       c_re(out[18 * ostride]) = tre2_1_0 - tim2_1_1;
10450 	       c_im(out[18 * ostride]) = tim2_1_0 + tre2_1_1;
10451 	       c_re(out[50 * ostride]) = tre2_1_0 + tim2_1_1;
10452 	       c_im(out[50 * ostride]) = tim2_1_0 - tre2_1_1;
10453 	  }
10454 	  {
10455 	       FFTW_REAL tre2_0_0;
10456 	       FFTW_REAL tim2_0_0;
10457 	       FFTW_REAL tre2_0_1;
10458 	       FFTW_REAL tim2_0_1;
10459 	       FFTW_REAL tre2_1_0;
10460 	       FFTW_REAL tim2_1_0;
10461 	       FFTW_REAL tre2_1_1;
10462 	       FFTW_REAL tim2_1_1;
10463 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10464 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10465 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10466 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10467 	       {
10468 		    FFTW_REAL tre3_0_0;
10469 		    FFTW_REAL tim3_0_0;
10470 		    FFTW_REAL tre3_1_0;
10471 		    FFTW_REAL tim3_1_0;
10472 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10473 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10474 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10475 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10476 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10477 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10478 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10479 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10480 	       }
10481 	       c_re(out[10 * ostride]) = tre2_0_0 + tre2_0_1;
10482 	       c_im(out[10 * ostride]) = tim2_0_0 + tim2_0_1;
10483 	       c_re(out[42 * ostride]) = tre2_0_0 - tre2_0_1;
10484 	       c_im(out[42 * ostride]) = tim2_0_0 - tim2_0_1;
10485 	       c_re(out[26 * ostride]) = tre2_1_0 - tim2_1_1;
10486 	       c_im(out[26 * ostride]) = tim2_1_0 + tre2_1_1;
10487 	       c_re(out[58 * ostride]) = tre2_1_0 + tim2_1_1;
10488 	       c_im(out[58 * ostride]) = tim2_1_0 - tre2_1_1;
10489 	  }
10490      }
10491      {
10492 	  FFTW_REAL tre1_0_0;
10493 	  FFTW_REAL tim1_0_0;
10494 	  FFTW_REAL tre1_0_1;
10495 	  FFTW_REAL tim1_0_1;
10496 	  FFTW_REAL tre1_0_2;
10497 	  FFTW_REAL tim1_0_2;
10498 	  FFTW_REAL tre1_0_3;
10499 	  FFTW_REAL tim1_0_3;
10500 	  FFTW_REAL tre1_1_0;
10501 	  FFTW_REAL tim1_1_0;
10502 	  FFTW_REAL tre1_1_1;
10503 	  FFTW_REAL tim1_1_1;
10504 	  FFTW_REAL tre1_1_2;
10505 	  FFTW_REAL tim1_1_2;
10506 	  FFTW_REAL tre1_1_3;
10507 	  FFTW_REAL tim1_1_3;
10508 	  {
10509 	       FFTW_REAL tre2_1_0;
10510 	       FFTW_REAL tim2_1_0;
10511 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_4) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_4);
10512 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_4) + (((FFTW_REAL) FFTW_K923879532) * tre0_3_4);
10513 	       tre1_0_0 = tre0_3_0 + tre2_1_0;
10514 	       tim1_0_0 = tim0_3_0 + tim2_1_0;
10515 	       tre1_1_0 = tre0_3_0 - tre2_1_0;
10516 	       tim1_1_0 = tim0_3_0 - tim2_1_0;
10517 	  }
10518 	  {
10519 	       FFTW_REAL tre2_0_0;
10520 	       FFTW_REAL tim2_0_0;
10521 	       FFTW_REAL tre2_1_0;
10522 	       FFTW_REAL tim2_1_0;
10523 	       tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_3_1) - (((FFTW_REAL) FFTW_K290284677) * tim0_3_1);
10524 	       tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_3_1) + (((FFTW_REAL) FFTW_K290284677) * tre0_3_1);
10525 	       tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_3_5) - (((FFTW_REAL) FFTW_K995184726) * tim0_3_5);
10526 	       tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_3_5) + (((FFTW_REAL) FFTW_K995184726) * tre0_3_5);
10527 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
10528 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10529 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
10530 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10531 	  }
10532 	  {
10533 	       FFTW_REAL tre2_0_0;
10534 	       FFTW_REAL tim2_0_0;
10535 	       FFTW_REAL tre2_1_0;
10536 	       FFTW_REAL tim2_1_0;
10537 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_2) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_2);
10538 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_2) + (((FFTW_REAL) FFTW_K555570233) * tre0_3_2);
10539 	       tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_6) + (((FFTW_REAL) FFTW_K980785280) * tim0_3_6);
10540 	       tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_6) - (((FFTW_REAL) FFTW_K195090322) * tim0_3_6);
10541 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
10542 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10543 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
10544 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10545 	  }
10546 	  {
10547 	       FFTW_REAL tre2_0_0;
10548 	       FFTW_REAL tim2_0_0;
10549 	       FFTW_REAL tre2_1_0;
10550 	       FFTW_REAL tim2_1_0;
10551 	       tre2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_3_3) - (((FFTW_REAL) FFTW_K773010453) * tim0_3_3);
10552 	       tim2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_3_3) + (((FFTW_REAL) FFTW_K773010453) * tre0_3_3);
10553 	       tre2_1_0 = (((FFTW_REAL) FFTW_K471396736) * tre0_3_7) + (((FFTW_REAL) FFTW_K881921264) * tim0_3_7);
10554 	       tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_3_7) - (((FFTW_REAL) FFTW_K471396736) * tim0_3_7);
10555 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
10556 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
10557 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
10558 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
10559 	  }
10560 	  {
10561 	       FFTW_REAL tre2_0_0;
10562 	       FFTW_REAL tim2_0_0;
10563 	       FFTW_REAL tre2_0_1;
10564 	       FFTW_REAL tim2_0_1;
10565 	       FFTW_REAL tre2_1_0;
10566 	       FFTW_REAL tim2_1_0;
10567 	       FFTW_REAL tre2_1_1;
10568 	       FFTW_REAL tim2_1_1;
10569 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10570 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10571 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10572 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10573 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10574 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10575 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10576 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10577 	       c_re(out[3 * ostride]) = tre2_0_0 + tre2_0_1;
10578 	       c_im(out[3 * ostride]) = tim2_0_0 + tim2_0_1;
10579 	       c_re(out[35 * ostride]) = tre2_0_0 - tre2_0_1;
10580 	       c_im(out[35 * ostride]) = tim2_0_0 - tim2_0_1;
10581 	       c_re(out[19 * ostride]) = tre2_1_0 - tim2_1_1;
10582 	       c_im(out[19 * ostride]) = tim2_1_0 + tre2_1_1;
10583 	       c_re(out[51 * ostride]) = tre2_1_0 + tim2_1_1;
10584 	       c_im(out[51 * ostride]) = tim2_1_0 - tre2_1_1;
10585 	  }
10586 	  {
10587 	       FFTW_REAL tre2_0_0;
10588 	       FFTW_REAL tim2_0_0;
10589 	       FFTW_REAL tre2_0_1;
10590 	       FFTW_REAL tim2_0_1;
10591 	       FFTW_REAL tre2_1_0;
10592 	       FFTW_REAL tim2_1_0;
10593 	       FFTW_REAL tre2_1_1;
10594 	       FFTW_REAL tim2_1_1;
10595 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10596 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10597 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10598 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10599 	       {
10600 		    FFTW_REAL tre3_0_0;
10601 		    FFTW_REAL tim3_0_0;
10602 		    FFTW_REAL tre3_1_0;
10603 		    FFTW_REAL tim3_1_0;
10604 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10605 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10606 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10607 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10608 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10609 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10610 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10611 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10612 	       }
10613 	       c_re(out[11 * ostride]) = tre2_0_0 + tre2_0_1;
10614 	       c_im(out[11 * ostride]) = tim2_0_0 + tim2_0_1;
10615 	       c_re(out[43 * ostride]) = tre2_0_0 - tre2_0_1;
10616 	       c_im(out[43 * ostride]) = tim2_0_0 - tim2_0_1;
10617 	       c_re(out[27 * ostride]) = tre2_1_0 - tim2_1_1;
10618 	       c_im(out[27 * ostride]) = tim2_1_0 + tre2_1_1;
10619 	       c_re(out[59 * ostride]) = tre2_1_0 + tim2_1_1;
10620 	       c_im(out[59 * ostride]) = tim2_1_0 - tre2_1_1;
10621 	  }
10622      }
10623      {
10624 	  FFTW_REAL tre1_0_0;
10625 	  FFTW_REAL tim1_0_0;
10626 	  FFTW_REAL tre1_0_1;
10627 	  FFTW_REAL tim1_0_1;
10628 	  FFTW_REAL tre1_0_2;
10629 	  FFTW_REAL tim1_0_2;
10630 	  FFTW_REAL tre1_0_3;
10631 	  FFTW_REAL tim1_0_3;
10632 	  FFTW_REAL tre1_1_0;
10633 	  FFTW_REAL tim1_1_0;
10634 	  FFTW_REAL tre1_1_1;
10635 	  FFTW_REAL tim1_1_1;
10636 	  FFTW_REAL tre1_1_2;
10637 	  FFTW_REAL tim1_1_2;
10638 	  FFTW_REAL tre1_1_3;
10639 	  FFTW_REAL tim1_1_3;
10640 	  tre1_0_0 = tre0_4_0 - tim0_4_4;
10641 	  tim1_0_0 = tim0_4_0 + tre0_4_4;
10642 	  tre1_1_0 = tre0_4_0 + tim0_4_4;
10643 	  tim1_1_0 = tim0_4_0 - tre0_4_4;
10644 	  {
10645 	       FFTW_REAL tre2_0_0;
10646 	       FFTW_REAL tim2_0_0;
10647 	       FFTW_REAL tre2_1_0;
10648 	       FFTW_REAL tim2_1_0;
10649 	       tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_1) - (((FFTW_REAL) FFTW_K382683432) * tim0_4_1);
10650 	       tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_1) + (((FFTW_REAL) FFTW_K382683432) * tre0_4_1);
10651 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_5) + (((FFTW_REAL) FFTW_K923879532) * tim0_4_5);
10652 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_5) - (((FFTW_REAL) FFTW_K382683432) * tim0_4_5);
10653 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
10654 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10655 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
10656 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10657 	  }
10658 	  {
10659 	       FFTW_REAL tre2_0_0;
10660 	       FFTW_REAL tim2_0_0;
10661 	       FFTW_REAL tre2_1_0;
10662 	       FFTW_REAL tim2_1_0;
10663 	       tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_2 - tim0_4_2);
10664 	       tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_2 + tre0_4_2);
10665 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_6 + tim0_4_6);
10666 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_6 - tim0_4_6);
10667 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
10668 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10669 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
10670 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10671 	  }
10672 	  {
10673 	       FFTW_REAL tre2_0_0;
10674 	       FFTW_REAL tim2_0_0;
10675 	       FFTW_REAL tre2_1_0;
10676 	       FFTW_REAL tim2_1_0;
10677 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_4_3);
10678 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_3) + (((FFTW_REAL) FFTW_K923879532) * tre0_4_3);
10679 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_7) + (((FFTW_REAL) FFTW_K382683432) * tim0_4_7);
10680 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_7) - (((FFTW_REAL) FFTW_K923879532) * tim0_4_7);
10681 	       tre1_0_3 = tre2_0_0 - tre2_1_0;
10682 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
10683 	       tre1_1_3 = tre2_0_0 + tre2_1_0;
10684 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
10685 	  }
10686 	  {
10687 	       FFTW_REAL tre2_0_0;
10688 	       FFTW_REAL tim2_0_0;
10689 	       FFTW_REAL tre2_0_1;
10690 	       FFTW_REAL tim2_0_1;
10691 	       FFTW_REAL tre2_1_0;
10692 	       FFTW_REAL tim2_1_0;
10693 	       FFTW_REAL tre2_1_1;
10694 	       FFTW_REAL tim2_1_1;
10695 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10696 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10697 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10698 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10699 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10700 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10701 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10702 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10703 	       c_re(out[4 * ostride]) = tre2_0_0 + tre2_0_1;
10704 	       c_im(out[4 * ostride]) = tim2_0_0 + tim2_0_1;
10705 	       c_re(out[36 * ostride]) = tre2_0_0 - tre2_0_1;
10706 	       c_im(out[36 * ostride]) = tim2_0_0 - tim2_0_1;
10707 	       c_re(out[20 * ostride]) = tre2_1_0 - tim2_1_1;
10708 	       c_im(out[20 * ostride]) = tim2_1_0 + tre2_1_1;
10709 	       c_re(out[52 * ostride]) = tre2_1_0 + tim2_1_1;
10710 	       c_im(out[52 * ostride]) = tim2_1_0 - tre2_1_1;
10711 	  }
10712 	  {
10713 	       FFTW_REAL tre2_0_0;
10714 	       FFTW_REAL tim2_0_0;
10715 	       FFTW_REAL tre2_0_1;
10716 	       FFTW_REAL tim2_0_1;
10717 	       FFTW_REAL tre2_1_0;
10718 	       FFTW_REAL tim2_1_0;
10719 	       FFTW_REAL tre2_1_1;
10720 	       FFTW_REAL tim2_1_1;
10721 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10722 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10723 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10724 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10725 	       {
10726 		    FFTW_REAL tre3_0_0;
10727 		    FFTW_REAL tim3_0_0;
10728 		    FFTW_REAL tre3_1_0;
10729 		    FFTW_REAL tim3_1_0;
10730 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10731 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10732 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10733 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10734 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10735 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10736 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10737 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10738 	       }
10739 	       c_re(out[12 * ostride]) = tre2_0_0 + tre2_0_1;
10740 	       c_im(out[12 * ostride]) = tim2_0_0 + tim2_0_1;
10741 	       c_re(out[44 * ostride]) = tre2_0_0 - tre2_0_1;
10742 	       c_im(out[44 * ostride]) = tim2_0_0 - tim2_0_1;
10743 	       c_re(out[28 * ostride]) = tre2_1_0 - tim2_1_1;
10744 	       c_im(out[28 * ostride]) = tim2_1_0 + tre2_1_1;
10745 	       c_re(out[60 * ostride]) = tre2_1_0 + tim2_1_1;
10746 	       c_im(out[60 * ostride]) = tim2_1_0 - tre2_1_1;
10747 	  }
10748      }
10749      {
10750 	  FFTW_REAL tre1_0_0;
10751 	  FFTW_REAL tim1_0_0;
10752 	  FFTW_REAL tre1_0_1;
10753 	  FFTW_REAL tim1_0_1;
10754 	  FFTW_REAL tre1_0_2;
10755 	  FFTW_REAL tim1_0_2;
10756 	  FFTW_REAL tre1_0_3;
10757 	  FFTW_REAL tim1_0_3;
10758 	  FFTW_REAL tre1_1_0;
10759 	  FFTW_REAL tim1_1_0;
10760 	  FFTW_REAL tre1_1_1;
10761 	  FFTW_REAL tim1_1_1;
10762 	  FFTW_REAL tre1_1_2;
10763 	  FFTW_REAL tim1_1_2;
10764 	  FFTW_REAL tre1_1_3;
10765 	  FFTW_REAL tim1_1_3;
10766 	  {
10767 	       FFTW_REAL tre2_1_0;
10768 	       FFTW_REAL tim2_1_0;
10769 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_5_4) + (((FFTW_REAL) FFTW_K923879532) * tim0_5_4);
10770 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_5_4) - (((FFTW_REAL) FFTW_K382683432) * tim0_5_4);
10771 	       tre1_0_0 = tre0_5_0 - tre2_1_0;
10772 	       tim1_0_0 = tim0_5_0 + tim2_1_0;
10773 	       tre1_1_0 = tre0_5_0 + tre2_1_0;
10774 	       tim1_1_0 = tim0_5_0 - tim2_1_0;
10775 	  }
10776 	  {
10777 	       FFTW_REAL tre2_0_0;
10778 	       FFTW_REAL tim2_0_0;
10779 	       FFTW_REAL tre2_1_0;
10780 	       FFTW_REAL tim2_1_0;
10781 	       tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_5_1) - (((FFTW_REAL) FFTW_K471396736) * tim0_5_1);
10782 	       tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_5_1) + (((FFTW_REAL) FFTW_K471396736) * tre0_5_1);
10783 	       tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_5_5) + (((FFTW_REAL) FFTW_K634393284) * tim0_5_5);
10784 	       tim2_1_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_5_5) - (((FFTW_REAL) FFTW_K773010453) * tim0_5_5);
10785 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
10786 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10787 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
10788 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10789 	  }
10790 	  {
10791 	       FFTW_REAL tre2_0_0;
10792 	       FFTW_REAL tim2_0_0;
10793 	       FFTW_REAL tre2_1_0;
10794 	       FFTW_REAL tim2_1_0;
10795 	       tre2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_5_2) - (((FFTW_REAL) FFTW_K831469612) * tim0_5_2);
10796 	       tim2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_5_2) + (((FFTW_REAL) FFTW_K831469612) * tre0_5_2);
10797 	       tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_5_6) + (((FFTW_REAL) FFTW_K195090322) * tim0_5_6);
10798 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_5_6) - (((FFTW_REAL) FFTW_K980785280) * tim0_5_6);
10799 	       tre1_0_2 = tre2_0_0 - tre2_1_0;
10800 	       tim1_0_2 = tim2_0_0 + tim2_1_0;
10801 	       tre1_1_2 = tre2_0_0 + tre2_1_0;
10802 	       tim1_1_2 = tim2_0_0 - tim2_1_0;
10803 	  }
10804 	  {
10805 	       FFTW_REAL tre2_0_0;
10806 	       FFTW_REAL tim2_0_0;
10807 	       FFTW_REAL tre2_1_0;
10808 	       FFTW_REAL tim2_1_0;
10809 	       tre2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_5_3) - (((FFTW_REAL) FFTW_K995184726) * tim0_5_3);
10810 	       tim2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_5_3) + (((FFTW_REAL) FFTW_K995184726) * tre0_5_3);
10811 	       tre2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tim0_5_7) - (((FFTW_REAL) FFTW_K956940335) * tre0_5_7);
10812 	       tim2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_5_7) + (((FFTW_REAL) FFTW_K290284677) * tre0_5_7);
10813 	       tre1_0_3 = tre2_0_0 + tre2_1_0;
10814 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
10815 	       tre1_1_3 = tre2_0_0 - tre2_1_0;
10816 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
10817 	  }
10818 	  {
10819 	       FFTW_REAL tre2_0_0;
10820 	       FFTW_REAL tim2_0_0;
10821 	       FFTW_REAL tre2_0_1;
10822 	       FFTW_REAL tim2_0_1;
10823 	       FFTW_REAL tre2_1_0;
10824 	       FFTW_REAL tim2_1_0;
10825 	       FFTW_REAL tre2_1_1;
10826 	       FFTW_REAL tim2_1_1;
10827 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10828 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10829 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10830 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10831 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10832 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10833 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10834 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10835 	       c_re(out[5 * ostride]) = tre2_0_0 + tre2_0_1;
10836 	       c_im(out[5 * ostride]) = tim2_0_0 + tim2_0_1;
10837 	       c_re(out[37 * ostride]) = tre2_0_0 - tre2_0_1;
10838 	       c_im(out[37 * ostride]) = tim2_0_0 - tim2_0_1;
10839 	       c_re(out[21 * ostride]) = tre2_1_0 - tim2_1_1;
10840 	       c_im(out[21 * ostride]) = tim2_1_0 + tre2_1_1;
10841 	       c_re(out[53 * ostride]) = tre2_1_0 + tim2_1_1;
10842 	       c_im(out[53 * ostride]) = tim2_1_0 - tre2_1_1;
10843 	  }
10844 	  {
10845 	       FFTW_REAL tre2_0_0;
10846 	       FFTW_REAL tim2_0_0;
10847 	       FFTW_REAL tre2_0_1;
10848 	       FFTW_REAL tim2_0_1;
10849 	       FFTW_REAL tre2_1_0;
10850 	       FFTW_REAL tim2_1_0;
10851 	       FFTW_REAL tre2_1_1;
10852 	       FFTW_REAL tim2_1_1;
10853 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10854 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10855 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10856 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10857 	       {
10858 		    FFTW_REAL tre3_0_0;
10859 		    FFTW_REAL tim3_0_0;
10860 		    FFTW_REAL tre3_1_0;
10861 		    FFTW_REAL tim3_1_0;
10862 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10863 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10864 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10865 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10866 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10867 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
10868 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
10869 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
10870 	       }
10871 	       c_re(out[13 * ostride]) = tre2_0_0 + tre2_0_1;
10872 	       c_im(out[13 * ostride]) = tim2_0_0 + tim2_0_1;
10873 	       c_re(out[45 * ostride]) = tre2_0_0 - tre2_0_1;
10874 	       c_im(out[45 * ostride]) = tim2_0_0 - tim2_0_1;
10875 	       c_re(out[29 * ostride]) = tre2_1_0 - tim2_1_1;
10876 	       c_im(out[29 * ostride]) = tim2_1_0 + tre2_1_1;
10877 	       c_re(out[61 * ostride]) = tre2_1_0 + tim2_1_1;
10878 	       c_im(out[61 * ostride]) = tim2_1_0 - tre2_1_1;
10879 	  }
10880      }
10881      {
10882 	  FFTW_REAL tre1_0_0;
10883 	  FFTW_REAL tim1_0_0;
10884 	  FFTW_REAL tre1_0_1;
10885 	  FFTW_REAL tim1_0_1;
10886 	  FFTW_REAL tre1_0_2;
10887 	  FFTW_REAL tim1_0_2;
10888 	  FFTW_REAL tre1_0_3;
10889 	  FFTW_REAL tim1_0_3;
10890 	  FFTW_REAL tre1_1_0;
10891 	  FFTW_REAL tim1_1_0;
10892 	  FFTW_REAL tre1_1_1;
10893 	  FFTW_REAL tim1_1_1;
10894 	  FFTW_REAL tre1_1_2;
10895 	  FFTW_REAL tim1_1_2;
10896 	  FFTW_REAL tre1_1_3;
10897 	  FFTW_REAL tim1_1_3;
10898 	  {
10899 	       FFTW_REAL tre2_1_0;
10900 	       FFTW_REAL tim2_1_0;
10901 	       tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_6_4 + tim0_6_4);
10902 	       tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_6_4 - tim0_6_4);
10903 	       tre1_0_0 = tre0_6_0 - tre2_1_0;
10904 	       tim1_0_0 = tim0_6_0 + tim2_1_0;
10905 	       tre1_1_0 = tre0_6_0 + tre2_1_0;
10906 	       tim1_1_0 = tim0_6_0 - tim2_1_0;
10907 	  }
10908 	  {
10909 	       FFTW_REAL tre2_0_0;
10910 	       FFTW_REAL tim2_0_0;
10911 	       FFTW_REAL tre2_1_0;
10912 	       FFTW_REAL tim2_1_0;
10913 	       tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_1) - (((FFTW_REAL) FFTW_K555570233) * tim0_6_1);
10914 	       tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_1) + (((FFTW_REAL) FFTW_K555570233) * tre0_6_1);
10915 	       tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_6_5) + (((FFTW_REAL) FFTW_K195090322) * tim0_6_5);
10916 	       tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_6_5) - (((FFTW_REAL) FFTW_K980785280) * tim0_6_5);
10917 	       tre1_0_1 = tre2_0_0 - tre2_1_0;
10918 	       tim1_0_1 = tim2_0_0 + tim2_1_0;
10919 	       tre1_1_1 = tre2_0_0 + tre2_1_0;
10920 	       tim1_1_1 = tim2_0_0 - tim2_1_0;
10921 	  }
10922 	  {
10923 	       FFTW_REAL tre2_0_0;
10924 	       FFTW_REAL tim2_0_0;
10925 	       FFTW_REAL tre2_1_0;
10926 	       FFTW_REAL tim2_1_0;
10927 	       tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_2) - (((FFTW_REAL) FFTW_K923879532) * tim0_6_2);
10928 	       tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_2) + (((FFTW_REAL) FFTW_K923879532) * tre0_6_2);
10929 	       tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_6_6);
10930 	       tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_6_6) + (((FFTW_REAL) FFTW_K382683432) * tre0_6_6);
10931 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
10932 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
10933 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
10934 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
10935 	  }
10936 	  {
10937 	       FFTW_REAL tre2_0_0;
10938 	       FFTW_REAL tim2_0_0;
10939 	       FFTW_REAL tre2_1_0;
10940 	       FFTW_REAL tim2_1_0;
10941 	       tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_6_3) + (((FFTW_REAL) FFTW_K980785280) * tim0_6_3);
10942 	       tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_6_3) - (((FFTW_REAL) FFTW_K195090322) * tim0_6_3);
10943 	       tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_7) - (((FFTW_REAL) FFTW_K555570233) * tre0_6_7);
10944 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_6_7) + (((FFTW_REAL) FFTW_K831469612) * tre0_6_7);
10945 	       tre1_0_3 = tre2_1_0 - tre2_0_0;
10946 	       tim1_0_3 = tim2_0_0 - tim2_1_0;
10947 	       tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
10948 	       tim1_1_3 = tim2_0_0 + tim2_1_0;
10949 	  }
10950 	  {
10951 	       FFTW_REAL tre2_0_0;
10952 	       FFTW_REAL tim2_0_0;
10953 	       FFTW_REAL tre2_0_1;
10954 	       FFTW_REAL tim2_0_1;
10955 	       FFTW_REAL tre2_1_0;
10956 	       FFTW_REAL tim2_1_0;
10957 	       FFTW_REAL tre2_1_1;
10958 	       FFTW_REAL tim2_1_1;
10959 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
10960 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
10961 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
10962 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
10963 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
10964 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
10965 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
10966 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
10967 	       c_re(out[6 * ostride]) = tre2_0_0 + tre2_0_1;
10968 	       c_im(out[6 * ostride]) = tim2_0_0 + tim2_0_1;
10969 	       c_re(out[38 * ostride]) = tre2_0_0 - tre2_0_1;
10970 	       c_im(out[38 * ostride]) = tim2_0_0 - tim2_0_1;
10971 	       c_re(out[22 * ostride]) = tre2_1_0 - tim2_1_1;
10972 	       c_im(out[22 * ostride]) = tim2_1_0 + tre2_1_1;
10973 	       c_re(out[54 * ostride]) = tre2_1_0 + tim2_1_1;
10974 	       c_im(out[54 * ostride]) = tim2_1_0 - tre2_1_1;
10975 	  }
10976 	  {
10977 	       FFTW_REAL tre2_0_0;
10978 	       FFTW_REAL tim2_0_0;
10979 	       FFTW_REAL tre2_0_1;
10980 	       FFTW_REAL tim2_0_1;
10981 	       FFTW_REAL tre2_1_0;
10982 	       FFTW_REAL tim2_1_0;
10983 	       FFTW_REAL tre2_1_1;
10984 	       FFTW_REAL tim2_1_1;
10985 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
10986 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
10987 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
10988 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
10989 	       {
10990 		    FFTW_REAL tre3_0_0;
10991 		    FFTW_REAL tim3_0_0;
10992 		    FFTW_REAL tre3_1_0;
10993 		    FFTW_REAL tim3_1_0;
10994 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
10995 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
10996 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
10997 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
10998 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
10999 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
11000 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
11001 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
11002 	       }
11003 	       c_re(out[14 * ostride]) = tre2_0_0 + tre2_0_1;
11004 	       c_im(out[14 * ostride]) = tim2_0_0 + tim2_0_1;
11005 	       c_re(out[46 * ostride]) = tre2_0_0 - tre2_0_1;
11006 	       c_im(out[46 * ostride]) = tim2_0_0 - tim2_0_1;
11007 	       c_re(out[30 * ostride]) = tre2_1_0 - tim2_1_1;
11008 	       c_im(out[30 * ostride]) = tim2_1_0 + tre2_1_1;
11009 	       c_re(out[62 * ostride]) = tre2_1_0 + tim2_1_1;
11010 	       c_im(out[62 * ostride]) = tim2_1_0 - tre2_1_1;
11011 	  }
11012      }
11013      {
11014 	  FFTW_REAL tre1_0_0;
11015 	  FFTW_REAL tim1_0_0;
11016 	  FFTW_REAL tre1_0_1;
11017 	  FFTW_REAL tim1_0_1;
11018 	  FFTW_REAL tre1_0_2;
11019 	  FFTW_REAL tim1_0_2;
11020 	  FFTW_REAL tre1_0_3;
11021 	  FFTW_REAL tim1_0_3;
11022 	  FFTW_REAL tre1_1_0;
11023 	  FFTW_REAL tim1_1_0;
11024 	  FFTW_REAL tre1_1_1;
11025 	  FFTW_REAL tim1_1_1;
11026 	  FFTW_REAL tre1_1_2;
11027 	  FFTW_REAL tim1_1_2;
11028 	  FFTW_REAL tre1_1_3;
11029 	  FFTW_REAL tim1_1_3;
11030 	  {
11031 	       FFTW_REAL tre2_1_0;
11032 	       FFTW_REAL tim2_1_0;
11033 	       tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_7_4) + (((FFTW_REAL) FFTW_K382683432) * tim0_7_4);
11034 	       tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_7_4) - (((FFTW_REAL) FFTW_K923879532) * tim0_7_4);
11035 	       tre1_0_0 = tre0_7_0 - tre2_1_0;
11036 	       tim1_0_0 = tim0_7_0 + tim2_1_0;
11037 	       tre1_1_0 = tre0_7_0 + tre2_1_0;
11038 	       tim1_1_0 = tim0_7_0 - tim2_1_0;
11039 	  }
11040 	  {
11041 	       FFTW_REAL tre2_0_0;
11042 	       FFTW_REAL tim2_0_0;
11043 	       FFTW_REAL tre2_1_0;
11044 	       FFTW_REAL tim2_1_0;
11045 	       tre2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_7_1) - (((FFTW_REAL) FFTW_K634393284) * tim0_7_1);
11046 	       tim2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_7_1) + (((FFTW_REAL) FFTW_K634393284) * tre0_7_1);
11047 	       tre2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tim0_7_5) - (((FFTW_REAL) FFTW_K956940335) * tre0_7_5);
11048 	       tim2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_7_5) + (((FFTW_REAL) FFTW_K290284677) * tre0_7_5);
11049 	       tre1_0_1 = tre2_0_0 + tre2_1_0;
11050 	       tim1_0_1 = tim2_0_0 - tim2_1_0;
11051 	       tre1_1_1 = tre2_0_0 - tre2_1_0;
11052 	       tim1_1_1 = tim2_0_0 + tim2_1_0;
11053 	  }
11054 	  {
11055 	       FFTW_REAL tre2_0_0;
11056 	       FFTW_REAL tim2_0_0;
11057 	       FFTW_REAL tre2_1_0;
11058 	       FFTW_REAL tim2_1_0;
11059 	       tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_7_2) - (((FFTW_REAL) FFTW_K980785280) * tim0_7_2);
11060 	       tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_7_2) + (((FFTW_REAL) FFTW_K980785280) * tre0_7_2);
11061 	       tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_7_6) - (((FFTW_REAL) FFTW_K555570233) * tre0_7_6);
11062 	       tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_7_6) + (((FFTW_REAL) FFTW_K831469612) * tre0_7_6);
11063 	       tre1_0_2 = tre2_0_0 + tre2_1_0;
11064 	       tim1_0_2 = tim2_0_0 - tim2_1_0;
11065 	       tre1_1_2 = tre2_0_0 - tre2_1_0;
11066 	       tim1_1_2 = tim2_0_0 + tim2_1_0;
11067 	  }
11068 	  {
11069 	       FFTW_REAL tre2_0_0;
11070 	       FFTW_REAL tim2_0_0;
11071 	       FFTW_REAL tre2_1_0;
11072 	       FFTW_REAL tim2_1_0;
11073 	       tre2_0_0 = (((FFTW_REAL) FFTW_K471396736) * tre0_7_3) + (((FFTW_REAL) FFTW_K881921264) * tim0_7_3);
11074 	       tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_7_3) - (((FFTW_REAL) FFTW_K471396736) * tim0_7_3);
11075 	       tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_7_7) + (((FFTW_REAL) FFTW_K995184726) * tim0_7_7);
11076 	       tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_7_7) - (((FFTW_REAL) FFTW_K995184726) * tre0_7_7);
11077 	       tre1_0_3 = tre2_1_0 - tre2_0_0;
11078 	       tim1_0_3 = tim2_0_0 + tim2_1_0;
11079 	       tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
11080 	       tim1_1_3 = tim2_0_0 - tim2_1_0;
11081 	  }
11082 	  {
11083 	       FFTW_REAL tre2_0_0;
11084 	       FFTW_REAL tim2_0_0;
11085 	       FFTW_REAL tre2_0_1;
11086 	       FFTW_REAL tim2_0_1;
11087 	       FFTW_REAL tre2_1_0;
11088 	       FFTW_REAL tim2_1_0;
11089 	       FFTW_REAL tre2_1_1;
11090 	       FFTW_REAL tim2_1_1;
11091 	       tre2_0_0 = tre1_0_0 + tre1_0_2;
11092 	       tim2_0_0 = tim1_0_0 + tim1_0_2;
11093 	       tre2_1_0 = tre1_0_0 - tre1_0_2;
11094 	       tim2_1_0 = tim1_0_0 - tim1_0_2;
11095 	       tre2_0_1 = tre1_0_1 + tre1_0_3;
11096 	       tim2_0_1 = tim1_0_1 + tim1_0_3;
11097 	       tre2_1_1 = tre1_0_1 - tre1_0_3;
11098 	       tim2_1_1 = tim1_0_1 - tim1_0_3;
11099 	       c_re(out[7 * ostride]) = tre2_0_0 + tre2_0_1;
11100 	       c_im(out[7 * ostride]) = tim2_0_0 + tim2_0_1;
11101 	       c_re(out[39 * ostride]) = tre2_0_0 - tre2_0_1;
11102 	       c_im(out[39 * ostride]) = tim2_0_0 - tim2_0_1;
11103 	       c_re(out[23 * ostride]) = tre2_1_0 - tim2_1_1;
11104 	       c_im(out[23 * ostride]) = tim2_1_0 + tre2_1_1;
11105 	       c_re(out[55 * ostride]) = tre2_1_0 + tim2_1_1;
11106 	       c_im(out[55 * ostride]) = tim2_1_0 - tre2_1_1;
11107 	  }
11108 	  {
11109 	       FFTW_REAL tre2_0_0;
11110 	       FFTW_REAL tim2_0_0;
11111 	       FFTW_REAL tre2_0_1;
11112 	       FFTW_REAL tim2_0_1;
11113 	       FFTW_REAL tre2_1_0;
11114 	       FFTW_REAL tim2_1_0;
11115 	       FFTW_REAL tre2_1_1;
11116 	       FFTW_REAL tim2_1_1;
11117 	       tre2_0_0 = tre1_1_0 - tim1_1_2;
11118 	       tim2_0_0 = tim1_1_0 + tre1_1_2;
11119 	       tre2_1_0 = tre1_1_0 + tim1_1_2;
11120 	       tim2_1_0 = tim1_1_0 - tre1_1_2;
11121 	       {
11122 		    FFTW_REAL tre3_0_0;
11123 		    FFTW_REAL tim3_0_0;
11124 		    FFTW_REAL tre3_1_0;
11125 		    FFTW_REAL tim3_1_0;
11126 		    tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
11127 		    tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
11128 		    tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
11129 		    tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
11130 		    tre2_0_1 = tre3_0_0 - tre3_1_0;
11131 		    tim2_0_1 = tim3_0_0 + tim3_1_0;
11132 		    tre2_1_1 = tre3_0_0 + tre3_1_0;
11133 		    tim2_1_1 = tim3_0_0 - tim3_1_0;
11134 	       }
11135 	       c_re(out[15 * ostride]) = tre2_0_0 + tre2_0_1;
11136 	       c_im(out[15 * ostride]) = tim2_0_0 + tim2_0_1;
11137 	       c_re(out[47 * ostride]) = tre2_0_0 - tre2_0_1;
11138 	       c_im(out[47 * ostride]) = tim2_0_0 - tim2_0_1;
11139 	       c_re(out[31 * ostride]) = tre2_1_0 - tim2_1_1;
11140 	       c_im(out[31 * ostride]) = tim2_1_0 + tre2_1_1;
11141 	       c_re(out[63 * ostride]) = tre2_1_0 + tim2_1_1;
11142 	       c_im(out[63 * ostride]) = tim2_1_0 - tre2_1_1;
11143 	  }
11144      }
11145 }
11146 
11147 /* This function contains 90 FP additions and 36 FP multiplications */
11148 
static void fftwi_no_twiddle_7(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * Seven-point codelet.  Reads in[0], in[istride], ..., in[6 * istride]
      * and writes out[0], out[ostride], ..., out[6 * ostride].  Every input
      * is copied into a local before the first store to out.
      *
      * Outputs are produced in mirrored pairs (1,6), (2,5), (3,4): each pair
      * shares a "base" term and a "cross" term and differs only in the sign
      * applied to the cross term.
      *
      * NOTE(review): the FFTW_K* constants are defined outside this block;
      * their names suggest the cosines and sines of multiples of 2*pi/7,
      * which would make this an unnormalized length-7 DFT with positive
      * exponent sign -- confirm against the constants header.
      */
     const FFTW_REAL k623 = (FFTW_REAL) FFTW_K623489801;
     const FFTW_REAL k900 = (FFTW_REAL) FFTW_K900968867;
     const FFTW_REAL k222 = (FFTW_REAL) FFTW_K222520933;
     const FFTW_REAL k781 = (FFTW_REAL) FFTW_K781831482;
     const FFTW_REAL k974 = (FFTW_REAL) FFTW_K974927912;
     const FFTW_REAL k433 = (FFTW_REAL) FFTW_K433883739;

     /* Load real and imaginary parts of the seven strided inputs. */
     const FFTW_REAL re0 = c_re(in[0]);
     const FFTW_REAL im0 = c_im(in[0]);
     const FFTW_REAL re1 = c_re(in[istride]);
     const FFTW_REAL im1 = c_im(in[istride]);
     const FFTW_REAL re2 = c_re(in[2 * istride]);
     const FFTW_REAL im2 = c_im(in[2 * istride]);
     const FFTW_REAL re3 = c_re(in[3 * istride]);
     const FFTW_REAL im3 = c_im(in[3 * istride]);
     const FFTW_REAL re4 = c_re(in[4 * istride]);
     const FFTW_REAL im4 = c_im(in[4 * istride]);
     const FFTW_REAL re5 = c_re(in[5 * istride]);
     const FFTW_REAL im5 = c_im(in[5 * istride]);
     const FFTW_REAL re6 = c_re(in[6 * istride]);
     const FFTW_REAL im6 = c_im(in[6 * istride]);

     FFTW_REAL base;		/* term shared with the same sign by both outputs of a pair */
     FFTW_REAL cross;		/* term added to one output of the pair, subtracted from the other */

     /* Output 0 is the plain sum of all inputs. */
     c_re(out[0]) = re0 + re1 + re2 + re3 + re4 + re5 + re6;
     c_im(out[0]) = im0 + im1 + im2 + im3 + im4 + im5 + im6;

     /* Outputs 1 and 6: real parts. */
     base = re0 + (k623 * (re1 + re6)) - (k900 * (re3 + re4)) - (k222 * (re2 + re5));
     cross = (k781 * (im6 - im1)) + (k974 * (im5 - im2)) + (k433 * (im4 - im3));
     c_re(out[ostride]) = base + cross;
     c_re(out[6 * ostride]) = base - cross;

     /* Outputs 1 and 6: imaginary parts. */
     base = im0 + (k623 * (im1 + im6)) - (k900 * (im3 + im4)) - (k222 * (im2 + im5));
     cross = (k781 * (re1 - re6)) + (k974 * (re2 - re5)) + (k433 * (re3 - re4));
     c_im(out[ostride]) = base + cross;
     c_im(out[6 * ostride]) = base - cross;

     /* Outputs 2 and 5: real parts. */
     base = re0 + (k623 * (re3 + re4)) - (k900 * (re2 + re5)) - (k222 * (re1 + re6));
     cross = (k974 * (im6 - im1)) + (k433 * (im2 - im5)) + (k781 * (im3 - im4));
     c_re(out[2 * ostride]) = base + cross;
     c_re(out[5 * ostride]) = base - cross;

     /* Outputs 2 and 5: imaginary parts. */
     base = im0 + (k623 * (im3 + im4)) - (k900 * (im2 + im5)) - (k222 * (im1 + im6));
     cross = (k974 * (re1 - re6)) + (k433 * (re5 - re2)) + (k781 * (re4 - re3));
     c_im(out[2 * ostride]) = base + cross;
     c_im(out[5 * ostride]) = base - cross;

     /* Outputs 3 and 4: real parts. */
     base = re0 + (k623 * (re2 + re5)) - (k222 * (re3 + re4)) - (k900 * (re1 + re6));
     cross = (k433 * (im6 - im1)) + (k781 * (im2 - im5)) + (k974 * (im4 - im3));
     c_re(out[3 * ostride]) = base + cross;
     c_re(out[4 * ostride]) = base - cross;

     /* Outputs 3 and 4: imaginary parts. */
     base = im0 + (k623 * (im2 + im5)) - (k222 * (im3 + im4)) - (k900 * (im1 + im6));
     cross = (k433 * (re1 - re6)) + (k781 * (re5 - re2)) + (k974 * (re3 - re4));
     c_im(out[3 * ostride]) = base + cross;
     c_im(out[4 * ostride]) = base - cross;
}
11230 
11231 /* This function contains 52 FP additions and 4 FP multiplications */
11232 
/*
 * Length-8 codelet without twiddle factors.
 *
 * Loads the eight complex samples in[k * istride] (k = 0..7) and stores
 * eight results to out[k * ostride].  The arithmetic below is an
 * unnormalized 8-point DFT using the positive-exponent sign convention
 * (consistent with the "fftwi_" prefix), provided FFTW_K707106781 holds
 * sqrt(2)/2 as its name suggests -- that constant is defined outside this
 * function.  Every input is loaded before the first store to out.
 */
static void fftwi_no_twiddle_8(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /* sumK = in[K] + in[K + 4],  difK = in[K] - in[K + 4] */
     FFTW_REAL sum0_re, sum0_im, dif0_re, dif0_im;
     FFTW_REAL sum1_re, sum1_im, dif1_re, dif1_im;
     FFTW_REAL sum2_re, sum2_im, dif2_re, dif2_im;
     FFTW_REAL sum3_re, sum3_im, dif3_re, dif3_im;
     const FFTW_REAL k707 = (FFTW_REAL) FFTW_K707106781;

     /* First stage: one butterfly per input pair (K, K + 4). */
     {
          const FFTW_REAL lo_re = c_re(in[0]);
          const FFTW_REAL lo_im = c_im(in[0]);
          const FFTW_REAL hi_re = c_re(in[4 * istride]);
          const FFTW_REAL hi_im = c_im(in[4 * istride]);

          sum0_re = lo_re + hi_re;
          sum0_im = lo_im + hi_im;
          dif0_re = lo_re - hi_re;
          dif0_im = lo_im - hi_im;
     }
     {
          const FFTW_REAL lo_re = c_re(in[istride]);
          const FFTW_REAL lo_im = c_im(in[istride]);
          const FFTW_REAL hi_re = c_re(in[5 * istride]);
          const FFTW_REAL hi_im = c_im(in[5 * istride]);

          sum1_re = lo_re + hi_re;
          sum1_im = lo_im + hi_im;
          dif1_re = lo_re - hi_re;
          dif1_im = lo_im - hi_im;
     }
     {
          const FFTW_REAL lo_re = c_re(in[2 * istride]);
          const FFTW_REAL lo_im = c_im(in[2 * istride]);
          const FFTW_REAL hi_re = c_re(in[6 * istride]);
          const FFTW_REAL hi_im = c_im(in[6 * istride]);

          sum2_re = lo_re + hi_re;
          sum2_im = lo_im + hi_im;
          dif2_re = lo_re - hi_re;
          dif2_im = lo_im - hi_im;
     }
     {
          const FFTW_REAL lo_re = c_re(in[3 * istride]);
          const FFTW_REAL lo_im = c_im(in[3 * istride]);
          const FFTW_REAL hi_re = c_re(in[7 * istride]);
          const FFTW_REAL hi_im = c_im(in[7 * istride]);

          sum3_re = lo_re + hi_re;
          sum3_im = lo_im + hi_im;
          dif3_re = lo_re - hi_re;
          dif3_im = lo_im - hi_im;
     }

     /* Even-indexed outputs (0, 4, 2, 6) come from the four pair sums. */
     {
          const FFTW_REAL p02_re = sum0_re + sum2_re;
          const FFTW_REAL p02_im = sum0_im + sum2_im;
          const FFTW_REAL m02_re = sum0_re - sum2_re;
          const FFTW_REAL m02_im = sum0_im - sum2_im;
          const FFTW_REAL p13_re = sum1_re + sum3_re;
          const FFTW_REAL p13_im = sum1_im + sum3_im;
          const FFTW_REAL m13_re = sum1_re - sum3_re;
          const FFTW_REAL m13_im = sum1_im - sum3_im;

          c_re(out[0]) = p02_re + p13_re;
          c_im(out[0]) = p02_im + p13_im;
          c_re(out[4 * ostride]) = p02_re - p13_re;
          c_im(out[4 * ostride]) = p02_im - p13_im;
          /* cross terms: (m13_re, m13_im) enters rotated by a quarter turn */
          c_re(out[2 * ostride]) = m02_re - m13_im;
          c_im(out[2 * ostride]) = m02_im + m13_re;
          c_re(out[6 * ostride]) = m02_re + m13_im;
          c_im(out[6 * ostride]) = m02_im - m13_re;
     }

     /* Odd-indexed outputs (1, 5, 3, 7) come from the four pair differences. */
     {
          /* dif0 combined with dif2 rotated by a quarter turn */
          const FFTW_REAL head_p_re = dif0_re - dif2_im;
          const FFTW_REAL head_p_im = dif0_im + dif2_re;
          const FFTW_REAL head_m_re = dif0_re + dif2_im;
          const FFTW_REAL head_m_im = dif0_im - dif2_re;
          /* dif1 and dif3 scaled by k707; rot3_re carries the opposite sign
             of the rotated value, which the combinations below account for */
          const FFTW_REAL rot1_re = k707 * (dif1_re - dif1_im);
          const FFTW_REAL rot1_im = k707 * (dif1_im + dif1_re);
          const FFTW_REAL rot3_re = k707 * (dif3_re + dif3_im);
          const FFTW_REAL rot3_im = k707 * (dif3_re - dif3_im);
          const FFTW_REAL tail_p_re = rot1_re - rot3_re;
          const FFTW_REAL tail_p_im = rot1_im + rot3_im;
          const FFTW_REAL tail_m_re = rot1_re + rot3_re;
          const FFTW_REAL tail_m_im = rot1_im - rot3_im;

          c_re(out[ostride]) = head_p_re + tail_p_re;
          c_im(out[ostride]) = head_p_im + tail_p_im;
          c_re(out[5 * ostride]) = head_p_re - tail_p_re;
          c_im(out[5 * ostride]) = head_p_im - tail_p_im;
          c_re(out[3 * ostride]) = head_m_re - tail_m_im;
          c_im(out[3 * ostride]) = head_m_im + tail_m_re;
          c_re(out[7 * ostride]) = head_m_re + tail_m_im;
          c_im(out[7 * ostride]) = head_m_im - tail_m_re;
     }
}
11370 
11371 /* This function contains 92 FP additions and 40 FP multiplications */
11372 
/*
 * Length-9 codelet without twiddle factors.
 *
 * Loads the nine complex samples in[k * istride] (k = 0..8) and stores
 * nine results to out[k * ostride].  The work is organised as three
 * 3-point butterflies on the input triples (g, g + 3, g + 6), a rotation
 * of the intermediate values by fixed constants, and three more 3-point
 * butterflies producing the output triples (j, j + 3, j + 6).  The sign
 * pattern corresponds to an unnormalized 9-point DFT with positive
 * exponent (consistent with the "fftwi_" prefix), assuming the FFTW_K*
 * constants -- defined outside this function -- hold the cosine/sine
 * values their names suggest.  Every input is loaded before the first
 * store to out.
 */
static void fftwi_no_twiddle_9(const FFTW_COMPLEX *in, FFTW_COMPLEX *out, int istride, int ostride)
{
     /*
      * First-stage results for input triple g (g = 0, 1, 2):
      *   sG = plain sum, pG / mG = the two remaining butterfly outputs.
      */
     FFTW_REAL s0_re, s0_im, p0_re, p0_im, m0_re, m0_im;
     FFTW_REAL s1_re, s1_im, p1_re, p1_im, m1_re, m1_im;
     FFTW_REAL s2_re, s2_im, p2_re, p2_im, m2_re, m2_im;
     const FFTW_REAL k500 = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k866 = (FFTW_REAL) FFTW_K866025403;
     const FFTW_REAL k766 = (FFTW_REAL) FFTW_K766044443;
     const FFTW_REAL k642 = (FFTW_REAL) FFTW_K642787609;
     const FFTW_REAL k173 = (FFTW_REAL) FFTW_K173648177;
     const FFTW_REAL k984 = (FFTW_REAL) FFTW_K984807753;
     const FFTW_REAL k939 = (FFTW_REAL) FFTW_K939692620;
     const FFTW_REAL k342 = (FFTW_REAL) FFTW_K342020143;

     /* Triple 0: inputs 0, 3, 6. */
     {
          const FFTW_REAL x0_re = c_re(in[0]);
          const FFTW_REAL x0_im = c_im(in[0]);
          const FFTW_REAL x1_re = c_re(in[3 * istride]);
          const FFTW_REAL x1_im = c_im(in[3 * istride]);
          const FFTW_REAL x2_re = c_re(in[6 * istride]);
          const FFTW_REAL x2_im = c_im(in[6 * istride]);
          const FFTW_REAL base_re = x0_re - (k500 * (x1_re + x2_re));
          const FFTW_REAL turn_re = k866 * (x2_im - x1_im);
          const FFTW_REAL base_im = x0_im - (k500 * (x1_im + x2_im));
          const FFTW_REAL turn_im = k866 * (x1_re - x2_re);

          s0_re = x0_re + x1_re + x2_re;
          s0_im = x0_im + x1_im + x2_im;
          p0_re = base_re + turn_re;
          m0_re = base_re - turn_re;
          p0_im = base_im + turn_im;
          m0_im = base_im - turn_im;
     }
     /* Triple 1: inputs 1, 4, 7. */
     {
          const FFTW_REAL x0_re = c_re(in[istride]);
          const FFTW_REAL x0_im = c_im(in[istride]);
          const FFTW_REAL x1_re = c_re(in[4 * istride]);
          const FFTW_REAL x1_im = c_im(in[4 * istride]);
          const FFTW_REAL x2_re = c_re(in[7 * istride]);
          const FFTW_REAL x2_im = c_im(in[7 * istride]);
          const FFTW_REAL base_re = x0_re - (k500 * (x1_re + x2_re));
          const FFTW_REAL turn_re = k866 * (x2_im - x1_im);
          const FFTW_REAL base_im = x0_im - (k500 * (x1_im + x2_im));
          const FFTW_REAL turn_im = k866 * (x1_re - x2_re);

          s1_re = x0_re + x1_re + x2_re;
          s1_im = x0_im + x1_im + x2_im;
          p1_re = base_re + turn_re;
          m1_re = base_re - turn_re;
          p1_im = base_im + turn_im;
          m1_im = base_im - turn_im;
     }
     /* Triple 2: inputs 2, 5, 8. */
     {
          const FFTW_REAL x0_re = c_re(in[2 * istride]);
          const FFTW_REAL x0_im = c_im(in[2 * istride]);
          const FFTW_REAL x1_re = c_re(in[5 * istride]);
          const FFTW_REAL x1_im = c_im(in[5 * istride]);
          const FFTW_REAL x2_re = c_re(in[8 * istride]);
          const FFTW_REAL x2_im = c_im(in[8 * istride]);
          const FFTW_REAL base_re = x0_re - (k500 * (x1_re + x2_re));
          const FFTW_REAL turn_re = k866 * (x2_im - x1_im);
          const FFTW_REAL base_im = x0_im - (k500 * (x1_im + x2_im));
          const FFTW_REAL turn_im = k866 * (x1_re - x2_re);

          s2_re = x0_re + x1_re + x2_re;
          s2_im = x0_im + x1_im + x2_im;
          p2_re = base_re + turn_re;
          m2_re = base_re - turn_re;
          p2_im = base_im + turn_im;
          m2_im = base_im - turn_im;
     }

     /* Outputs 0, 3, 6: butterfly over the three plain sums (no rotation). */
     c_re(out[0]) = s0_re + s1_re + s2_re;
     c_im(out[0]) = s0_im + s1_im + s2_im;
     {
          const FFTW_REAL base_re = s0_re - (k500 * (s1_re + s2_re));
          const FFTW_REAL turn_re = k866 * (s2_im - s1_im);

          c_re(out[3 * ostride]) = base_re + turn_re;
          c_re(out[6 * ostride]) = base_re - turn_re;
     }
     {
          const FFTW_REAL base_im = s0_im - (k500 * (s1_im + s2_im));
          const FFTW_REAL turn_im = k866 * (s1_re - s2_re);

          c_im(out[3 * ostride]) = base_im + turn_im;
          c_im(out[6 * ostride]) = base_im - turn_im;
     }

     /* Outputs 1, 4, 7: p1 and p2 are rotated first, then combined with p0. */
     {
          const FFTW_REAL y1_re = (k766 * p1_re) - (k642 * p1_im);
          const FFTW_REAL y1_im = (k766 * p1_im) + (k642 * p1_re);
          const FFTW_REAL y2_re = (k173 * p2_re) - (k984 * p2_im);
          const FFTW_REAL y2_im = (k173 * p2_im) + (k984 * p2_re);

          c_re(out[ostride]) = p0_re + y1_re + y2_re;
          c_im(out[ostride]) = p0_im + y1_im + y2_im;
          {
               const FFTW_REAL base_re = p0_re - (k500 * (y1_re + y2_re));
               const FFTW_REAL turn_re = k866 * (y2_im - y1_im);

               c_re(out[4 * ostride]) = base_re + turn_re;
               c_re(out[7 * ostride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = p0_im - (k500 * (y1_im + y2_im));
               const FFTW_REAL turn_im = k866 * (y1_re - y2_re);

               c_im(out[4 * ostride]) = base_im + turn_im;
               c_im(out[7 * ostride]) = base_im - turn_im;
          }
     }

     /*
      * Outputs 2, 5, 8: m1 and m2 are rotated first, then combined with m0.
      * neg_y2_re is stored with the opposite sign of the rotated real part,
      * which is why it is subtracted (or added) where the previous group
      * adds (or subtracts) the corresponding term.
      */
     {
          const FFTW_REAL y1_re = (k173 * m1_re) - (k984 * m1_im);
          const FFTW_REAL y1_im = (k173 * m1_im) + (k984 * m1_re);
          const FFTW_REAL neg_y2_re = (k939 * m2_re) + (k342 * m2_im);
          const FFTW_REAL y2_im = (k342 * m2_re) - (k939 * m2_im);

          c_re(out[2 * ostride]) = m0_re + y1_re - neg_y2_re;
          c_im(out[2 * ostride]) = m0_im + y1_im + y2_im;
          {
               const FFTW_REAL base_re = m0_re + (k500 * (neg_y2_re - y1_re));
               const FFTW_REAL turn_re = k866 * (y2_im - y1_im);

               c_re(out[5 * ostride]) = base_re + turn_re;
               c_re(out[8 * ostride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = m0_im - (k500 * (y1_im + y2_im));
               const FFTW_REAL turn_im = k866 * (y1_re + neg_y2_re);

               c_im(out[5 * ostride]) = base_im + turn_im;
               c_im(out[8 * ostride]) = base_im - turn_im;
          }
     }
}
11564 
11565 /* This function contains 126 FP additions and 68 FP multiplications */
11566 
/*
 * Length-10 in-place codelet with twiddle factors.
 *
 * Runs m iterations.  Each iteration works on the ten complex elements
 * inout[k * stride] (k = 0..9), where inout starts at A and advances by
 * dist elements per iteration, while W advances by 9 entries.
 *
 * Per iteration:
 *   1. element k (k = 1..9) is multiplied by W[k - 1] as a complex
 *      product; element 0 is used unchanged;
 *   2. the values are paired as (0,5) (2,7) (4,9) (6,1) (8,3) and each
 *      pair is replaced by its sum and its difference;
 *   3. a 5-point butterfly over the sums yields the even-indexed results
 *      and one over the differences yields the odd-indexed results, which
 *      are stored back into inout.
 *
 * The sign pattern corresponds to an unnormalized 10-point DFT with
 * negative exponent, assuming the FFTW_K* constants -- defined outside
 * this function -- hold the cosine/sine values their names suggest.
 * All ten elements are loaded before the first store of an iteration.
 */
static void fftw_twiddle_10(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     int i;
     FFTW_COMPLEX *inout = A;
     const FFTW_REAL k309 = (FFTW_REAL) FFTW_K309016994;
     const FFTW_REAL k809 = (FFTW_REAL) FFTW_K809016994;
     const FFTW_REAL k951 = (FFTW_REAL) FFTW_K951056516;
     const FFTW_REAL k587 = (FFTW_REAL) FFTW_K587785252;

     for (i = 0; i < m; ++i, inout += dist, W += 9) {
          /* evP = sum of pair P, odP = difference of pair P */
          FFTW_REAL ev0_re, ev0_im, od0_re, od0_im;
          FFTW_REAL ev1_re, ev1_im, od1_re, od1_im;
          FFTW_REAL ev2_re, ev2_im, od2_re, od2_im;
          FFTW_REAL ev3_re, ev3_im, od3_re, od3_im;
          FFTW_REAL ev4_re, ev4_im, od4_re, od4_im;

          /* Pair 0: element 0 (no twiddle) and element 5 (times W[4]). */
          {
               const FFTW_REAL a_re = c_re(inout[0]);
               const FFTW_REAL a_im = c_im(inout[0]);
               const FFTW_REAL xb_re = c_re(inout[5 * stride]);
               const FFTW_REAL xb_im = c_im(inout[5 * stride]);
               const FFTW_REAL wb_re = c_re(W[4]);
               const FFTW_REAL wb_im = c_im(W[4]);
               const FFTW_REAL b_re = (xb_re * wb_re) - (xb_im * wb_im);
               const FFTW_REAL b_im = (xb_re * wb_im) + (xb_im * wb_re);

               ev0_re = a_re + b_re;
               ev0_im = a_im + b_im;
               od0_re = a_re - b_re;
               od0_im = a_im - b_im;
          }
          /* Pair 1: element 2 (times W[1]) and element 7 (times W[6]). */
          {
               const FFTW_REAL xa_re = c_re(inout[2 * stride]);
               const FFTW_REAL xa_im = c_im(inout[2 * stride]);
               const FFTW_REAL wa_re = c_re(W[1]);
               const FFTW_REAL wa_im = c_im(W[1]);
               const FFTW_REAL a_re = (xa_re * wa_re) - (xa_im * wa_im);
               const FFTW_REAL a_im = (xa_re * wa_im) + (xa_im * wa_re);
               const FFTW_REAL xb_re = c_re(inout[7 * stride]);
               const FFTW_REAL xb_im = c_im(inout[7 * stride]);
               const FFTW_REAL wb_re = c_re(W[6]);
               const FFTW_REAL wb_im = c_im(W[6]);
               const FFTW_REAL b_re = (xb_re * wb_re) - (xb_im * wb_im);
               const FFTW_REAL b_im = (xb_re * wb_im) + (xb_im * wb_re);

               ev1_re = a_re + b_re;
               ev1_im = a_im + b_im;
               od1_re = a_re - b_re;
               od1_im = a_im - b_im;
          }
          /* Pair 2: element 4 (times W[3]) and element 9 (times W[8]). */
          {
               const FFTW_REAL xa_re = c_re(inout[4 * stride]);
               const FFTW_REAL xa_im = c_im(inout[4 * stride]);
               const FFTW_REAL wa_re = c_re(W[3]);
               const FFTW_REAL wa_im = c_im(W[3]);
               const FFTW_REAL a_re = (xa_re * wa_re) - (xa_im * wa_im);
               const FFTW_REAL a_im = (xa_re * wa_im) + (xa_im * wa_re);
               const FFTW_REAL xb_re = c_re(inout[9 * stride]);
               const FFTW_REAL xb_im = c_im(inout[9 * stride]);
               const FFTW_REAL wb_re = c_re(W[8]);
               const FFTW_REAL wb_im = c_im(W[8]);
               const FFTW_REAL b_re = (xb_re * wb_re) - (xb_im * wb_im);
               const FFTW_REAL b_im = (xb_re * wb_im) + (xb_im * wb_re);

               ev2_re = a_re + b_re;
               ev2_im = a_im + b_im;
               od2_re = a_re - b_re;
               od2_im = a_im - b_im;
          }
          /* Pair 3: element 6 (times W[5]) and element 1 (times W[0]). */
          {
               const FFTW_REAL xa_re = c_re(inout[6 * stride]);
               const FFTW_REAL xa_im = c_im(inout[6 * stride]);
               const FFTW_REAL wa_re = c_re(W[5]);
               const FFTW_REAL wa_im = c_im(W[5]);
               const FFTW_REAL a_re = (xa_re * wa_re) - (xa_im * wa_im);
               const FFTW_REAL a_im = (xa_re * wa_im) + (xa_im * wa_re);
               const FFTW_REAL xb_re = c_re(inout[stride]);
               const FFTW_REAL xb_im = c_im(inout[stride]);
               const FFTW_REAL wb_re = c_re(W[0]);
               const FFTW_REAL wb_im = c_im(W[0]);
               const FFTW_REAL b_re = (xb_re * wb_re) - (xb_im * wb_im);
               const FFTW_REAL b_im = (xb_re * wb_im) + (xb_im * wb_re);

               ev3_re = a_re + b_re;
               ev3_im = a_im + b_im;
               od3_re = a_re - b_re;
               od3_im = a_im - b_im;
          }
          /* Pair 4: element 8 (times W[7]) and element 3 (times W[2]). */
          {
               const FFTW_REAL xa_re = c_re(inout[8 * stride]);
               const FFTW_REAL xa_im = c_im(inout[8 * stride]);
               const FFTW_REAL wa_re = c_re(W[7]);
               const FFTW_REAL wa_im = c_im(W[7]);
               const FFTW_REAL a_re = (xa_re * wa_re) - (xa_im * wa_im);
               const FFTW_REAL a_im = (xa_re * wa_im) + (xa_im * wa_re);
               const FFTW_REAL xb_re = c_re(inout[3 * stride]);
               const FFTW_REAL xb_im = c_im(inout[3 * stride]);
               const FFTW_REAL wb_re = c_re(W[2]);
               const FFTW_REAL wb_im = c_im(W[2]);
               const FFTW_REAL b_re = (xb_re * wb_re) - (xb_im * wb_im);
               const FFTW_REAL b_im = (xb_re * wb_im) + (xb_im * wb_re);

               ev4_re = a_re + b_re;
               ev4_im = a_im + b_im;
               od4_re = a_re - b_re;
               od4_im = a_im - b_im;
          }

          /* 5-point butterfly over the pair sums -> elements 0, 6, 4, 2, 8. */
          c_re(inout[0]) = ev0_re + ev1_re + ev2_re + ev3_re + ev4_re;
          c_im(inout[0]) = ev0_im + ev1_im + ev2_im + ev3_im + ev4_im;
          {
               const FFTW_REAL base_re = ev0_re + (k309 * (ev1_re + ev4_re)) - (k809 * (ev2_re + ev3_re));
               const FFTW_REAL turn_re = (k951 * (ev1_im - ev4_im)) + (k587 * (ev2_im - ev3_im));

               c_re(inout[6 * stride]) = base_re + turn_re;
               c_re(inout[4 * stride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = ev0_im + (k309 * (ev1_im + ev4_im)) - (k809 * (ev2_im + ev3_im));
               const FFTW_REAL turn_im = (k951 * (ev4_re - ev1_re)) + (k587 * (ev3_re - ev2_re));

               c_im(inout[6 * stride]) = base_im + turn_im;
               c_im(inout[4 * stride]) = base_im - turn_im;
          }
          {
               const FFTW_REAL base_re = ev0_re + (k309 * (ev2_re + ev3_re)) - (k809 * (ev1_re + ev4_re));
               const FFTW_REAL turn_re = (k587 * (ev1_im - ev4_im)) + (k951 * (ev3_im - ev2_im));

               c_re(inout[2 * stride]) = base_re + turn_re;
               c_re(inout[8 * stride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = ev0_im + (k309 * (ev2_im + ev3_im)) - (k809 * (ev1_im + ev4_im));
               const FFTW_REAL turn_im = (k587 * (ev4_re - ev1_re)) + (k951 * (ev2_re - ev3_re));

               c_im(inout[2 * stride]) = base_im + turn_im;
               c_im(inout[8 * stride]) = base_im - turn_im;
          }

          /* 5-point butterfly over the pair differences -> elements 5, 1, 9, 7, 3. */
          c_re(inout[5 * stride]) = od0_re + od1_re + od2_re + od3_re + od4_re;
          c_im(inout[5 * stride]) = od0_im + od1_im + od2_im + od3_im + od4_im;
          {
               const FFTW_REAL base_re = od0_re + (k309 * (od1_re + od4_re)) - (k809 * (od2_re + od3_re));
               const FFTW_REAL turn_re = (k951 * (od1_im - od4_im)) + (k587 * (od2_im - od3_im));

               c_re(inout[stride]) = base_re + turn_re;
               c_re(inout[9 * stride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = od0_im + (k309 * (od1_im + od4_im)) - (k809 * (od2_im + od3_im));
               const FFTW_REAL turn_im = (k951 * (od4_re - od1_re)) + (k587 * (od3_re - od2_re));

               c_im(inout[stride]) = base_im + turn_im;
               c_im(inout[9 * stride]) = base_im - turn_im;
          }
          {
               const FFTW_REAL base_re = od0_re + (k309 * (od2_re + od3_re)) - (k809 * (od1_re + od4_re));
               const FFTW_REAL turn_re = (k587 * (od1_im - od4_im)) + (k951 * (od3_im - od2_im));

               c_re(inout[7 * stride]) = base_re + turn_re;
               c_re(inout[3 * stride]) = base_re - turn_re;
          }
          {
               const FFTW_REAL base_im = od0_im + (k309 * (od2_im + od3_im)) - (k809 * (od1_im + od4_im));
               const FFTW_REAL turn_im = (k587 * (od4_re - od1_re)) + (k951 * (od2_re - od3_re));

               c_im(inout[7 * stride]) = base_im + turn_im;
               c_im(inout[3 * stride]) = base_im - turn_im;
          }
     }
}
11823 
11824 /* This function contains 174 FP additions and 84 FP multiplications */
11825 
fftw_twiddle_16(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)11826 static void fftw_twiddle_16(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
11827 {
11828      int i;
11829      FFTW_COMPLEX *inout;
11830      inout = A;
11831      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 15) {
11832 	  FFTW_REAL tre0_0_0;
11833 	  FFTW_REAL tim0_0_0;
11834 	  FFTW_REAL tre0_0_1;
11835 	  FFTW_REAL tim0_0_1;
11836 	  FFTW_REAL tre0_0_2;
11837 	  FFTW_REAL tim0_0_2;
11838 	  FFTW_REAL tre0_0_3;
11839 	  FFTW_REAL tim0_0_3;
11840 	  FFTW_REAL tre0_1_0;
11841 	  FFTW_REAL tim0_1_0;
11842 	  FFTW_REAL tre0_1_1;
11843 	  FFTW_REAL tim0_1_1;
11844 	  FFTW_REAL tre0_1_2;
11845 	  FFTW_REAL tim0_1_2;
11846 	  FFTW_REAL tre0_1_3;
11847 	  FFTW_REAL tim0_1_3;
11848 	  FFTW_REAL tre0_2_0;
11849 	  FFTW_REAL tim0_2_0;
11850 	  FFTW_REAL tre0_2_1;
11851 	  FFTW_REAL tim0_2_1;
11852 	  FFTW_REAL tre0_2_2;
11853 	  FFTW_REAL tim0_2_2;
11854 	  FFTW_REAL tre0_2_3;
11855 	  FFTW_REAL tim0_2_3;
11856 	  FFTW_REAL tre0_3_0;
11857 	  FFTW_REAL tim0_3_0;
11858 	  FFTW_REAL tre0_3_1;
11859 	  FFTW_REAL tim0_3_1;
11860 	  FFTW_REAL tre0_3_2;
11861 	  FFTW_REAL tim0_3_2;
11862 	  FFTW_REAL tre0_3_3;
11863 	  FFTW_REAL tim0_3_3;
11864 	  {
11865 	       FFTW_REAL tre1_0_0;
11866 	       FFTW_REAL tim1_0_0;
11867 	       FFTW_REAL tre1_0_1;
11868 	       FFTW_REAL tim1_0_1;
11869 	       FFTW_REAL tre1_1_0;
11870 	       FFTW_REAL tim1_1_0;
11871 	       FFTW_REAL tre1_1_1;
11872 	       FFTW_REAL tim1_1_1;
11873 	       {
11874 		    FFTW_REAL tre2_0_0;
11875 		    FFTW_REAL tim2_0_0;
11876 		    FFTW_REAL tre2_1_0;
11877 		    FFTW_REAL tim2_1_0;
11878 		    tre2_0_0 = c_re(inout[0]);
11879 		    tim2_0_0 = c_im(inout[0]);
11880 		    {
11881 			 FFTW_REAL tr;
11882 			 FFTW_REAL ti;
11883 			 FFTW_REAL twr;
11884 			 FFTW_REAL twi;
11885 			 tr = c_re(inout[8 * stride]);
11886 			 ti = c_im(inout[8 * stride]);
11887 			 twr = c_re(W[7]);
11888 			 twi = c_im(W[7]);
11889 			 tre2_1_0 = (tr * twr) - (ti * twi);
11890 			 tim2_1_0 = (tr * twi) + (ti * twr);
11891 		    }
11892 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
11893 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
11894 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
11895 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
11896 	       }
11897 	       {
11898 		    FFTW_REAL tre2_0_0;
11899 		    FFTW_REAL tim2_0_0;
11900 		    FFTW_REAL tre2_1_0;
11901 		    FFTW_REAL tim2_1_0;
11902 		    {
11903 			 FFTW_REAL tr;
11904 			 FFTW_REAL ti;
11905 			 FFTW_REAL twr;
11906 			 FFTW_REAL twi;
11907 			 tr = c_re(inout[4 * stride]);
11908 			 ti = c_im(inout[4 * stride]);
11909 			 twr = c_re(W[3]);
11910 			 twi = c_im(W[3]);
11911 			 tre2_0_0 = (tr * twr) - (ti * twi);
11912 			 tim2_0_0 = (tr * twi) + (ti * twr);
11913 		    }
11914 		    {
11915 			 FFTW_REAL tr;
11916 			 FFTW_REAL ti;
11917 			 FFTW_REAL twr;
11918 			 FFTW_REAL twi;
11919 			 tr = c_re(inout[12 * stride]);
11920 			 ti = c_im(inout[12 * stride]);
11921 			 twr = c_re(W[11]);
11922 			 twi = c_im(W[11]);
11923 			 tre2_1_0 = (tr * twr) - (ti * twi);
11924 			 tim2_1_0 = (tr * twi) + (ti * twr);
11925 		    }
11926 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
11927 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
11928 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
11929 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
11930 	       }
11931 	       tre0_0_0 = tre1_0_0 + tre1_0_1;
11932 	       tim0_0_0 = tim1_0_0 + tim1_0_1;
11933 	       tre0_2_0 = tre1_0_0 - tre1_0_1;
11934 	       tim0_2_0 = tim1_0_0 - tim1_0_1;
11935 	       tre0_1_0 = tre1_1_0 + tim1_1_1;
11936 	       tim0_1_0 = tim1_1_0 - tre1_1_1;
11937 	       tre0_3_0 = tre1_1_0 - tim1_1_1;
11938 	       tim0_3_0 = tim1_1_0 + tre1_1_1;
11939 	  }
11940 	  {
11941 	       FFTW_REAL tre1_0_0;
11942 	       FFTW_REAL tim1_0_0;
11943 	       FFTW_REAL tre1_0_1;
11944 	       FFTW_REAL tim1_0_1;
11945 	       FFTW_REAL tre1_1_0;
11946 	       FFTW_REAL tim1_1_0;
11947 	       FFTW_REAL tre1_1_1;
11948 	       FFTW_REAL tim1_1_1;
11949 	       {
11950 		    FFTW_REAL tre2_0_0;
11951 		    FFTW_REAL tim2_0_0;
11952 		    FFTW_REAL tre2_1_0;
11953 		    FFTW_REAL tim2_1_0;
11954 		    {
11955 			 FFTW_REAL tr;
11956 			 FFTW_REAL ti;
11957 			 FFTW_REAL twr;
11958 			 FFTW_REAL twi;
11959 			 tr = c_re(inout[stride]);
11960 			 ti = c_im(inout[stride]);
11961 			 twr = c_re(W[0]);
11962 			 twi = c_im(W[0]);
11963 			 tre2_0_0 = (tr * twr) - (ti * twi);
11964 			 tim2_0_0 = (tr * twi) + (ti * twr);
11965 		    }
11966 		    {
11967 			 FFTW_REAL tr;
11968 			 FFTW_REAL ti;
11969 			 FFTW_REAL twr;
11970 			 FFTW_REAL twi;
11971 			 tr = c_re(inout[9 * stride]);
11972 			 ti = c_im(inout[9 * stride]);
11973 			 twr = c_re(W[8]);
11974 			 twi = c_im(W[8]);
11975 			 tre2_1_0 = (tr * twr) - (ti * twi);
11976 			 tim2_1_0 = (tr * twi) + (ti * twr);
11977 		    }
11978 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
11979 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
11980 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
11981 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
11982 	       }
11983 	       {
11984 		    FFTW_REAL tre2_0_0;
11985 		    FFTW_REAL tim2_0_0;
11986 		    FFTW_REAL tre2_1_0;
11987 		    FFTW_REAL tim2_1_0;
11988 		    {
11989 			 FFTW_REAL tr;
11990 			 FFTW_REAL ti;
11991 			 FFTW_REAL twr;
11992 			 FFTW_REAL twi;
11993 			 tr = c_re(inout[5 * stride]);
11994 			 ti = c_im(inout[5 * stride]);
11995 			 twr = c_re(W[4]);
11996 			 twi = c_im(W[4]);
11997 			 tre2_0_0 = (tr * twr) - (ti * twi);
11998 			 tim2_0_0 = (tr * twi) + (ti * twr);
11999 		    }
12000 		    {
12001 			 FFTW_REAL tr;
12002 			 FFTW_REAL ti;
12003 			 FFTW_REAL twr;
12004 			 FFTW_REAL twi;
12005 			 tr = c_re(inout[13 * stride]);
12006 			 ti = c_im(inout[13 * stride]);
12007 			 twr = c_re(W[12]);
12008 			 twi = c_im(W[12]);
12009 			 tre2_1_0 = (tr * twr) - (ti * twi);
12010 			 tim2_1_0 = (tr * twi) + (ti * twr);
12011 		    }
12012 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12013 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12014 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12015 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12016 	       }
12017 	       tre0_0_1 = tre1_0_0 + tre1_0_1;
12018 	       tim0_0_1 = tim1_0_0 + tim1_0_1;
12019 	       tre0_2_1 = tre1_0_0 - tre1_0_1;
12020 	       tim0_2_1 = tim1_0_0 - tim1_0_1;
12021 	       tre0_1_1 = tre1_1_0 + tim1_1_1;
12022 	       tim0_1_1 = tim1_1_0 - tre1_1_1;
12023 	       tre0_3_1 = tre1_1_0 - tim1_1_1;
12024 	       tim0_3_1 = tim1_1_0 + tre1_1_1;
12025 	  }
12026 	  {
12027 	       FFTW_REAL tre1_0_0;
12028 	       FFTW_REAL tim1_0_0;
12029 	       FFTW_REAL tre1_0_1;
12030 	       FFTW_REAL tim1_0_1;
12031 	       FFTW_REAL tre1_1_0;
12032 	       FFTW_REAL tim1_1_0;
12033 	       FFTW_REAL tre1_1_1;
12034 	       FFTW_REAL tim1_1_1;
12035 	       {
12036 		    FFTW_REAL tre2_0_0;
12037 		    FFTW_REAL tim2_0_0;
12038 		    FFTW_REAL tre2_1_0;
12039 		    FFTW_REAL tim2_1_0;
12040 		    {
12041 			 FFTW_REAL tr;
12042 			 FFTW_REAL ti;
12043 			 FFTW_REAL twr;
12044 			 FFTW_REAL twi;
12045 			 tr = c_re(inout[2 * stride]);
12046 			 ti = c_im(inout[2 * stride]);
12047 			 twr = c_re(W[1]);
12048 			 twi = c_im(W[1]);
12049 			 tre2_0_0 = (tr * twr) - (ti * twi);
12050 			 tim2_0_0 = (tr * twi) + (ti * twr);
12051 		    }
12052 		    {
12053 			 FFTW_REAL tr;
12054 			 FFTW_REAL ti;
12055 			 FFTW_REAL twr;
12056 			 FFTW_REAL twi;
12057 			 tr = c_re(inout[10 * stride]);
12058 			 ti = c_im(inout[10 * stride]);
12059 			 twr = c_re(W[9]);
12060 			 twi = c_im(W[9]);
12061 			 tre2_1_0 = (tr * twr) - (ti * twi);
12062 			 tim2_1_0 = (tr * twi) + (ti * twr);
12063 		    }
12064 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12065 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12066 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12067 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12068 	       }
12069 	       {
12070 		    FFTW_REAL tre2_0_0;
12071 		    FFTW_REAL tim2_0_0;
12072 		    FFTW_REAL tre2_1_0;
12073 		    FFTW_REAL tim2_1_0;
12074 		    {
12075 			 FFTW_REAL tr;
12076 			 FFTW_REAL ti;
12077 			 FFTW_REAL twr;
12078 			 FFTW_REAL twi;
12079 			 tr = c_re(inout[6 * stride]);
12080 			 ti = c_im(inout[6 * stride]);
12081 			 twr = c_re(W[5]);
12082 			 twi = c_im(W[5]);
12083 			 tre2_0_0 = (tr * twr) - (ti * twi);
12084 			 tim2_0_0 = (tr * twi) + (ti * twr);
12085 		    }
12086 		    {
12087 			 FFTW_REAL tr;
12088 			 FFTW_REAL ti;
12089 			 FFTW_REAL twr;
12090 			 FFTW_REAL twi;
12091 			 tr = c_re(inout[14 * stride]);
12092 			 ti = c_im(inout[14 * stride]);
12093 			 twr = c_re(W[13]);
12094 			 twi = c_im(W[13]);
12095 			 tre2_1_0 = (tr * twr) - (ti * twi);
12096 			 tim2_1_0 = (tr * twi) + (ti * twr);
12097 		    }
12098 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12099 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12100 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12101 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12102 	       }
12103 	       tre0_0_2 = tre1_0_0 + tre1_0_1;
12104 	       tim0_0_2 = tim1_0_0 + tim1_0_1;
12105 	       tre0_2_2 = tre1_0_0 - tre1_0_1;
12106 	       tim0_2_2 = tim1_0_0 - tim1_0_1;
12107 	       tre0_1_2 = tre1_1_0 + tim1_1_1;
12108 	       tim0_1_2 = tim1_1_0 - tre1_1_1;
12109 	       tre0_3_2 = tre1_1_0 - tim1_1_1;
12110 	       tim0_3_2 = tim1_1_0 + tre1_1_1;
12111 	  }
12112 	  {
12113 	       FFTW_REAL tre1_0_0;
12114 	       FFTW_REAL tim1_0_0;
12115 	       FFTW_REAL tre1_0_1;
12116 	       FFTW_REAL tim1_0_1;
12117 	       FFTW_REAL tre1_1_0;
12118 	       FFTW_REAL tim1_1_0;
12119 	       FFTW_REAL tre1_1_1;
12120 	       FFTW_REAL tim1_1_1;
12121 	       {
12122 		    FFTW_REAL tre2_0_0;
12123 		    FFTW_REAL tim2_0_0;
12124 		    FFTW_REAL tre2_1_0;
12125 		    FFTW_REAL tim2_1_0;
12126 		    {
12127 			 FFTW_REAL tr;
12128 			 FFTW_REAL ti;
12129 			 FFTW_REAL twr;
12130 			 FFTW_REAL twi;
12131 			 tr = c_re(inout[3 * stride]);
12132 			 ti = c_im(inout[3 * stride]);
12133 			 twr = c_re(W[2]);
12134 			 twi = c_im(W[2]);
12135 			 tre2_0_0 = (tr * twr) - (ti * twi);
12136 			 tim2_0_0 = (tr * twi) + (ti * twr);
12137 		    }
12138 		    {
12139 			 FFTW_REAL tr;
12140 			 FFTW_REAL ti;
12141 			 FFTW_REAL twr;
12142 			 FFTW_REAL twi;
12143 			 tr = c_re(inout[11 * stride]);
12144 			 ti = c_im(inout[11 * stride]);
12145 			 twr = c_re(W[10]);
12146 			 twi = c_im(W[10]);
12147 			 tre2_1_0 = (tr * twr) - (ti * twi);
12148 			 tim2_1_0 = (tr * twi) + (ti * twr);
12149 		    }
12150 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12151 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12152 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12153 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12154 	       }
12155 	       {
12156 		    FFTW_REAL tre2_0_0;
12157 		    FFTW_REAL tim2_0_0;
12158 		    FFTW_REAL tre2_1_0;
12159 		    FFTW_REAL tim2_1_0;
12160 		    {
12161 			 FFTW_REAL tr;
12162 			 FFTW_REAL ti;
12163 			 FFTW_REAL twr;
12164 			 FFTW_REAL twi;
12165 			 tr = c_re(inout[7 * stride]);
12166 			 ti = c_im(inout[7 * stride]);
12167 			 twr = c_re(W[6]);
12168 			 twi = c_im(W[6]);
12169 			 tre2_0_0 = (tr * twr) - (ti * twi);
12170 			 tim2_0_0 = (tr * twi) + (ti * twr);
12171 		    }
12172 		    {
12173 			 FFTW_REAL tr;
12174 			 FFTW_REAL ti;
12175 			 FFTW_REAL twr;
12176 			 FFTW_REAL twi;
12177 			 tr = c_re(inout[15 * stride]);
12178 			 ti = c_im(inout[15 * stride]);
12179 			 twr = c_re(W[14]);
12180 			 twi = c_im(W[14]);
12181 			 tre2_1_0 = (tr * twr) - (ti * twi);
12182 			 tim2_1_0 = (tr * twi) + (ti * twr);
12183 		    }
12184 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12185 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12186 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12187 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12188 	       }
12189 	       tre0_0_3 = tre1_0_0 + tre1_0_1;
12190 	       tim0_0_3 = tim1_0_0 + tim1_0_1;
12191 	       tre0_2_3 = tre1_0_0 - tre1_0_1;
12192 	       tim0_2_3 = tim1_0_0 - tim1_0_1;
12193 	       tre0_1_3 = tre1_1_0 + tim1_1_1;
12194 	       tim0_1_3 = tim1_1_0 - tre1_1_1;
12195 	       tre0_3_3 = tre1_1_0 - tim1_1_1;
12196 	       tim0_3_3 = tim1_1_0 + tre1_1_1;
12197 	  }
12198 	  {
12199 	       FFTW_REAL tre1_0_0;
12200 	       FFTW_REAL tim1_0_0;
12201 	       FFTW_REAL tre1_0_1;
12202 	       FFTW_REAL tim1_0_1;
12203 	       FFTW_REAL tre1_1_0;
12204 	       FFTW_REAL tim1_1_0;
12205 	       FFTW_REAL tre1_1_1;
12206 	       FFTW_REAL tim1_1_1;
12207 	       tre1_0_0 = tre0_0_0 + tre0_0_2;
12208 	       tim1_0_0 = tim0_0_0 + tim0_0_2;
12209 	       tre1_1_0 = tre0_0_0 - tre0_0_2;
12210 	       tim1_1_0 = tim0_0_0 - tim0_0_2;
12211 	       tre1_0_1 = tre0_0_1 + tre0_0_3;
12212 	       tim1_0_1 = tim0_0_1 + tim0_0_3;
12213 	       tre1_1_1 = tre0_0_1 - tre0_0_3;
12214 	       tim1_1_1 = tim0_0_1 - tim0_0_3;
12215 	       c_re(inout[0]) = tre1_0_0 + tre1_0_1;
12216 	       c_im(inout[0]) = tim1_0_0 + tim1_0_1;
12217 	       c_re(inout[8 * stride]) = tre1_0_0 - tre1_0_1;
12218 	       c_im(inout[8 * stride]) = tim1_0_0 - tim1_0_1;
12219 	       c_re(inout[4 * stride]) = tre1_1_0 + tim1_1_1;
12220 	       c_im(inout[4 * stride]) = tim1_1_0 - tre1_1_1;
12221 	       c_re(inout[12 * stride]) = tre1_1_0 - tim1_1_1;
12222 	       c_im(inout[12 * stride]) = tim1_1_0 + tre1_1_1;
12223 	  }
12224 	  {
12225 	       FFTW_REAL tre1_0_0;
12226 	       FFTW_REAL tim1_0_0;
12227 	       FFTW_REAL tre1_0_1;
12228 	       FFTW_REAL tim1_0_1;
12229 	       FFTW_REAL tre1_1_0;
12230 	       FFTW_REAL tim1_1_0;
12231 	       FFTW_REAL tre1_1_1;
12232 	       FFTW_REAL tim1_1_1;
12233 	       {
12234 		    FFTW_REAL tre2_1_0;
12235 		    FFTW_REAL tim2_1_0;
12236 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_2 + tim0_1_2);
12237 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_2 - tre0_1_2);
12238 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
12239 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
12240 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
12241 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
12242 	       }
12243 	       {
12244 		    FFTW_REAL tre2_0_0;
12245 		    FFTW_REAL tim2_0_0;
12246 		    FFTW_REAL tre2_1_0;
12247 		    FFTW_REAL tim2_1_0;
12248 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_1);
12249 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_1);
12250 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_1_3);
12251 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_1_3);
12252 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12253 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12254 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12255 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12256 	       }
12257 	       c_re(inout[stride]) = tre1_0_0 + tre1_0_1;
12258 	       c_im(inout[stride]) = tim1_0_0 + tim1_0_1;
12259 	       c_re(inout[9 * stride]) = tre1_0_0 - tre1_0_1;
12260 	       c_im(inout[9 * stride]) = tim1_0_0 - tim1_0_1;
12261 	       c_re(inout[5 * stride]) = tre1_1_0 + tim1_1_1;
12262 	       c_im(inout[5 * stride]) = tim1_1_0 - tre1_1_1;
12263 	       c_re(inout[13 * stride]) = tre1_1_0 - tim1_1_1;
12264 	       c_im(inout[13 * stride]) = tim1_1_0 + tre1_1_1;
12265 	  }
12266 	  {
12267 	       FFTW_REAL tre1_0_0;
12268 	       FFTW_REAL tim1_0_0;
12269 	       FFTW_REAL tre1_0_1;
12270 	       FFTW_REAL tim1_0_1;
12271 	       FFTW_REAL tre1_1_0;
12272 	       FFTW_REAL tim1_1_0;
12273 	       FFTW_REAL tre1_1_1;
12274 	       FFTW_REAL tim1_1_1;
12275 	       tre1_0_0 = tre0_2_0 + tim0_2_2;
12276 	       tim1_0_0 = tim0_2_0 - tre0_2_2;
12277 	       tre1_1_0 = tre0_2_0 - tim0_2_2;
12278 	       tim1_1_0 = tim0_2_0 + tre0_2_2;
12279 	       {
12280 		    FFTW_REAL tre2_0_0;
12281 		    FFTW_REAL tim2_0_0;
12282 		    FFTW_REAL tre2_1_0;
12283 		    FFTW_REAL tim2_1_0;
12284 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_1 + tim0_2_1);
12285 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_1 - tre0_2_1);
12286 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_3 - tre0_2_3);
12287 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_3 + tre0_2_3);
12288 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12289 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
12290 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12291 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
12292 	       }
12293 	       c_re(inout[2 * stride]) = tre1_0_0 + tre1_0_1;
12294 	       c_im(inout[2 * stride]) = tim1_0_0 + tim1_0_1;
12295 	       c_re(inout[10 * stride]) = tre1_0_0 - tre1_0_1;
12296 	       c_im(inout[10 * stride]) = tim1_0_0 - tim1_0_1;
12297 	       c_re(inout[6 * stride]) = tre1_1_0 + tim1_1_1;
12298 	       c_im(inout[6 * stride]) = tim1_1_0 - tre1_1_1;
12299 	       c_re(inout[14 * stride]) = tre1_1_0 - tim1_1_1;
12300 	       c_im(inout[14 * stride]) = tim1_1_0 + tre1_1_1;
12301 	  }
12302 	  {
12303 	       FFTW_REAL tre1_0_0;
12304 	       FFTW_REAL tim1_0_0;
12305 	       FFTW_REAL tre1_0_1;
12306 	       FFTW_REAL tim1_0_1;
12307 	       FFTW_REAL tre1_1_0;
12308 	       FFTW_REAL tim1_1_0;
12309 	       FFTW_REAL tre1_1_1;
12310 	       FFTW_REAL tim1_1_1;
12311 	       {
12312 		    FFTW_REAL tre2_1_0;
12313 		    FFTW_REAL tim2_1_0;
12314 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_2 - tre0_3_2);
12315 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_2 + tre0_3_2);
12316 		    tre1_0_0 = tre0_3_0 + tre2_1_0;
12317 		    tim1_0_0 = tim0_3_0 - tim2_1_0;
12318 		    tre1_1_0 = tre0_3_0 - tre2_1_0;
12319 		    tim1_1_0 = tim0_3_0 + tim2_1_0;
12320 	       }
12321 	       {
12322 		    FFTW_REAL tre2_0_0;
12323 		    FFTW_REAL tim2_0_0;
12324 		    FFTW_REAL tre2_1_0;
12325 		    FFTW_REAL tim2_1_0;
12326 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_1) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_1);
12327 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_1) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_1);
12328 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_3_3) + (((FFTW_REAL) FFTW_K382683432) * tim0_3_3);
12329 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_3);
12330 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
12331 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12332 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
12333 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12334 	       }
12335 	       c_re(inout[3 * stride]) = tre1_0_0 + tre1_0_1;
12336 	       c_im(inout[3 * stride]) = tim1_0_0 + tim1_0_1;
12337 	       c_re(inout[11 * stride]) = tre1_0_0 - tre1_0_1;
12338 	       c_im(inout[11 * stride]) = tim1_0_0 - tim1_0_1;
12339 	       c_re(inout[7 * stride]) = tre1_1_0 + tim1_1_1;
12340 	       c_im(inout[7 * stride]) = tim1_1_0 - tre1_1_1;
12341 	       c_re(inout[15 * stride]) = tre1_1_0 - tim1_1_1;
12342 	       c_im(inout[15 * stride]) = tim1_1_0 + tre1_1_1;
12343 	  }
12344      }
12345 }
12346 
12347 /* This function contains 6 FP additions and 4 FP multiplications */
12348 
/*
 * Two-point twiddle pass, performed in place on A.
 *
 * The loop body runs m times (not at all when m <= 0).  On each pass the
 * element found `stride` entries after the current position is multiplied
 * by the complex factor W[0]; the current element and that product are
 * then overwritten with their sum (offset 0) and difference (offset
 * stride).  After every pass the data position advances by `dist`
 * elements and W advances by one element.
 *
 * All inputs of a pass are read before any output of that pass is stored.
 */
static void fftw_twiddle_2(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *lo = A;
     int pass;

     for (pass = 0; pass < m; ++pass) {
          FFTW_COMPLEX *hi = lo + stride;
          const FFTW_REAL a_re = c_re(lo[0]);
          const FFTW_REAL a_im = c_im(lo[0]);
          const FFTW_REAL x_re = c_re(hi[0]);
          const FFTW_REAL x_im = c_im(hi[0]);
          const FFTW_REAL w_re = c_re(W[0]);
          const FFTW_REAL w_im = c_im(W[0]);
          /* b = x * w, written out as a complex product */
          const FFTW_REAL b_re = (x_re * w_re) - (x_im * w_im);
          const FFTW_REAL b_im = (x_re * w_im) + (x_im * w_re);

          c_re(lo[0]) = a_re + b_re;
          c_im(lo[0]) = a_im + b_im;
          c_re(hi[0]) = a_re - b_re;
          c_im(hi[0]) = a_im - b_im;

          /* step to the next data group and the next twiddle factor */
          lo += dist;
          W += 1;
     }
}
12379 
12380 /* This function contains 18 FP additions and 12 FP multiplications */
12381 
/*
 * Three-point twiddle pass, performed in place on A.
 *
 * The loop body runs m times (not at all when m <= 0).  On each pass the
 * elements at offsets stride and 2 * stride from the current position are
 * multiplied by the complex factors W[0] and W[1] respectively.  Calling
 * the untouched element x0 and the two products x1 and x2, the pass stores
 *
 *     offset 0         : x0 + x1 + x2
 *     offset stride    : re = base_re + delta_re, im = base_im + delta_im
 *     offset 2*stride  : re = base_re - delta_re, im = base_im - delta_im
 *
 * where base = x0 - K499999999 * (x1 + x2) component-wise,
 * delta_re = K866025403 * (im(x1) - im(x2)) and
 * delta_im = K866025403 * (re(x2) - re(x1)).
 *
 * NOTE(review): FFTW_K499999999 / FFTW_K866025403 are defined outside this
 * block; by their names they are presumably ~1/2 and ~sqrt(3)/2 -- confirm
 * against their definitions.
 *
 * After every pass the data position advances by `dist` elements and W by
 * two elements.  All inputs of a pass are read before any output is stored.
 */
static void fftw_twiddle_3(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     const FFTW_REAL k499 = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k866 = (FFTW_REAL) FFTW_K866025403;
     FFTW_COMPLEX *p0 = A;
     int pass;

     for (pass = 0; pass < m; ++pass) {
          FFTW_COMPLEX *p1 = p0 + stride;
          FFTW_COMPLEX *p2 = p0 + 2 * stride;

          /* raw inputs and twiddle factors */
          const FFTW_REAL x0_re = c_re(p0[0]);
          const FFTW_REAL x0_im = c_im(p0[0]);
          const FFTW_REAL in1_re = c_re(p1[0]);
          const FFTW_REAL in1_im = c_im(p1[0]);
          const FFTW_REAL w1_re = c_re(W[0]);
          const FFTW_REAL w1_im = c_im(W[0]);
          const FFTW_REAL in2_re = c_re(p2[0]);
          const FFTW_REAL in2_im = c_im(p2[0]);
          const FFTW_REAL w2_re = c_re(W[1]);
          const FFTW_REAL w2_im = c_im(W[1]);

          /* x1 = in1 * w1, x2 = in2 * w2 (complex products) */
          const FFTW_REAL x1_re = (in1_re * w1_re) - (in1_im * w1_im);
          const FFTW_REAL x1_im = (in1_re * w1_im) + (in1_im * w1_re);
          const FFTW_REAL x2_re = (in2_re * w2_re) - (in2_im * w2_im);
          const FFTW_REAL x2_im = (in2_re * w2_im) + (in2_im * w2_re);

          /* shared terms of the two non-zero-offset outputs */
          const FFTW_REAL re_base = x0_re - (k499 * (x1_re + x2_re));
          const FFTW_REAL re_delta = k866 * (x1_im - x2_im);
          const FFTW_REAL im_base = x0_im - (k499 * (x1_im + x2_im));
          const FFTW_REAL im_delta = k866 * (x2_re - x1_re);

          c_re(p0[0]) = x0_re + x1_re + x2_re;
          c_im(p0[0]) = x0_im + x1_im + x2_im;
          c_re(p1[0]) = re_base + re_delta;
          c_re(p2[0]) = re_base - re_delta;
          c_im(p1[0]) = im_base + im_delta;
          c_im(p2[0]) = im_base - im_delta;

          /* step to the next data group and the next pair of twiddle factors */
          p0 += dist;
          W += 2;
     }
}
12440 
12441 /* This function contains 438 FP additions and 212 FP multiplications */
12442 
fftw_twiddle_32(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)12443 static void fftw_twiddle_32(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
12444 {
12445      int i;
12446      FFTW_COMPLEX *inout;
12447      inout = A;
12448      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 31) {
12449 	  FFTW_REAL tre0_0_0;
12450 	  FFTW_REAL tim0_0_0;
12451 	  FFTW_REAL tre0_0_1;
12452 	  FFTW_REAL tim0_0_1;
12453 	  FFTW_REAL tre0_0_2;
12454 	  FFTW_REAL tim0_0_2;
12455 	  FFTW_REAL tre0_0_3;
12456 	  FFTW_REAL tim0_0_3;
12457 	  FFTW_REAL tre0_0_4;
12458 	  FFTW_REAL tim0_0_4;
12459 	  FFTW_REAL tre0_0_5;
12460 	  FFTW_REAL tim0_0_5;
12461 	  FFTW_REAL tre0_0_6;
12462 	  FFTW_REAL tim0_0_6;
12463 	  FFTW_REAL tre0_0_7;
12464 	  FFTW_REAL tim0_0_7;
12465 	  FFTW_REAL tre0_1_0;
12466 	  FFTW_REAL tim0_1_0;
12467 	  FFTW_REAL tre0_1_1;
12468 	  FFTW_REAL tim0_1_1;
12469 	  FFTW_REAL tre0_1_2;
12470 	  FFTW_REAL tim0_1_2;
12471 	  FFTW_REAL tre0_1_3;
12472 	  FFTW_REAL tim0_1_3;
12473 	  FFTW_REAL tre0_1_4;
12474 	  FFTW_REAL tim0_1_4;
12475 	  FFTW_REAL tre0_1_5;
12476 	  FFTW_REAL tim0_1_5;
12477 	  FFTW_REAL tre0_1_6;
12478 	  FFTW_REAL tim0_1_6;
12479 	  FFTW_REAL tre0_1_7;
12480 	  FFTW_REAL tim0_1_7;
12481 	  FFTW_REAL tre0_2_0;
12482 	  FFTW_REAL tim0_2_0;
12483 	  FFTW_REAL tre0_2_1;
12484 	  FFTW_REAL tim0_2_1;
12485 	  FFTW_REAL tre0_2_2;
12486 	  FFTW_REAL tim0_2_2;
12487 	  FFTW_REAL tre0_2_3;
12488 	  FFTW_REAL tim0_2_3;
12489 	  FFTW_REAL tre0_2_4;
12490 	  FFTW_REAL tim0_2_4;
12491 	  FFTW_REAL tre0_2_5;
12492 	  FFTW_REAL tim0_2_5;
12493 	  FFTW_REAL tre0_2_6;
12494 	  FFTW_REAL tim0_2_6;
12495 	  FFTW_REAL tre0_2_7;
12496 	  FFTW_REAL tim0_2_7;
12497 	  FFTW_REAL tre0_3_0;
12498 	  FFTW_REAL tim0_3_0;
12499 	  FFTW_REAL tre0_3_1;
12500 	  FFTW_REAL tim0_3_1;
12501 	  FFTW_REAL tre0_3_2;
12502 	  FFTW_REAL tim0_3_2;
12503 	  FFTW_REAL tre0_3_3;
12504 	  FFTW_REAL tim0_3_3;
12505 	  FFTW_REAL tre0_3_4;
12506 	  FFTW_REAL tim0_3_4;
12507 	  FFTW_REAL tre0_3_5;
12508 	  FFTW_REAL tim0_3_5;
12509 	  FFTW_REAL tre0_3_6;
12510 	  FFTW_REAL tim0_3_6;
12511 	  FFTW_REAL tre0_3_7;
12512 	  FFTW_REAL tim0_3_7;
12513 	  {
12514 	       FFTW_REAL tre1_0_0;
12515 	       FFTW_REAL tim1_0_0;
12516 	       FFTW_REAL tre1_0_1;
12517 	       FFTW_REAL tim1_0_1;
12518 	       FFTW_REAL tre1_1_0;
12519 	       FFTW_REAL tim1_1_0;
12520 	       FFTW_REAL tre1_1_1;
12521 	       FFTW_REAL tim1_1_1;
12522 	       {
12523 		    FFTW_REAL tre2_0_0;
12524 		    FFTW_REAL tim2_0_0;
12525 		    FFTW_REAL tre2_1_0;
12526 		    FFTW_REAL tim2_1_0;
12527 		    tre2_0_0 = c_re(inout[0]);
12528 		    tim2_0_0 = c_im(inout[0]);
12529 		    {
12530 			 FFTW_REAL tr;
12531 			 FFTW_REAL ti;
12532 			 FFTW_REAL twr;
12533 			 FFTW_REAL twi;
12534 			 tr = c_re(inout[16 * stride]);
12535 			 ti = c_im(inout[16 * stride]);
12536 			 twr = c_re(W[15]);
12537 			 twi = c_im(W[15]);
12538 			 tre2_1_0 = (tr * twr) - (ti * twi);
12539 			 tim2_1_0 = (tr * twi) + (ti * twr);
12540 		    }
12541 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12542 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12543 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12544 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12545 	       }
12546 	       {
12547 		    FFTW_REAL tre2_0_0;
12548 		    FFTW_REAL tim2_0_0;
12549 		    FFTW_REAL tre2_1_0;
12550 		    FFTW_REAL tim2_1_0;
12551 		    {
12552 			 FFTW_REAL tr;
12553 			 FFTW_REAL ti;
12554 			 FFTW_REAL twr;
12555 			 FFTW_REAL twi;
12556 			 tr = c_re(inout[8 * stride]);
12557 			 ti = c_im(inout[8 * stride]);
12558 			 twr = c_re(W[7]);
12559 			 twi = c_im(W[7]);
12560 			 tre2_0_0 = (tr * twr) - (ti * twi);
12561 			 tim2_0_0 = (tr * twi) + (ti * twr);
12562 		    }
12563 		    {
12564 			 FFTW_REAL tr;
12565 			 FFTW_REAL ti;
12566 			 FFTW_REAL twr;
12567 			 FFTW_REAL twi;
12568 			 tr = c_re(inout[24 * stride]);
12569 			 ti = c_im(inout[24 * stride]);
12570 			 twr = c_re(W[23]);
12571 			 twi = c_im(W[23]);
12572 			 tre2_1_0 = (tr * twr) - (ti * twi);
12573 			 tim2_1_0 = (tr * twi) + (ti * twr);
12574 		    }
12575 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12576 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12577 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12578 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12579 	       }
12580 	       tre0_0_0 = tre1_0_0 + tre1_0_1;
12581 	       tim0_0_0 = tim1_0_0 + tim1_0_1;
12582 	       tre0_2_0 = tre1_0_0 - tre1_0_1;
12583 	       tim0_2_0 = tim1_0_0 - tim1_0_1;
12584 	       tre0_1_0 = tre1_1_0 + tim1_1_1;
12585 	       tim0_1_0 = tim1_1_0 - tre1_1_1;
12586 	       tre0_3_0 = tre1_1_0 - tim1_1_1;
12587 	       tim0_3_0 = tim1_1_0 + tre1_1_1;
12588 	  }
12589 	  {
12590 	       FFTW_REAL tre1_0_0;
12591 	       FFTW_REAL tim1_0_0;
12592 	       FFTW_REAL tre1_0_1;
12593 	       FFTW_REAL tim1_0_1;
12594 	       FFTW_REAL tre1_1_0;
12595 	       FFTW_REAL tim1_1_0;
12596 	       FFTW_REAL tre1_1_1;
12597 	       FFTW_REAL tim1_1_1;
12598 	       {
12599 		    FFTW_REAL tre2_0_0;
12600 		    FFTW_REAL tim2_0_0;
12601 		    FFTW_REAL tre2_1_0;
12602 		    FFTW_REAL tim2_1_0;
12603 		    {
12604 			 FFTW_REAL tr;
12605 			 FFTW_REAL ti;
12606 			 FFTW_REAL twr;
12607 			 FFTW_REAL twi;
12608 			 tr = c_re(inout[stride]);
12609 			 ti = c_im(inout[stride]);
12610 			 twr = c_re(W[0]);
12611 			 twi = c_im(W[0]);
12612 			 tre2_0_0 = (tr * twr) - (ti * twi);
12613 			 tim2_0_0 = (tr * twi) + (ti * twr);
12614 		    }
12615 		    {
12616 			 FFTW_REAL tr;
12617 			 FFTW_REAL ti;
12618 			 FFTW_REAL twr;
12619 			 FFTW_REAL twi;
12620 			 tr = c_re(inout[17 * stride]);
12621 			 ti = c_im(inout[17 * stride]);
12622 			 twr = c_re(W[16]);
12623 			 twi = c_im(W[16]);
12624 			 tre2_1_0 = (tr * twr) - (ti * twi);
12625 			 tim2_1_0 = (tr * twi) + (ti * twr);
12626 		    }
12627 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12628 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12629 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12630 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12631 	       }
12632 	       {
12633 		    FFTW_REAL tre2_0_0;
12634 		    FFTW_REAL tim2_0_0;
12635 		    FFTW_REAL tre2_1_0;
12636 		    FFTW_REAL tim2_1_0;
12637 		    {
12638 			 FFTW_REAL tr;
12639 			 FFTW_REAL ti;
12640 			 FFTW_REAL twr;
12641 			 FFTW_REAL twi;
12642 			 tr = c_re(inout[9 * stride]);
12643 			 ti = c_im(inout[9 * stride]);
12644 			 twr = c_re(W[8]);
12645 			 twi = c_im(W[8]);
12646 			 tre2_0_0 = (tr * twr) - (ti * twi);
12647 			 tim2_0_0 = (tr * twi) + (ti * twr);
12648 		    }
12649 		    {
12650 			 FFTW_REAL tr;
12651 			 FFTW_REAL ti;
12652 			 FFTW_REAL twr;
12653 			 FFTW_REAL twi;
12654 			 tr = c_re(inout[25 * stride]);
12655 			 ti = c_im(inout[25 * stride]);
12656 			 twr = c_re(W[24]);
12657 			 twi = c_im(W[24]);
12658 			 tre2_1_0 = (tr * twr) - (ti * twi);
12659 			 tim2_1_0 = (tr * twi) + (ti * twr);
12660 		    }
12661 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12662 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12663 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12664 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12665 	       }
12666 	       tre0_0_1 = tre1_0_0 + tre1_0_1;
12667 	       tim0_0_1 = tim1_0_0 + tim1_0_1;
12668 	       tre0_2_1 = tre1_0_0 - tre1_0_1;
12669 	       tim0_2_1 = tim1_0_0 - tim1_0_1;
12670 	       tre0_1_1 = tre1_1_0 + tim1_1_1;
12671 	       tim0_1_1 = tim1_1_0 - tre1_1_1;
12672 	       tre0_3_1 = tre1_1_0 - tim1_1_1;
12673 	       tim0_3_1 = tim1_1_0 + tre1_1_1;
12674 	  }
12675 	  {
12676 	       FFTW_REAL tre1_0_0;
12677 	       FFTW_REAL tim1_0_0;
12678 	       FFTW_REAL tre1_0_1;
12679 	       FFTW_REAL tim1_0_1;
12680 	       FFTW_REAL tre1_1_0;
12681 	       FFTW_REAL tim1_1_0;
12682 	       FFTW_REAL tre1_1_1;
12683 	       FFTW_REAL tim1_1_1;
12684 	       {
12685 		    FFTW_REAL tre2_0_0;
12686 		    FFTW_REAL tim2_0_0;
12687 		    FFTW_REAL tre2_1_0;
12688 		    FFTW_REAL tim2_1_0;
12689 		    {
12690 			 FFTW_REAL tr;
12691 			 FFTW_REAL ti;
12692 			 FFTW_REAL twr;
12693 			 FFTW_REAL twi;
12694 			 tr = c_re(inout[2 * stride]);
12695 			 ti = c_im(inout[2 * stride]);
12696 			 twr = c_re(W[1]);
12697 			 twi = c_im(W[1]);
12698 			 tre2_0_0 = (tr * twr) - (ti * twi);
12699 			 tim2_0_0 = (tr * twi) + (ti * twr);
12700 		    }
12701 		    {
12702 			 FFTW_REAL tr;
12703 			 FFTW_REAL ti;
12704 			 FFTW_REAL twr;
12705 			 FFTW_REAL twi;
12706 			 tr = c_re(inout[18 * stride]);
12707 			 ti = c_im(inout[18 * stride]);
12708 			 twr = c_re(W[17]);
12709 			 twi = c_im(W[17]);
12710 			 tre2_1_0 = (tr * twr) - (ti * twi);
12711 			 tim2_1_0 = (tr * twi) + (ti * twr);
12712 		    }
12713 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12714 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12715 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12716 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12717 	       }
12718 	       {
12719 		    FFTW_REAL tre2_0_0;
12720 		    FFTW_REAL tim2_0_0;
12721 		    FFTW_REAL tre2_1_0;
12722 		    FFTW_REAL tim2_1_0;
12723 		    {
12724 			 FFTW_REAL tr;
12725 			 FFTW_REAL ti;
12726 			 FFTW_REAL twr;
12727 			 FFTW_REAL twi;
12728 			 tr = c_re(inout[10 * stride]);
12729 			 ti = c_im(inout[10 * stride]);
12730 			 twr = c_re(W[9]);
12731 			 twi = c_im(W[9]);
12732 			 tre2_0_0 = (tr * twr) - (ti * twi);
12733 			 tim2_0_0 = (tr * twi) + (ti * twr);
12734 		    }
12735 		    {
12736 			 FFTW_REAL tr;
12737 			 FFTW_REAL ti;
12738 			 FFTW_REAL twr;
12739 			 FFTW_REAL twi;
12740 			 tr = c_re(inout[26 * stride]);
12741 			 ti = c_im(inout[26 * stride]);
12742 			 twr = c_re(W[25]);
12743 			 twi = c_im(W[25]);
12744 			 tre2_1_0 = (tr * twr) - (ti * twi);
12745 			 tim2_1_0 = (tr * twi) + (ti * twr);
12746 		    }
12747 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12748 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12749 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12750 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12751 	       }
12752 	       tre0_0_2 = tre1_0_0 + tre1_0_1;
12753 	       tim0_0_2 = tim1_0_0 + tim1_0_1;
12754 	       tre0_2_2 = tre1_0_0 - tre1_0_1;
12755 	       tim0_2_2 = tim1_0_0 - tim1_0_1;
12756 	       tre0_1_2 = tre1_1_0 + tim1_1_1;
12757 	       tim0_1_2 = tim1_1_0 - tre1_1_1;
12758 	       tre0_3_2 = tre1_1_0 - tim1_1_1;
12759 	       tim0_3_2 = tim1_1_0 + tre1_1_1;
12760 	  }
12761 	  {
12762 	       FFTW_REAL tre1_0_0;
12763 	       FFTW_REAL tim1_0_0;
12764 	       FFTW_REAL tre1_0_1;
12765 	       FFTW_REAL tim1_0_1;
12766 	       FFTW_REAL tre1_1_0;
12767 	       FFTW_REAL tim1_1_0;
12768 	       FFTW_REAL tre1_1_1;
12769 	       FFTW_REAL tim1_1_1;
12770 	       {
12771 		    FFTW_REAL tre2_0_0;
12772 		    FFTW_REAL tim2_0_0;
12773 		    FFTW_REAL tre2_1_0;
12774 		    FFTW_REAL tim2_1_0;
12775 		    {
12776 			 FFTW_REAL tr;
12777 			 FFTW_REAL ti;
12778 			 FFTW_REAL twr;
12779 			 FFTW_REAL twi;
12780 			 tr = c_re(inout[3 * stride]);
12781 			 ti = c_im(inout[3 * stride]);
12782 			 twr = c_re(W[2]);
12783 			 twi = c_im(W[2]);
12784 			 tre2_0_0 = (tr * twr) - (ti * twi);
12785 			 tim2_0_0 = (tr * twi) + (ti * twr);
12786 		    }
12787 		    {
12788 			 FFTW_REAL tr;
12789 			 FFTW_REAL ti;
12790 			 FFTW_REAL twr;
12791 			 FFTW_REAL twi;
12792 			 tr = c_re(inout[19 * stride]);
12793 			 ti = c_im(inout[19 * stride]);
12794 			 twr = c_re(W[18]);
12795 			 twi = c_im(W[18]);
12796 			 tre2_1_0 = (tr * twr) - (ti * twi);
12797 			 tim2_1_0 = (tr * twi) + (ti * twr);
12798 		    }
12799 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12800 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12801 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12802 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12803 	       }
12804 	       {
12805 		    FFTW_REAL tre2_0_0;
12806 		    FFTW_REAL tim2_0_0;
12807 		    FFTW_REAL tre2_1_0;
12808 		    FFTW_REAL tim2_1_0;
12809 		    {
12810 			 FFTW_REAL tr;
12811 			 FFTW_REAL ti;
12812 			 FFTW_REAL twr;
12813 			 FFTW_REAL twi;
12814 			 tr = c_re(inout[11 * stride]);
12815 			 ti = c_im(inout[11 * stride]);
12816 			 twr = c_re(W[10]);
12817 			 twi = c_im(W[10]);
12818 			 tre2_0_0 = (tr * twr) - (ti * twi);
12819 			 tim2_0_0 = (tr * twi) + (ti * twr);
12820 		    }
12821 		    {
12822 			 FFTW_REAL tr;
12823 			 FFTW_REAL ti;
12824 			 FFTW_REAL twr;
12825 			 FFTW_REAL twi;
12826 			 tr = c_re(inout[27 * stride]);
12827 			 ti = c_im(inout[27 * stride]);
12828 			 twr = c_re(W[26]);
12829 			 twi = c_im(W[26]);
12830 			 tre2_1_0 = (tr * twr) - (ti * twi);
12831 			 tim2_1_0 = (tr * twi) + (ti * twr);
12832 		    }
12833 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12834 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12835 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12836 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12837 	       }
12838 	       tre0_0_3 = tre1_0_0 + tre1_0_1;
12839 	       tim0_0_3 = tim1_0_0 + tim1_0_1;
12840 	       tre0_2_3 = tre1_0_0 - tre1_0_1;
12841 	       tim0_2_3 = tim1_0_0 - tim1_0_1;
12842 	       tre0_1_3 = tre1_1_0 + tim1_1_1;
12843 	       tim0_1_3 = tim1_1_0 - tre1_1_1;
12844 	       tre0_3_3 = tre1_1_0 - tim1_1_1;
12845 	       tim0_3_3 = tim1_1_0 + tre1_1_1;
12846 	  }
12847 	  {
12848 	       FFTW_REAL tre1_0_0;
12849 	       FFTW_REAL tim1_0_0;
12850 	       FFTW_REAL tre1_0_1;
12851 	       FFTW_REAL tim1_0_1;
12852 	       FFTW_REAL tre1_1_0;
12853 	       FFTW_REAL tim1_1_0;
12854 	       FFTW_REAL tre1_1_1;
12855 	       FFTW_REAL tim1_1_1;
12856 	       {
12857 		    FFTW_REAL tre2_0_0;
12858 		    FFTW_REAL tim2_0_0;
12859 		    FFTW_REAL tre2_1_0;
12860 		    FFTW_REAL tim2_1_0;
12861 		    {
12862 			 FFTW_REAL tr;
12863 			 FFTW_REAL ti;
12864 			 FFTW_REAL twr;
12865 			 FFTW_REAL twi;
12866 			 tr = c_re(inout[4 * stride]);
12867 			 ti = c_im(inout[4 * stride]);
12868 			 twr = c_re(W[3]);
12869 			 twi = c_im(W[3]);
12870 			 tre2_0_0 = (tr * twr) - (ti * twi);
12871 			 tim2_0_0 = (tr * twi) + (ti * twr);
12872 		    }
12873 		    {
12874 			 FFTW_REAL tr;
12875 			 FFTW_REAL ti;
12876 			 FFTW_REAL twr;
12877 			 FFTW_REAL twi;
12878 			 tr = c_re(inout[20 * stride]);
12879 			 ti = c_im(inout[20 * stride]);
12880 			 twr = c_re(W[19]);
12881 			 twi = c_im(W[19]);
12882 			 tre2_1_0 = (tr * twr) - (ti * twi);
12883 			 tim2_1_0 = (tr * twi) + (ti * twr);
12884 		    }
12885 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12886 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12887 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12888 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12889 	       }
12890 	       {
12891 		    FFTW_REAL tre2_0_0;
12892 		    FFTW_REAL tim2_0_0;
12893 		    FFTW_REAL tre2_1_0;
12894 		    FFTW_REAL tim2_1_0;
12895 		    {
12896 			 FFTW_REAL tr;
12897 			 FFTW_REAL ti;
12898 			 FFTW_REAL twr;
12899 			 FFTW_REAL twi;
12900 			 tr = c_re(inout[12 * stride]);
12901 			 ti = c_im(inout[12 * stride]);
12902 			 twr = c_re(W[11]);
12903 			 twi = c_im(W[11]);
12904 			 tre2_0_0 = (tr * twr) - (ti * twi);
12905 			 tim2_0_0 = (tr * twi) + (ti * twr);
12906 		    }
12907 		    {
12908 			 FFTW_REAL tr;
12909 			 FFTW_REAL ti;
12910 			 FFTW_REAL twr;
12911 			 FFTW_REAL twi;
12912 			 tr = c_re(inout[28 * stride]);
12913 			 ti = c_im(inout[28 * stride]);
12914 			 twr = c_re(W[27]);
12915 			 twi = c_im(W[27]);
12916 			 tre2_1_0 = (tr * twr) - (ti * twi);
12917 			 tim2_1_0 = (tr * twi) + (ti * twr);
12918 		    }
12919 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
12920 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
12921 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
12922 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
12923 	       }
12924 	       tre0_0_4 = tre1_0_0 + tre1_0_1;
12925 	       tim0_0_4 = tim1_0_0 + tim1_0_1;
12926 	       tre0_2_4 = tre1_0_0 - tre1_0_1;
12927 	       tim0_2_4 = tim1_0_0 - tim1_0_1;
12928 	       tre0_1_4 = tre1_1_0 + tim1_1_1;
12929 	       tim0_1_4 = tim1_1_0 - tre1_1_1;
12930 	       tre0_3_4 = tre1_1_0 - tim1_1_1;
12931 	       tim0_3_4 = tim1_1_0 + tre1_1_1;
12932 	  }
12933 	  {
12934 	       FFTW_REAL tre1_0_0;
12935 	       FFTW_REAL tim1_0_0;
12936 	       FFTW_REAL tre1_0_1;
12937 	       FFTW_REAL tim1_0_1;
12938 	       FFTW_REAL tre1_1_0;
12939 	       FFTW_REAL tim1_1_0;
12940 	       FFTW_REAL tre1_1_1;
12941 	       FFTW_REAL tim1_1_1;
12942 	       {
12943 		    FFTW_REAL tre2_0_0;
12944 		    FFTW_REAL tim2_0_0;
12945 		    FFTW_REAL tre2_1_0;
12946 		    FFTW_REAL tim2_1_0;
12947 		    {
12948 			 FFTW_REAL tr;
12949 			 FFTW_REAL ti;
12950 			 FFTW_REAL twr;
12951 			 FFTW_REAL twi;
12952 			 tr = c_re(inout[5 * stride]);
12953 			 ti = c_im(inout[5 * stride]);
12954 			 twr = c_re(W[4]);
12955 			 twi = c_im(W[4]);
12956 			 tre2_0_0 = (tr * twr) - (ti * twi);
12957 			 tim2_0_0 = (tr * twi) + (ti * twr);
12958 		    }
12959 		    {
12960 			 FFTW_REAL tr;
12961 			 FFTW_REAL ti;
12962 			 FFTW_REAL twr;
12963 			 FFTW_REAL twi;
12964 			 tr = c_re(inout[21 * stride]);
12965 			 ti = c_im(inout[21 * stride]);
12966 			 twr = c_re(W[20]);
12967 			 twi = c_im(W[20]);
12968 			 tre2_1_0 = (tr * twr) - (ti * twi);
12969 			 tim2_1_0 = (tr * twi) + (ti * twr);
12970 		    }
12971 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
12972 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
12973 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
12974 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
12975 	       }
12976 	       {
12977 		    FFTW_REAL tre2_0_0;
12978 		    FFTW_REAL tim2_0_0;
12979 		    FFTW_REAL tre2_1_0;
12980 		    FFTW_REAL tim2_1_0;
12981 		    {
12982 			 FFTW_REAL tr;
12983 			 FFTW_REAL ti;
12984 			 FFTW_REAL twr;
12985 			 FFTW_REAL twi;
12986 			 tr = c_re(inout[13 * stride]);
12987 			 ti = c_im(inout[13 * stride]);
12988 			 twr = c_re(W[12]);
12989 			 twi = c_im(W[12]);
12990 			 tre2_0_0 = (tr * twr) - (ti * twi);
12991 			 tim2_0_0 = (tr * twi) + (ti * twr);
12992 		    }
12993 		    {
12994 			 FFTW_REAL tr;
12995 			 FFTW_REAL ti;
12996 			 FFTW_REAL twr;
12997 			 FFTW_REAL twi;
12998 			 tr = c_re(inout[29 * stride]);
12999 			 ti = c_im(inout[29 * stride]);
13000 			 twr = c_re(W[28]);
13001 			 twi = c_im(W[28]);
13002 			 tre2_1_0 = (tr * twr) - (ti * twi);
13003 			 tim2_1_0 = (tr * twi) + (ti * twr);
13004 		    }
13005 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13006 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
13007 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13008 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
13009 	       }
13010 	       tre0_0_5 = tre1_0_0 + tre1_0_1;
13011 	       tim0_0_5 = tim1_0_0 + tim1_0_1;
13012 	       tre0_2_5 = tre1_0_0 - tre1_0_1;
13013 	       tim0_2_5 = tim1_0_0 - tim1_0_1;
13014 	       tre0_1_5 = tre1_1_0 + tim1_1_1;
13015 	       tim0_1_5 = tim1_1_0 - tre1_1_1;
13016 	       tre0_3_5 = tre1_1_0 - tim1_1_1;
13017 	       tim0_3_5 = tim1_1_0 + tre1_1_1;
13018 	  }
13019 	  {
13020 	       FFTW_REAL tre1_0_0;
13021 	       FFTW_REAL tim1_0_0;
13022 	       FFTW_REAL tre1_0_1;
13023 	       FFTW_REAL tim1_0_1;
13024 	       FFTW_REAL tre1_1_0;
13025 	       FFTW_REAL tim1_1_0;
13026 	       FFTW_REAL tre1_1_1;
13027 	       FFTW_REAL tim1_1_1;
13028 	       {
13029 		    FFTW_REAL tre2_0_0;
13030 		    FFTW_REAL tim2_0_0;
13031 		    FFTW_REAL tre2_1_0;
13032 		    FFTW_REAL tim2_1_0;
13033 		    {
13034 			 FFTW_REAL tr;
13035 			 FFTW_REAL ti;
13036 			 FFTW_REAL twr;
13037 			 FFTW_REAL twi;
13038 			 tr = c_re(inout[6 * stride]);
13039 			 ti = c_im(inout[6 * stride]);
13040 			 twr = c_re(W[5]);
13041 			 twi = c_im(W[5]);
13042 			 tre2_0_0 = (tr * twr) - (ti * twi);
13043 			 tim2_0_0 = (tr * twi) + (ti * twr);
13044 		    }
13045 		    {
13046 			 FFTW_REAL tr;
13047 			 FFTW_REAL ti;
13048 			 FFTW_REAL twr;
13049 			 FFTW_REAL twi;
13050 			 tr = c_re(inout[22 * stride]);
13051 			 ti = c_im(inout[22 * stride]);
13052 			 twr = c_re(W[21]);
13053 			 twi = c_im(W[21]);
13054 			 tre2_1_0 = (tr * twr) - (ti * twi);
13055 			 tim2_1_0 = (tr * twi) + (ti * twr);
13056 		    }
13057 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
13058 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
13059 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
13060 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
13061 	       }
13062 	       {
13063 		    FFTW_REAL tre2_0_0;
13064 		    FFTW_REAL tim2_0_0;
13065 		    FFTW_REAL tre2_1_0;
13066 		    FFTW_REAL tim2_1_0;
13067 		    {
13068 			 FFTW_REAL tr;
13069 			 FFTW_REAL ti;
13070 			 FFTW_REAL twr;
13071 			 FFTW_REAL twi;
13072 			 tr = c_re(inout[14 * stride]);
13073 			 ti = c_im(inout[14 * stride]);
13074 			 twr = c_re(W[13]);
13075 			 twi = c_im(W[13]);
13076 			 tre2_0_0 = (tr * twr) - (ti * twi);
13077 			 tim2_0_0 = (tr * twi) + (ti * twr);
13078 		    }
13079 		    {
13080 			 FFTW_REAL tr;
13081 			 FFTW_REAL ti;
13082 			 FFTW_REAL twr;
13083 			 FFTW_REAL twi;
13084 			 tr = c_re(inout[30 * stride]);
13085 			 ti = c_im(inout[30 * stride]);
13086 			 twr = c_re(W[29]);
13087 			 twi = c_im(W[29]);
13088 			 tre2_1_0 = (tr * twr) - (ti * twi);
13089 			 tim2_1_0 = (tr * twi) + (ti * twr);
13090 		    }
13091 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13092 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
13093 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13094 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
13095 	       }
13096 	       tre0_0_6 = tre1_0_0 + tre1_0_1;
13097 	       tim0_0_6 = tim1_0_0 + tim1_0_1;
13098 	       tre0_2_6 = tre1_0_0 - tre1_0_1;
13099 	       tim0_2_6 = tim1_0_0 - tim1_0_1;
13100 	       tre0_1_6 = tre1_1_0 + tim1_1_1;
13101 	       tim0_1_6 = tim1_1_0 - tre1_1_1;
13102 	       tre0_3_6 = tre1_1_0 - tim1_1_1;
13103 	       tim0_3_6 = tim1_1_0 + tre1_1_1;
13104 	  }
13105 	  {
13106 	       FFTW_REAL tre1_0_0;
13107 	       FFTW_REAL tim1_0_0;
13108 	       FFTW_REAL tre1_0_1;
13109 	       FFTW_REAL tim1_0_1;
13110 	       FFTW_REAL tre1_1_0;
13111 	       FFTW_REAL tim1_1_0;
13112 	       FFTW_REAL tre1_1_1;
13113 	       FFTW_REAL tim1_1_1;
13114 	       {
13115 		    FFTW_REAL tre2_0_0;
13116 		    FFTW_REAL tim2_0_0;
13117 		    FFTW_REAL tre2_1_0;
13118 		    FFTW_REAL tim2_1_0;
13119 		    {
13120 			 FFTW_REAL tr;
13121 			 FFTW_REAL ti;
13122 			 FFTW_REAL twr;
13123 			 FFTW_REAL twi;
13124 			 tr = c_re(inout[7 * stride]);
13125 			 ti = c_im(inout[7 * stride]);
13126 			 twr = c_re(W[6]);
13127 			 twi = c_im(W[6]);
13128 			 tre2_0_0 = (tr * twr) - (ti * twi);
13129 			 tim2_0_0 = (tr * twi) + (ti * twr);
13130 		    }
13131 		    {
13132 			 FFTW_REAL tr;
13133 			 FFTW_REAL ti;
13134 			 FFTW_REAL twr;
13135 			 FFTW_REAL twi;
13136 			 tr = c_re(inout[23 * stride]);
13137 			 ti = c_im(inout[23 * stride]);
13138 			 twr = c_re(W[22]);
13139 			 twi = c_im(W[22]);
13140 			 tre2_1_0 = (tr * twr) - (ti * twi);
13141 			 tim2_1_0 = (tr * twi) + (ti * twr);
13142 		    }
13143 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
13144 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
13145 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
13146 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
13147 	       }
13148 	       {
13149 		    FFTW_REAL tre2_0_0;
13150 		    FFTW_REAL tim2_0_0;
13151 		    FFTW_REAL tre2_1_0;
13152 		    FFTW_REAL tim2_1_0;
13153 		    {
13154 			 FFTW_REAL tr;
13155 			 FFTW_REAL ti;
13156 			 FFTW_REAL twr;
13157 			 FFTW_REAL twi;
13158 			 tr = c_re(inout[15 * stride]);
13159 			 ti = c_im(inout[15 * stride]);
13160 			 twr = c_re(W[14]);
13161 			 twi = c_im(W[14]);
13162 			 tre2_0_0 = (tr * twr) - (ti * twi);
13163 			 tim2_0_0 = (tr * twi) + (ti * twr);
13164 		    }
13165 		    {
13166 			 FFTW_REAL tr;
13167 			 FFTW_REAL ti;
13168 			 FFTW_REAL twr;
13169 			 FFTW_REAL twi;
13170 			 tr = c_re(inout[31 * stride]);
13171 			 ti = c_im(inout[31 * stride]);
13172 			 twr = c_re(W[30]);
13173 			 twi = c_im(W[30]);
13174 			 tre2_1_0 = (tr * twr) - (ti * twi);
13175 			 tim2_1_0 = (tr * twi) + (ti * twr);
13176 		    }
13177 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13178 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
13179 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13180 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
13181 	       }
13182 	       tre0_0_7 = tre1_0_0 + tre1_0_1;
13183 	       tim0_0_7 = tim1_0_0 + tim1_0_1;
13184 	       tre0_2_7 = tre1_0_0 - tre1_0_1;
13185 	       tim0_2_7 = tim1_0_0 - tim1_0_1;
13186 	       tre0_1_7 = tre1_1_0 + tim1_1_1;
13187 	       tim0_1_7 = tim1_1_0 - tre1_1_1;
13188 	       tre0_3_7 = tre1_1_0 - tim1_1_1;
13189 	       tim0_3_7 = tim1_1_0 + tre1_1_1;
13190 	  }
13191 	  {
13192 	       FFTW_REAL tre1_0_0;
13193 	       FFTW_REAL tim1_0_0;
13194 	       FFTW_REAL tre1_0_1;
13195 	       FFTW_REAL tim1_0_1;
13196 	       FFTW_REAL tre1_0_2;
13197 	       FFTW_REAL tim1_0_2;
13198 	       FFTW_REAL tre1_0_3;
13199 	       FFTW_REAL tim1_0_3;
13200 	       FFTW_REAL tre1_1_0;
13201 	       FFTW_REAL tim1_1_0;
13202 	       FFTW_REAL tre1_1_1;
13203 	       FFTW_REAL tim1_1_1;
13204 	       FFTW_REAL tre1_1_2;
13205 	       FFTW_REAL tim1_1_2;
13206 	       FFTW_REAL tre1_1_3;
13207 	       FFTW_REAL tim1_1_3;
13208 	       tre1_0_0 = tre0_0_0 + tre0_0_4;
13209 	       tim1_0_0 = tim0_0_0 + tim0_0_4;
13210 	       tre1_1_0 = tre0_0_0 - tre0_0_4;
13211 	       tim1_1_0 = tim0_0_0 - tim0_0_4;
13212 	       tre1_0_1 = tre0_0_1 + tre0_0_5;
13213 	       tim1_0_1 = tim0_0_1 + tim0_0_5;
13214 	       tre1_1_1 = tre0_0_1 - tre0_0_5;
13215 	       tim1_1_1 = tim0_0_1 - tim0_0_5;
13216 	       tre1_0_2 = tre0_0_2 + tre0_0_6;
13217 	       tim1_0_2 = tim0_0_2 + tim0_0_6;
13218 	       tre1_1_2 = tre0_0_2 - tre0_0_6;
13219 	       tim1_1_2 = tim0_0_2 - tim0_0_6;
13220 	       tre1_0_3 = tre0_0_3 + tre0_0_7;
13221 	       tim1_0_3 = tim0_0_3 + tim0_0_7;
13222 	       tre1_1_3 = tre0_0_3 - tre0_0_7;
13223 	       tim1_1_3 = tim0_0_3 - tim0_0_7;
13224 	       {
13225 		    FFTW_REAL tre2_0_0;
13226 		    FFTW_REAL tim2_0_0;
13227 		    FFTW_REAL tre2_0_1;
13228 		    FFTW_REAL tim2_0_1;
13229 		    FFTW_REAL tre2_1_0;
13230 		    FFTW_REAL tim2_1_0;
13231 		    FFTW_REAL tre2_1_1;
13232 		    FFTW_REAL tim2_1_1;
13233 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
13234 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
13235 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
13236 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
13237 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
13238 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
13239 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
13240 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
13241 		    c_re(inout[0]) = tre2_0_0 + tre2_0_1;
13242 		    c_im(inout[0]) = tim2_0_0 + tim2_0_1;
13243 		    c_re(inout[16 * stride]) = tre2_0_0 - tre2_0_1;
13244 		    c_im(inout[16 * stride]) = tim2_0_0 - tim2_0_1;
13245 		    c_re(inout[8 * stride]) = tre2_1_0 + tim2_1_1;
13246 		    c_im(inout[8 * stride]) = tim2_1_0 - tre2_1_1;
13247 		    c_re(inout[24 * stride]) = tre2_1_0 - tim2_1_1;
13248 		    c_im(inout[24 * stride]) = tim2_1_0 + tre2_1_1;
13249 	       }
13250 	       {
13251 		    FFTW_REAL tre2_0_0;
13252 		    FFTW_REAL tim2_0_0;
13253 		    FFTW_REAL tre2_0_1;
13254 		    FFTW_REAL tim2_0_1;
13255 		    FFTW_REAL tre2_1_0;
13256 		    FFTW_REAL tim2_1_0;
13257 		    FFTW_REAL tre2_1_1;
13258 		    FFTW_REAL tim2_1_1;
13259 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
13260 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
13261 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
13262 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
13263 		    {
13264 			 FFTW_REAL tre3_0_0;
13265 			 FFTW_REAL tim3_0_0;
13266 			 FFTW_REAL tre3_1_0;
13267 			 FFTW_REAL tim3_1_0;
13268 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
13269 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
13270 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
13271 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
13272 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
13273 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
13274 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
13275 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
13276 		    }
13277 		    c_re(inout[4 * stride]) = tre2_0_0 + tre2_0_1;
13278 		    c_im(inout[4 * stride]) = tim2_0_0 + tim2_0_1;
13279 		    c_re(inout[20 * stride]) = tre2_0_0 - tre2_0_1;
13280 		    c_im(inout[20 * stride]) = tim2_0_0 - tim2_0_1;
13281 		    c_re(inout[12 * stride]) = tre2_1_0 + tim2_1_1;
13282 		    c_im(inout[12 * stride]) = tim2_1_0 - tre2_1_1;
13283 		    c_re(inout[28 * stride]) = tre2_1_0 - tim2_1_1;
13284 		    c_im(inout[28 * stride]) = tim2_1_0 + tre2_1_1;
13285 	       }
13286 	  }
13287 	  {
13288 	       FFTW_REAL tre1_0_0;
13289 	       FFTW_REAL tim1_0_0;
13290 	       FFTW_REAL tre1_0_1;
13291 	       FFTW_REAL tim1_0_1;
13292 	       FFTW_REAL tre1_0_2;
13293 	       FFTW_REAL tim1_0_2;
13294 	       FFTW_REAL tre1_0_3;
13295 	       FFTW_REAL tim1_0_3;
13296 	       FFTW_REAL tre1_1_0;
13297 	       FFTW_REAL tim1_1_0;
13298 	       FFTW_REAL tre1_1_1;
13299 	       FFTW_REAL tim1_1_1;
13300 	       FFTW_REAL tre1_1_2;
13301 	       FFTW_REAL tim1_1_2;
13302 	       FFTW_REAL tre1_1_3;
13303 	       FFTW_REAL tim1_1_3;
13304 	       {
13305 		    FFTW_REAL tre2_1_0;
13306 		    FFTW_REAL tim2_1_0;
13307 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_4 + tim0_1_4);
13308 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_4 - tre0_1_4);
13309 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
13310 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
13311 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
13312 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
13313 	       }
13314 	       {
13315 		    FFTW_REAL tre2_0_0;
13316 		    FFTW_REAL tim2_0_0;
13317 		    FFTW_REAL tre2_1_0;
13318 		    FFTW_REAL tim2_1_0;
13319 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_1) + (((FFTW_REAL) FFTW_K195090322) * tim0_1_1);
13320 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_1) - (((FFTW_REAL) FFTW_K195090322) * tre0_1_1);
13321 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_1_5) + (((FFTW_REAL) FFTW_K831469612) * tim0_1_5);
13322 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_1_5) - (((FFTW_REAL) FFTW_K831469612) * tre0_1_5);
13323 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13324 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
13325 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13326 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
13327 	       }
13328 	       {
13329 		    FFTW_REAL tre2_0_0;
13330 		    FFTW_REAL tim2_0_0;
13331 		    FFTW_REAL tre2_1_0;
13332 		    FFTW_REAL tim2_1_0;
13333 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_2) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_2);
13334 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_2) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_2);
13335 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_6) + (((FFTW_REAL) FFTW_K923879532) * tim0_1_6);
13336 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_1_6);
13337 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
13338 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
13339 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
13340 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
13341 	       }
13342 	       {
13343 		    FFTW_REAL tre2_0_0;
13344 		    FFTW_REAL tim2_0_0;
13345 		    FFTW_REAL tre2_1_0;
13346 		    FFTW_REAL tim2_1_0;
13347 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_3) + (((FFTW_REAL) FFTW_K555570233) * tim0_1_3);
13348 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_3) - (((FFTW_REAL) FFTW_K555570233) * tre0_1_3);
13349 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_1_7) + (((FFTW_REAL) FFTW_K980785280) * tim0_1_7);
13350 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_1_7) - (((FFTW_REAL) FFTW_K980785280) * tre0_1_7);
13351 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
13352 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
13353 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
13354 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
13355 	       }
13356 	       {
13357 		    FFTW_REAL tre2_0_0;
13358 		    FFTW_REAL tim2_0_0;
13359 		    FFTW_REAL tre2_0_1;
13360 		    FFTW_REAL tim2_0_1;
13361 		    FFTW_REAL tre2_1_0;
13362 		    FFTW_REAL tim2_1_0;
13363 		    FFTW_REAL tre2_1_1;
13364 		    FFTW_REAL tim2_1_1;
13365 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
13366 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
13367 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
13368 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
13369 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
13370 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
13371 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
13372 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
13373 		    c_re(inout[stride]) = tre2_0_0 + tre2_0_1;
13374 		    c_im(inout[stride]) = tim2_0_0 + tim2_0_1;
13375 		    c_re(inout[17 * stride]) = tre2_0_0 - tre2_0_1;
13376 		    c_im(inout[17 * stride]) = tim2_0_0 - tim2_0_1;
13377 		    c_re(inout[9 * stride]) = tre2_1_0 + tim2_1_1;
13378 		    c_im(inout[9 * stride]) = tim2_1_0 - tre2_1_1;
13379 		    c_re(inout[25 * stride]) = tre2_1_0 - tim2_1_1;
13380 		    c_im(inout[25 * stride]) = tim2_1_0 + tre2_1_1;
13381 	       }
13382 	       {
13383 		    FFTW_REAL tre2_0_0;
13384 		    FFTW_REAL tim2_0_0;
13385 		    FFTW_REAL tre2_0_1;
13386 		    FFTW_REAL tim2_0_1;
13387 		    FFTW_REAL tre2_1_0;
13388 		    FFTW_REAL tim2_1_0;
13389 		    FFTW_REAL tre2_1_1;
13390 		    FFTW_REAL tim2_1_1;
13391 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
13392 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
13393 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
13394 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
13395 		    {
13396 			 FFTW_REAL tre3_0_0;
13397 			 FFTW_REAL tim3_0_0;
13398 			 FFTW_REAL tre3_1_0;
13399 			 FFTW_REAL tim3_1_0;
13400 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
13401 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
13402 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
13403 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
13404 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
13405 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
13406 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
13407 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
13408 		    }
13409 		    c_re(inout[5 * stride]) = tre2_0_0 + tre2_0_1;
13410 		    c_im(inout[5 * stride]) = tim2_0_0 + tim2_0_1;
13411 		    c_re(inout[21 * stride]) = tre2_0_0 - tre2_0_1;
13412 		    c_im(inout[21 * stride]) = tim2_0_0 - tim2_0_1;
13413 		    c_re(inout[13 * stride]) = tre2_1_0 + tim2_1_1;
13414 		    c_im(inout[13 * stride]) = tim2_1_0 - tre2_1_1;
13415 		    c_re(inout[29 * stride]) = tre2_1_0 - tim2_1_1;
13416 		    c_im(inout[29 * stride]) = tim2_1_0 + tre2_1_1;
13417 	       }
13418 	  }
13419 	  {
13420 	       FFTW_REAL tre1_0_0;
13421 	       FFTW_REAL tim1_0_0;
13422 	       FFTW_REAL tre1_0_1;
13423 	       FFTW_REAL tim1_0_1;
13424 	       FFTW_REAL tre1_0_2;
13425 	       FFTW_REAL tim1_0_2;
13426 	       FFTW_REAL tre1_0_3;
13427 	       FFTW_REAL tim1_0_3;
13428 	       FFTW_REAL tre1_1_0;
13429 	       FFTW_REAL tim1_1_0;
13430 	       FFTW_REAL tre1_1_1;
13431 	       FFTW_REAL tim1_1_1;
13432 	       FFTW_REAL tre1_1_2;
13433 	       FFTW_REAL tim1_1_2;
13434 	       FFTW_REAL tre1_1_3;
13435 	       FFTW_REAL tim1_1_3;
13436 	       tre1_0_0 = tre0_2_0 + tim0_2_4;
13437 	       tim1_0_0 = tim0_2_0 - tre0_2_4;
13438 	       tre1_1_0 = tre0_2_0 - tim0_2_4;
13439 	       tim1_1_0 = tim0_2_0 + tre0_2_4;
13440 	       {
13441 		    FFTW_REAL tre2_0_0;
13442 		    FFTW_REAL tim2_0_0;
13443 		    FFTW_REAL tre2_1_0;
13444 		    FFTW_REAL tim2_1_0;
13445 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_1);
13446 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_1);
13447 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_5) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_5);
13448 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_5) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_5);
13449 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13450 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
13451 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13452 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
13453 	       }
13454 	       {
13455 		    FFTW_REAL tre2_0_0;
13456 		    FFTW_REAL tim2_0_0;
13457 		    FFTW_REAL tre2_1_0;
13458 		    FFTW_REAL tim2_1_0;
13459 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_2 + tim0_2_2);
13460 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_2 - tre0_2_2);
13461 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_6 - tre0_2_6);
13462 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_6 + tre0_2_6);
13463 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
13464 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
13465 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
13466 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
13467 	       }
13468 	       {
13469 		    FFTW_REAL tre2_0_0;
13470 		    FFTW_REAL tim2_0_0;
13471 		    FFTW_REAL tre2_1_0;
13472 		    FFTW_REAL tim2_1_0;
13473 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_3);
13474 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_3);
13475 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_7) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_7);
13476 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_7) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_7);
13477 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
13478 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
13479 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
13480 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
13481 	       }
13482 	       {
13483 		    FFTW_REAL tre2_0_0;
13484 		    FFTW_REAL tim2_0_0;
13485 		    FFTW_REAL tre2_0_1;
13486 		    FFTW_REAL tim2_0_1;
13487 		    FFTW_REAL tre2_1_0;
13488 		    FFTW_REAL tim2_1_0;
13489 		    FFTW_REAL tre2_1_1;
13490 		    FFTW_REAL tim2_1_1;
13491 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
13492 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
13493 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
13494 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
13495 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
13496 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
13497 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
13498 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
13499 		    c_re(inout[2 * stride]) = tre2_0_0 + tre2_0_1;
13500 		    c_im(inout[2 * stride]) = tim2_0_0 + tim2_0_1;
13501 		    c_re(inout[18 * stride]) = tre2_0_0 - tre2_0_1;
13502 		    c_im(inout[18 * stride]) = tim2_0_0 - tim2_0_1;
13503 		    c_re(inout[10 * stride]) = tre2_1_0 + tim2_1_1;
13504 		    c_im(inout[10 * stride]) = tim2_1_0 - tre2_1_1;
13505 		    c_re(inout[26 * stride]) = tre2_1_0 - tim2_1_1;
13506 		    c_im(inout[26 * stride]) = tim2_1_0 + tre2_1_1;
13507 	       }
13508 	       {
13509 		    FFTW_REAL tre2_0_0;
13510 		    FFTW_REAL tim2_0_0;
13511 		    FFTW_REAL tre2_0_1;
13512 		    FFTW_REAL tim2_0_1;
13513 		    FFTW_REAL tre2_1_0;
13514 		    FFTW_REAL tim2_1_0;
13515 		    FFTW_REAL tre2_1_1;
13516 		    FFTW_REAL tim2_1_1;
13517 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
13518 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
13519 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
13520 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
13521 		    {
13522 			 FFTW_REAL tre3_0_0;
13523 			 FFTW_REAL tim3_0_0;
13524 			 FFTW_REAL tre3_1_0;
13525 			 FFTW_REAL tim3_1_0;
13526 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
13527 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
13528 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
13529 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
13530 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
13531 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
13532 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
13533 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
13534 		    }
13535 		    c_re(inout[6 * stride]) = tre2_0_0 + tre2_0_1;
13536 		    c_im(inout[6 * stride]) = tim2_0_0 + tim2_0_1;
13537 		    c_re(inout[22 * stride]) = tre2_0_0 - tre2_0_1;
13538 		    c_im(inout[22 * stride]) = tim2_0_0 - tim2_0_1;
13539 		    c_re(inout[14 * stride]) = tre2_1_0 + tim2_1_1;
13540 		    c_im(inout[14 * stride]) = tim2_1_0 - tre2_1_1;
13541 		    c_re(inout[30 * stride]) = tre2_1_0 - tim2_1_1;
13542 		    c_im(inout[30 * stride]) = tim2_1_0 + tre2_1_1;
13543 	       }
13544 	  }
13545 	  {
13546 	       FFTW_REAL tre1_0_0;
13547 	       FFTW_REAL tim1_0_0;
13548 	       FFTW_REAL tre1_0_1;
13549 	       FFTW_REAL tim1_0_1;
13550 	       FFTW_REAL tre1_0_2;
13551 	       FFTW_REAL tim1_0_2;
13552 	       FFTW_REAL tre1_0_3;
13553 	       FFTW_REAL tim1_0_3;
13554 	       FFTW_REAL tre1_1_0;
13555 	       FFTW_REAL tim1_1_0;
13556 	       FFTW_REAL tre1_1_1;
13557 	       FFTW_REAL tim1_1_1;
13558 	       FFTW_REAL tre1_1_2;
13559 	       FFTW_REAL tim1_1_2;
13560 	       FFTW_REAL tre1_1_3;
13561 	       FFTW_REAL tim1_1_3;
13562 	       {
13563 		    FFTW_REAL tre2_1_0;
13564 		    FFTW_REAL tim2_1_0;
13565 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_4 - tre0_3_4);
13566 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_3_4 + tre0_3_4);
13567 		    tre1_0_0 = tre0_3_0 + tre2_1_0;
13568 		    tim1_0_0 = tim0_3_0 - tim2_1_0;
13569 		    tre1_1_0 = tre0_3_0 - tre2_1_0;
13570 		    tim1_1_0 = tim0_3_0 + tim2_1_0;
13571 	       }
13572 	       {
13573 		    FFTW_REAL tre2_0_0;
13574 		    FFTW_REAL tim2_0_0;
13575 		    FFTW_REAL tre2_1_0;
13576 		    FFTW_REAL tim2_1_0;
13577 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_1) + (((FFTW_REAL) FFTW_K555570233) * tim0_3_1);
13578 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_1) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_1);
13579 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_5) - (((FFTW_REAL) FFTW_K980785280) * tre0_3_5);
13580 		    tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_5) + (((FFTW_REAL) FFTW_K195090322) * tre0_3_5);
13581 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
13582 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
13583 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
13584 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
13585 	       }
13586 	       {
13587 		    FFTW_REAL tre2_0_0;
13588 		    FFTW_REAL tim2_0_0;
13589 		    FFTW_REAL tre2_1_0;
13590 		    FFTW_REAL tim2_1_0;
13591 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_2) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_2);
13592 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_2) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_2);
13593 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_3_6) + (((FFTW_REAL) FFTW_K382683432) * tim0_3_6);
13594 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_6);
13595 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
13596 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
13597 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
13598 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
13599 	       }
13600 	       {
13601 		    FFTW_REAL tre2_0_0;
13602 		    FFTW_REAL tim2_0_0;
13603 		    FFTW_REAL tre2_1_0;
13604 		    FFTW_REAL tim2_1_0;
13605 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_3) - (((FFTW_REAL) FFTW_K195090322) * tre0_3_3);
13606 		    tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_3) + (((FFTW_REAL) FFTW_K980785280) * tre0_3_3);
13607 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_3_7) + (((FFTW_REAL) FFTW_K831469612) * tim0_3_7);
13608 		    tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_7) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_7);
13609 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
13610 		    tim1_0_3 = tim2_1_0 - tim2_0_0;
13611 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
13612 		    tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
13613 	       }
13614 	       {
13615 		    FFTW_REAL tre2_0_0;
13616 		    FFTW_REAL tim2_0_0;
13617 		    FFTW_REAL tre2_0_1;
13618 		    FFTW_REAL tim2_0_1;
13619 		    FFTW_REAL tre2_1_0;
13620 		    FFTW_REAL tim2_1_0;
13621 		    FFTW_REAL tre2_1_1;
13622 		    FFTW_REAL tim2_1_1;
13623 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
13624 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
13625 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
13626 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
13627 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
13628 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
13629 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
13630 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
13631 		    c_re(inout[3 * stride]) = tre2_0_0 + tre2_0_1;
13632 		    c_im(inout[3 * stride]) = tim2_0_0 + tim2_0_1;
13633 		    c_re(inout[19 * stride]) = tre2_0_0 - tre2_0_1;
13634 		    c_im(inout[19 * stride]) = tim2_0_0 - tim2_0_1;
13635 		    c_re(inout[11 * stride]) = tre2_1_0 + tim2_1_1;
13636 		    c_im(inout[11 * stride]) = tim2_1_0 - tre2_1_1;
13637 		    c_re(inout[27 * stride]) = tre2_1_0 - tim2_1_1;
13638 		    c_im(inout[27 * stride]) = tim2_1_0 + tre2_1_1;
13639 	       }
13640 	       {
13641 		    FFTW_REAL tre2_0_0;
13642 		    FFTW_REAL tim2_0_0;
13643 		    FFTW_REAL tre2_0_1;
13644 		    FFTW_REAL tim2_0_1;
13645 		    FFTW_REAL tre2_1_0;
13646 		    FFTW_REAL tim2_1_0;
13647 		    FFTW_REAL tre2_1_1;
13648 		    FFTW_REAL tim2_1_1;
13649 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
13650 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
13651 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
13652 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
13653 		    {
13654 			 FFTW_REAL tre3_0_0;
13655 			 FFTW_REAL tim3_0_0;
13656 			 FFTW_REAL tre3_1_0;
13657 			 FFTW_REAL tim3_1_0;
13658 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
13659 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
13660 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
13661 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
13662 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
13663 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
13664 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
13665 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
13666 		    }
13667 		    c_re(inout[7 * stride]) = tre2_0_0 + tre2_0_1;
13668 		    c_im(inout[7 * stride]) = tim2_0_0 + tim2_0_1;
13669 		    c_re(inout[23 * stride]) = tre2_0_0 - tre2_0_1;
13670 		    c_im(inout[23 * stride]) = tim2_0_0 - tim2_0_1;
13671 		    c_re(inout[15 * stride]) = tre2_1_0 + tim2_1_1;
13672 		    c_im(inout[15 * stride]) = tim2_1_0 - tre2_1_1;
13673 		    c_re(inout[31 * stride]) = tre2_1_0 - tim2_1_1;
13674 		    c_im(inout[31 * stride]) = tim2_1_0 + tre2_1_1;
13675 	       }
13676 	  }
13677      }
13678 }
13679 
13680 /* This function contains 22 FP additions and 12 FP multiplications */
13681 
/*
 * In-place radix-4 butterfly with twiddle multiplication.
 *
 * Performs m passes.  In each pass the four complex values at offsets
 * 0, stride, 2*stride and 3*stride from the current data pointer are
 * read, the last three are multiplied by W[0], W[1] and W[2]
 * respectively, and four combined results are stored back at the same
 * offsets.  Between passes the data pointer moves forward by dist
 * elements and W moves forward by 3 entries.
 *
 *   A      - first element of the data; overwritten with the results
 *   W      - twiddle factors, 3 consumed per pass (read only)
 *   stride - element distance between the four values of one pass
 *   m      - number of passes; the loop body never runs when m <= 0
 *   dist   - element distance between the data of consecutive passes
 */
static void fftw_twiddle_4(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *io = A;
     int pass;

     for (pass = 0; pass < m; ++pass, io += dist, W += 3) {
	  /* sums and differences of the even (0, 2) and odd (1, 3) pairs */
	  FFTW_REAL ev_sum_re, ev_sum_im, ev_dif_re, ev_dif_im;
	  FFTW_REAL od_sum_re, od_sum_im, od_dif_re, od_dif_im;

	  {
	       /* even pair: element 0 as is, element 2 scaled by W[1] */
	       const FFTW_REAL x0_re = c_re(io[0]);
	       const FFTW_REAL x0_im = c_im(io[0]);
	       const FFTW_REAL in_re = c_re(io[2 * stride]);
	       const FFTW_REAL in_im = c_im(io[2 * stride]);
	       const FFTW_REAL w_re = c_re(W[1]);
	       const FFTW_REAL w_im = c_im(W[1]);
	       const FFTW_REAL x2_re = (in_re * w_re) - (in_im * w_im);
	       const FFTW_REAL x2_im = (in_re * w_im) + (in_im * w_re);

	       ev_sum_re = x0_re + x2_re;
	       ev_sum_im = x0_im + x2_im;
	       ev_dif_re = x0_re - x2_re;
	       ev_dif_im = x0_im - x2_im;
	  }
	  {
	       /* odd pair: element 1 scaled by W[0], element 3 by W[2] */
	       FFTW_REAL x1_re, x1_im, x3_re, x3_im;

	       {
		    const FFTW_REAL in_re = c_re(io[stride]);
		    const FFTW_REAL in_im = c_im(io[stride]);
		    const FFTW_REAL w_re = c_re(W[0]);
		    const FFTW_REAL w_im = c_im(W[0]);

		    x1_re = (in_re * w_re) - (in_im * w_im);
		    x1_im = (in_re * w_im) + (in_im * w_re);
	       }
	       {
		    const FFTW_REAL in_re = c_re(io[3 * stride]);
		    const FFTW_REAL in_im = c_im(io[3 * stride]);
		    const FFTW_REAL w_re = c_re(W[2]);
		    const FFTW_REAL w_im = c_im(W[2]);

		    x3_re = (in_re * w_re) - (in_im * w_im);
		    x3_im = (in_re * w_im) + (in_im * w_re);
	       }

	       od_sum_re = x1_re + x3_re;
	       od_sum_im = x1_im + x3_im;
	       od_dif_re = x1_re - x3_re;
	       od_dif_im = x1_im - x3_im;
	  }

	  /*
	   * All four inputs have been read by now, so the stores below
	   * cannot clobber a value that is still needed.
	   */
	  c_re(io[0]) = ev_sum_re + od_sum_re;
	  c_im(io[0]) = ev_sum_im + od_sum_im;
	  c_re(io[2 * stride]) = ev_sum_re - od_sum_re;
	  c_im(io[2 * stride]) = ev_sum_im - od_sum_im;

	  /*
	   * Outputs 1 and 3 combine the even difference with the odd
	   * difference multiplied by -i and +i respectively, which is
	   * why the real and imaginary parts are crossed here.
	   */
	  c_re(io[stride]) = ev_dif_re + od_dif_im;
	  c_im(io[stride]) = ev_dif_im - od_dif_re;
	  c_re(io[3 * stride]) = ev_dif_re - od_dif_im;
	  c_im(io[3 * stride]) = ev_dif_im + od_dif_re;
     }
}
13764 
13765 /* This function contains 52 FP additions and 32 FP multiplications */
13766 
fftw_twiddle_5(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)13767 static void fftw_twiddle_5(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
13768 {
13769      int i;
13770      FFTW_COMPLEX *inout;
13771      inout = A;
13772      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 4) {
13773 	  FFTW_REAL tre0_0_0;
13774 	  FFTW_REAL tim0_0_0;
13775 	  FFTW_REAL tre0_1_0;
13776 	  FFTW_REAL tim0_1_0;
13777 	  FFTW_REAL tre0_2_0;
13778 	  FFTW_REAL tim0_2_0;
13779 	  FFTW_REAL tre0_3_0;
13780 	  FFTW_REAL tim0_3_0;
13781 	  FFTW_REAL tre0_4_0;
13782 	  FFTW_REAL tim0_4_0;
13783 	  tre0_0_0 = c_re(inout[0]);
13784 	  tim0_0_0 = c_im(inout[0]);
13785 	  {
13786 	       FFTW_REAL tr;
13787 	       FFTW_REAL ti;
13788 	       FFTW_REAL twr;
13789 	       FFTW_REAL twi;
13790 	       tr = c_re(inout[stride]);
13791 	       ti = c_im(inout[stride]);
13792 	       twr = c_re(W[0]);
13793 	       twi = c_im(W[0]);
13794 	       tre0_1_0 = (tr * twr) - (ti * twi);
13795 	       tim0_1_0 = (tr * twi) + (ti * twr);
13796 	  }
13797 	  {
13798 	       FFTW_REAL tr;
13799 	       FFTW_REAL ti;
13800 	       FFTW_REAL twr;
13801 	       FFTW_REAL twi;
13802 	       tr = c_re(inout[2 * stride]);
13803 	       ti = c_im(inout[2 * stride]);
13804 	       twr = c_re(W[1]);
13805 	       twi = c_im(W[1]);
13806 	       tre0_2_0 = (tr * twr) - (ti * twi);
13807 	       tim0_2_0 = (tr * twi) + (ti * twr);
13808 	  }
13809 	  {
13810 	       FFTW_REAL tr;
13811 	       FFTW_REAL ti;
13812 	       FFTW_REAL twr;
13813 	       FFTW_REAL twi;
13814 	       tr = c_re(inout[3 * stride]);
13815 	       ti = c_im(inout[3 * stride]);
13816 	       twr = c_re(W[2]);
13817 	       twi = c_im(W[2]);
13818 	       tre0_3_0 = (tr * twr) - (ti * twi);
13819 	       tim0_3_0 = (tr * twi) + (ti * twr);
13820 	  }
13821 	  {
13822 	       FFTW_REAL tr;
13823 	       FFTW_REAL ti;
13824 	       FFTW_REAL twr;
13825 	       FFTW_REAL twi;
13826 	       tr = c_re(inout[4 * stride]);
13827 	       ti = c_im(inout[4 * stride]);
13828 	       twr = c_re(W[3]);
13829 	       twi = c_im(W[3]);
13830 	       tre0_4_0 = (tr * twr) - (ti * twi);
13831 	       tim0_4_0 = (tr * twi) + (ti * twr);
13832 	  }
13833 	  c_re(inout[0]) = tre0_0_0 + tre0_1_0 + tre0_2_0 + tre0_3_0 + tre0_4_0;
13834 	  c_im(inout[0]) = tim0_0_0 + tim0_1_0 + tim0_2_0 + tim0_3_0 + tim0_4_0;
13835 	  {
13836 	       FFTW_REAL tre1_0_0;
13837 	       FFTW_REAL tre1_1_0;
13838 	       tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_1_0 + tre0_4_0)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_2_0 + tre0_3_0));
13839 	       tre1_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tim0_1_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K587785252) * (tim0_2_0 - tim0_3_0));
13840 	       c_re(inout[stride]) = tre1_0_0 + tre1_1_0;
13841 	       c_re(inout[4 * stride]) = tre1_0_0 - tre1_1_0;
13842 	  }
13843 	  {
13844 	       FFTW_REAL tim1_0_0;
13845 	       FFTW_REAL tim1_1_0;
13846 	       tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_1_0 + tim0_4_0)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_2_0 + tim0_3_0));
13847 	       tim1_1_0 = (((FFTW_REAL) FFTW_K951056516) * (tre0_4_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K587785252) * (tre0_3_0 - tre0_2_0));
13848 	       c_im(inout[stride]) = tim1_0_0 + tim1_1_0;
13849 	       c_im(inout[4 * stride]) = tim1_0_0 - tim1_1_0;
13850 	  }
13851 	  {
13852 	       FFTW_REAL tre1_0_0;
13853 	       FFTW_REAL tre1_1_0;
13854 	       tre1_0_0 = tre0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tre0_2_0 + tre0_3_0)) - (((FFTW_REAL) FFTW_K809016994) * (tre0_1_0 + tre0_4_0));
13855 	       tre1_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tim0_1_0 - tim0_4_0)) + (((FFTW_REAL) FFTW_K951056516) * (tim0_3_0 - tim0_2_0));
13856 	       c_re(inout[2 * stride]) = tre1_0_0 + tre1_1_0;
13857 	       c_re(inout[3 * stride]) = tre1_0_0 - tre1_1_0;
13858 	  }
13859 	  {
13860 	       FFTW_REAL tim1_0_0;
13861 	       FFTW_REAL tim1_1_0;
13862 	       tim1_0_0 = tim0_0_0 + (((FFTW_REAL) FFTW_K309016994) * (tim0_2_0 + tim0_3_0)) - (((FFTW_REAL) FFTW_K809016994) * (tim0_1_0 + tim0_4_0));
13863 	       tim1_1_0 = (((FFTW_REAL) FFTW_K587785252) * (tre0_4_0 - tre0_1_0)) + (((FFTW_REAL) FFTW_K951056516) * (tre0_2_0 - tre0_3_0));
13864 	       c_im(inout[2 * stride]) = tim1_0_0 + tim1_1_0;
13865 	       c_im(inout[3 * stride]) = tim1_0_0 - tim1_1_0;
13866 	  }
13867      }
13868 }
13869 
13870 /* This function contains 50 FP additions and 28 FP multiplications */
13871 
fftw_twiddle_6(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)13872 static void fftw_twiddle_6(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
13873 {
13874      int i;
13875      FFTW_COMPLEX *inout;
13876      inout = A;
13877      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 5) {
13878 	  FFTW_REAL tre0_0_0;
13879 	  FFTW_REAL tim0_0_0;
13880 	  FFTW_REAL tre0_0_1;
13881 	  FFTW_REAL tim0_0_1;
13882 	  FFTW_REAL tre0_0_2;
13883 	  FFTW_REAL tim0_0_2;
13884 	  FFTW_REAL tre0_1_0;
13885 	  FFTW_REAL tim0_1_0;
13886 	  FFTW_REAL tre0_1_1;
13887 	  FFTW_REAL tim0_1_1;
13888 	  FFTW_REAL tre0_1_2;
13889 	  FFTW_REAL tim0_1_2;
13890 	  {
13891 	       FFTW_REAL tre1_0_0;
13892 	       FFTW_REAL tim1_0_0;
13893 	       FFTW_REAL tre1_1_0;
13894 	       FFTW_REAL tim1_1_0;
13895 	       tre1_0_0 = c_re(inout[0]);
13896 	       tim1_0_0 = c_im(inout[0]);
13897 	       {
13898 		    FFTW_REAL tr;
13899 		    FFTW_REAL ti;
13900 		    FFTW_REAL twr;
13901 		    FFTW_REAL twi;
13902 		    tr = c_re(inout[3 * stride]);
13903 		    ti = c_im(inout[3 * stride]);
13904 		    twr = c_re(W[2]);
13905 		    twi = c_im(W[2]);
13906 		    tre1_1_0 = (tr * twr) - (ti * twi);
13907 		    tim1_1_0 = (tr * twi) + (ti * twr);
13908 	       }
13909 	       tre0_0_0 = tre1_0_0 + tre1_1_0;
13910 	       tim0_0_0 = tim1_0_0 + tim1_1_0;
13911 	       tre0_1_0 = tre1_0_0 - tre1_1_0;
13912 	       tim0_1_0 = tim1_0_0 - tim1_1_0;
13913 	  }
13914 	  {
13915 	       FFTW_REAL tre1_0_0;
13916 	       FFTW_REAL tim1_0_0;
13917 	       FFTW_REAL tre1_1_0;
13918 	       FFTW_REAL tim1_1_0;
13919 	       {
13920 		    FFTW_REAL tr;
13921 		    FFTW_REAL ti;
13922 		    FFTW_REAL twr;
13923 		    FFTW_REAL twi;
13924 		    tr = c_re(inout[2 * stride]);
13925 		    ti = c_im(inout[2 * stride]);
13926 		    twr = c_re(W[1]);
13927 		    twi = c_im(W[1]);
13928 		    tre1_0_0 = (tr * twr) - (ti * twi);
13929 		    tim1_0_0 = (tr * twi) + (ti * twr);
13930 	       }
13931 	       {
13932 		    FFTW_REAL tr;
13933 		    FFTW_REAL ti;
13934 		    FFTW_REAL twr;
13935 		    FFTW_REAL twi;
13936 		    tr = c_re(inout[5 * stride]);
13937 		    ti = c_im(inout[5 * stride]);
13938 		    twr = c_re(W[4]);
13939 		    twi = c_im(W[4]);
13940 		    tre1_1_0 = (tr * twr) - (ti * twi);
13941 		    tim1_1_0 = (tr * twi) + (ti * twr);
13942 	       }
13943 	       tre0_0_1 = tre1_0_0 + tre1_1_0;
13944 	       tim0_0_1 = tim1_0_0 + tim1_1_0;
13945 	       tre0_1_1 = tre1_0_0 - tre1_1_0;
13946 	       tim0_1_1 = tim1_0_0 - tim1_1_0;
13947 	  }
13948 	  {
13949 	       FFTW_REAL tre1_0_0;
13950 	       FFTW_REAL tim1_0_0;
13951 	       FFTW_REAL tre1_1_0;
13952 	       FFTW_REAL tim1_1_0;
13953 	       {
13954 		    FFTW_REAL tr;
13955 		    FFTW_REAL ti;
13956 		    FFTW_REAL twr;
13957 		    FFTW_REAL twi;
13958 		    tr = c_re(inout[4 * stride]);
13959 		    ti = c_im(inout[4 * stride]);
13960 		    twr = c_re(W[3]);
13961 		    twi = c_im(W[3]);
13962 		    tre1_0_0 = (tr * twr) - (ti * twi);
13963 		    tim1_0_0 = (tr * twi) + (ti * twr);
13964 	       }
13965 	       {
13966 		    FFTW_REAL tr;
13967 		    FFTW_REAL ti;
13968 		    FFTW_REAL twr;
13969 		    FFTW_REAL twi;
13970 		    tr = c_re(inout[stride]);
13971 		    ti = c_im(inout[stride]);
13972 		    twr = c_re(W[0]);
13973 		    twi = c_im(W[0]);
13974 		    tre1_1_0 = (tr * twr) - (ti * twi);
13975 		    tim1_1_0 = (tr * twi) + (ti * twr);
13976 	       }
13977 	       tre0_0_2 = tre1_0_0 + tre1_1_0;
13978 	       tim0_0_2 = tim1_0_0 + tim1_1_0;
13979 	       tre0_1_2 = tre1_0_0 - tre1_1_0;
13980 	       tim0_1_2 = tim1_0_0 - tim1_1_0;
13981 	  }
13982 	  c_re(inout[0]) = tre0_0_0 + tre0_0_1 + tre0_0_2;
13983 	  c_im(inout[0]) = tim0_0_0 + tim0_0_1 + tim0_0_2;
13984 	  {
13985 	       FFTW_REAL tre2_0_0;
13986 	       FFTW_REAL tre2_1_0;
13987 	       tre2_0_0 = tre0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre0_0_1 + tre0_0_2));
13988 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim0_0_1 - tim0_0_2);
13989 	       c_re(inout[4 * stride]) = tre2_0_0 + tre2_1_0;
13990 	       c_re(inout[2 * stride]) = tre2_0_0 - tre2_1_0;
13991 	  }
13992 	  {
13993 	       FFTW_REAL tim2_0_0;
13994 	       FFTW_REAL tim2_1_0;
13995 	       tim2_0_0 = tim0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim0_0_1 + tim0_0_2));
13996 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre0_0_2 - tre0_0_1);
13997 	       c_im(inout[4 * stride]) = tim2_0_0 + tim2_1_0;
13998 	       c_im(inout[2 * stride]) = tim2_0_0 - tim2_1_0;
13999 	  }
14000 	  c_re(inout[3 * stride]) = tre0_1_0 + tre0_1_1 + tre0_1_2;
14001 	  c_im(inout[3 * stride]) = tim0_1_0 + tim0_1_1 + tim0_1_2;
14002 	  {
14003 	       FFTW_REAL tre2_0_0;
14004 	       FFTW_REAL tre2_1_0;
14005 	       tre2_0_0 = tre0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tre0_1_1 + tre0_1_2));
14006 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim0_1_1 - tim0_1_2);
14007 	       c_re(inout[stride]) = tre2_0_0 + tre2_1_0;
14008 	       c_re(inout[5 * stride]) = tre2_0_0 - tre2_1_0;
14009 	  }
14010 	  {
14011 	       FFTW_REAL tim2_0_0;
14012 	       FFTW_REAL tim2_1_0;
14013 	       tim2_0_0 = tim0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tim0_1_1 + tim0_1_2));
14014 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre0_1_2 - tre0_1_1);
14015 	       c_im(inout[stride]) = tim2_0_0 + tim2_1_0;
14016 	       c_im(inout[5 * stride]) = tim2_0_0 - tim2_1_0;
14017 	  }
14018      }
14019 }
14020 
14021 /* This function contains 1054 FP additions and 500 FP multiplications */
14022 
fftw_twiddle_64(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)14023 static void fftw_twiddle_64(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
14024 {
14025      int i;
14026      FFTW_COMPLEX *inout;
14027      inout = A;
14028      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 63) {
14029 	  FFTW_REAL tre0_0_0;
14030 	  FFTW_REAL tim0_0_0;
14031 	  FFTW_REAL tre0_0_1;
14032 	  FFTW_REAL tim0_0_1;
14033 	  FFTW_REAL tre0_0_2;
14034 	  FFTW_REAL tim0_0_2;
14035 	  FFTW_REAL tre0_0_3;
14036 	  FFTW_REAL tim0_0_3;
14037 	  FFTW_REAL tre0_0_4;
14038 	  FFTW_REAL tim0_0_4;
14039 	  FFTW_REAL tre0_0_5;
14040 	  FFTW_REAL tim0_0_5;
14041 	  FFTW_REAL tre0_0_6;
14042 	  FFTW_REAL tim0_0_6;
14043 	  FFTW_REAL tre0_0_7;
14044 	  FFTW_REAL tim0_0_7;
14045 	  FFTW_REAL tre0_1_0;
14046 	  FFTW_REAL tim0_1_0;
14047 	  FFTW_REAL tre0_1_1;
14048 	  FFTW_REAL tim0_1_1;
14049 	  FFTW_REAL tre0_1_2;
14050 	  FFTW_REAL tim0_1_2;
14051 	  FFTW_REAL tre0_1_3;
14052 	  FFTW_REAL tim0_1_3;
14053 	  FFTW_REAL tre0_1_4;
14054 	  FFTW_REAL tim0_1_4;
14055 	  FFTW_REAL tre0_1_5;
14056 	  FFTW_REAL tim0_1_5;
14057 	  FFTW_REAL tre0_1_6;
14058 	  FFTW_REAL tim0_1_6;
14059 	  FFTW_REAL tre0_1_7;
14060 	  FFTW_REAL tim0_1_7;
14061 	  FFTW_REAL tre0_2_0;
14062 	  FFTW_REAL tim0_2_0;
14063 	  FFTW_REAL tre0_2_1;
14064 	  FFTW_REAL tim0_2_1;
14065 	  FFTW_REAL tre0_2_2;
14066 	  FFTW_REAL tim0_2_2;
14067 	  FFTW_REAL tre0_2_3;
14068 	  FFTW_REAL tim0_2_3;
14069 	  FFTW_REAL tre0_2_4;
14070 	  FFTW_REAL tim0_2_4;
14071 	  FFTW_REAL tre0_2_5;
14072 	  FFTW_REAL tim0_2_5;
14073 	  FFTW_REAL tre0_2_6;
14074 	  FFTW_REAL tim0_2_6;
14075 	  FFTW_REAL tre0_2_7;
14076 	  FFTW_REAL tim0_2_7;
14077 	  FFTW_REAL tre0_3_0;
14078 	  FFTW_REAL tim0_3_0;
14079 	  FFTW_REAL tre0_3_1;
14080 	  FFTW_REAL tim0_3_1;
14081 	  FFTW_REAL tre0_3_2;
14082 	  FFTW_REAL tim0_3_2;
14083 	  FFTW_REAL tre0_3_3;
14084 	  FFTW_REAL tim0_3_3;
14085 	  FFTW_REAL tre0_3_4;
14086 	  FFTW_REAL tim0_3_4;
14087 	  FFTW_REAL tre0_3_5;
14088 	  FFTW_REAL tim0_3_5;
14089 	  FFTW_REAL tre0_3_6;
14090 	  FFTW_REAL tim0_3_6;
14091 	  FFTW_REAL tre0_3_7;
14092 	  FFTW_REAL tim0_3_7;
14093 	  FFTW_REAL tre0_4_0;
14094 	  FFTW_REAL tim0_4_0;
14095 	  FFTW_REAL tre0_4_1;
14096 	  FFTW_REAL tim0_4_1;
14097 	  FFTW_REAL tre0_4_2;
14098 	  FFTW_REAL tim0_4_2;
14099 	  FFTW_REAL tre0_4_3;
14100 	  FFTW_REAL tim0_4_3;
14101 	  FFTW_REAL tre0_4_4;
14102 	  FFTW_REAL tim0_4_4;
14103 	  FFTW_REAL tre0_4_5;
14104 	  FFTW_REAL tim0_4_5;
14105 	  FFTW_REAL tre0_4_6;
14106 	  FFTW_REAL tim0_4_6;
14107 	  FFTW_REAL tre0_4_7;
14108 	  FFTW_REAL tim0_4_7;
14109 	  FFTW_REAL tre0_5_0;
14110 	  FFTW_REAL tim0_5_0;
14111 	  FFTW_REAL tre0_5_1;
14112 	  FFTW_REAL tim0_5_1;
14113 	  FFTW_REAL tre0_5_2;
14114 	  FFTW_REAL tim0_5_2;
14115 	  FFTW_REAL tre0_5_3;
14116 	  FFTW_REAL tim0_5_3;
14117 	  FFTW_REAL tre0_5_4;
14118 	  FFTW_REAL tim0_5_4;
14119 	  FFTW_REAL tre0_5_5;
14120 	  FFTW_REAL tim0_5_5;
14121 	  FFTW_REAL tre0_5_6;
14122 	  FFTW_REAL tim0_5_6;
14123 	  FFTW_REAL tre0_5_7;
14124 	  FFTW_REAL tim0_5_7;
14125 	  FFTW_REAL tre0_6_0;
14126 	  FFTW_REAL tim0_6_0;
14127 	  FFTW_REAL tre0_6_1;
14128 	  FFTW_REAL tim0_6_1;
14129 	  FFTW_REAL tre0_6_2;
14130 	  FFTW_REAL tim0_6_2;
14131 	  FFTW_REAL tre0_6_3;
14132 	  FFTW_REAL tim0_6_3;
14133 	  FFTW_REAL tre0_6_4;
14134 	  FFTW_REAL tim0_6_4;
14135 	  FFTW_REAL tre0_6_5;
14136 	  FFTW_REAL tim0_6_5;
14137 	  FFTW_REAL tre0_6_6;
14138 	  FFTW_REAL tim0_6_6;
14139 	  FFTW_REAL tre0_6_7;
14140 	  FFTW_REAL tim0_6_7;
14141 	  FFTW_REAL tre0_7_0;
14142 	  FFTW_REAL tim0_7_0;
14143 	  FFTW_REAL tre0_7_1;
14144 	  FFTW_REAL tim0_7_1;
14145 	  FFTW_REAL tre0_7_2;
14146 	  FFTW_REAL tim0_7_2;
14147 	  FFTW_REAL tre0_7_3;
14148 	  FFTW_REAL tim0_7_3;
14149 	  FFTW_REAL tre0_7_4;
14150 	  FFTW_REAL tim0_7_4;
14151 	  FFTW_REAL tre0_7_5;
14152 	  FFTW_REAL tim0_7_5;
14153 	  FFTW_REAL tre0_7_6;
14154 	  FFTW_REAL tim0_7_6;
14155 	  FFTW_REAL tre0_7_7;
14156 	  FFTW_REAL tim0_7_7;
14157 	  {
14158 	       FFTW_REAL tre1_0_0;
14159 	       FFTW_REAL tim1_0_0;
14160 	       FFTW_REAL tre1_0_1;
14161 	       FFTW_REAL tim1_0_1;
14162 	       FFTW_REAL tre1_0_2;
14163 	       FFTW_REAL tim1_0_2;
14164 	       FFTW_REAL tre1_0_3;
14165 	       FFTW_REAL tim1_0_3;
14166 	       FFTW_REAL tre1_1_0;
14167 	       FFTW_REAL tim1_1_0;
14168 	       FFTW_REAL tre1_1_1;
14169 	       FFTW_REAL tim1_1_1;
14170 	       FFTW_REAL tre1_1_2;
14171 	       FFTW_REAL tim1_1_2;
14172 	       FFTW_REAL tre1_1_3;
14173 	       FFTW_REAL tim1_1_3;
14174 	       {
14175 		    FFTW_REAL tre2_0_0;
14176 		    FFTW_REAL tim2_0_0;
14177 		    FFTW_REAL tre2_1_0;
14178 		    FFTW_REAL tim2_1_0;
14179 		    tre2_0_0 = c_re(inout[0]);
14180 		    tim2_0_0 = c_im(inout[0]);
14181 		    {
14182 			 FFTW_REAL tr;
14183 			 FFTW_REAL ti;
14184 			 FFTW_REAL twr;
14185 			 FFTW_REAL twi;
14186 			 tr = c_re(inout[32 * stride]);
14187 			 ti = c_im(inout[32 * stride]);
14188 			 twr = c_re(W[31]);
14189 			 twi = c_im(W[31]);
14190 			 tre2_1_0 = (tr * twr) - (ti * twi);
14191 			 tim2_1_0 = (tr * twi) + (ti * twr);
14192 		    }
14193 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
14194 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
14195 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
14196 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
14197 	       }
14198 	       {
14199 		    FFTW_REAL tre2_0_0;
14200 		    FFTW_REAL tim2_0_0;
14201 		    FFTW_REAL tre2_1_0;
14202 		    FFTW_REAL tim2_1_0;
14203 		    {
14204 			 FFTW_REAL tr;
14205 			 FFTW_REAL ti;
14206 			 FFTW_REAL twr;
14207 			 FFTW_REAL twi;
14208 			 tr = c_re(inout[8 * stride]);
14209 			 ti = c_im(inout[8 * stride]);
14210 			 twr = c_re(W[7]);
14211 			 twi = c_im(W[7]);
14212 			 tre2_0_0 = (tr * twr) - (ti * twi);
14213 			 tim2_0_0 = (tr * twi) + (ti * twr);
14214 		    }
14215 		    {
14216 			 FFTW_REAL tr;
14217 			 FFTW_REAL ti;
14218 			 FFTW_REAL twr;
14219 			 FFTW_REAL twi;
14220 			 tr = c_re(inout[40 * stride]);
14221 			 ti = c_im(inout[40 * stride]);
14222 			 twr = c_re(W[39]);
14223 			 twi = c_im(W[39]);
14224 			 tre2_1_0 = (tr * twr) - (ti * twi);
14225 			 tim2_1_0 = (tr * twi) + (ti * twr);
14226 		    }
14227 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
14228 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
14229 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
14230 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
14231 	       }
14232 	       {
14233 		    FFTW_REAL tre2_0_0;
14234 		    FFTW_REAL tim2_0_0;
14235 		    FFTW_REAL tre2_1_0;
14236 		    FFTW_REAL tim2_1_0;
14237 		    {
14238 			 FFTW_REAL tr;
14239 			 FFTW_REAL ti;
14240 			 FFTW_REAL twr;
14241 			 FFTW_REAL twi;
14242 			 tr = c_re(inout[16 * stride]);
14243 			 ti = c_im(inout[16 * stride]);
14244 			 twr = c_re(W[15]);
14245 			 twi = c_im(W[15]);
14246 			 tre2_0_0 = (tr * twr) - (ti * twi);
14247 			 tim2_0_0 = (tr * twi) + (ti * twr);
14248 		    }
14249 		    {
14250 			 FFTW_REAL tr;
14251 			 FFTW_REAL ti;
14252 			 FFTW_REAL twr;
14253 			 FFTW_REAL twi;
14254 			 tr = c_re(inout[48 * stride]);
14255 			 ti = c_im(inout[48 * stride]);
14256 			 twr = c_re(W[47]);
14257 			 twi = c_im(W[47]);
14258 			 tre2_1_0 = (tr * twr) - (ti * twi);
14259 			 tim2_1_0 = (tr * twi) + (ti * twr);
14260 		    }
14261 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
14262 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
14263 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
14264 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
14265 	       }
14266 	       {
14267 		    FFTW_REAL tre2_0_0;
14268 		    FFTW_REAL tim2_0_0;
14269 		    FFTW_REAL tre2_1_0;
14270 		    FFTW_REAL tim2_1_0;
14271 		    {
14272 			 FFTW_REAL tr;
14273 			 FFTW_REAL ti;
14274 			 FFTW_REAL twr;
14275 			 FFTW_REAL twi;
14276 			 tr = c_re(inout[24 * stride]);
14277 			 ti = c_im(inout[24 * stride]);
14278 			 twr = c_re(W[23]);
14279 			 twi = c_im(W[23]);
14280 			 tre2_0_0 = (tr * twr) - (ti * twi);
14281 			 tim2_0_0 = (tr * twi) + (ti * twr);
14282 		    }
14283 		    {
14284 			 FFTW_REAL tr;
14285 			 FFTW_REAL ti;
14286 			 FFTW_REAL twr;
14287 			 FFTW_REAL twi;
14288 			 tr = c_re(inout[56 * stride]);
14289 			 ti = c_im(inout[56 * stride]);
14290 			 twr = c_re(W[55]);
14291 			 twi = c_im(W[55]);
14292 			 tre2_1_0 = (tr * twr) - (ti * twi);
14293 			 tim2_1_0 = (tr * twi) + (ti * twr);
14294 		    }
14295 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
14296 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
14297 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
14298 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
14299 	       }
14300 	       {
14301 		    FFTW_REAL tre2_0_0;
14302 		    FFTW_REAL tim2_0_0;
14303 		    FFTW_REAL tre2_0_1;
14304 		    FFTW_REAL tim2_0_1;
14305 		    FFTW_REAL tre2_1_0;
14306 		    FFTW_REAL tim2_1_0;
14307 		    FFTW_REAL tre2_1_1;
14308 		    FFTW_REAL tim2_1_1;
14309 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
14310 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
14311 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
14312 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
14313 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
14314 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
14315 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
14316 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
14317 		    tre0_0_0 = tre2_0_0 + tre2_0_1;
14318 		    tim0_0_0 = tim2_0_0 + tim2_0_1;
14319 		    tre0_4_0 = tre2_0_0 - tre2_0_1;
14320 		    tim0_4_0 = tim2_0_0 - tim2_0_1;
14321 		    tre0_2_0 = tre2_1_0 + tim2_1_1;
14322 		    tim0_2_0 = tim2_1_0 - tre2_1_1;
14323 		    tre0_6_0 = tre2_1_0 - tim2_1_1;
14324 		    tim0_6_0 = tim2_1_0 + tre2_1_1;
14325 	       }
14326 	       {
14327 		    FFTW_REAL tre2_0_0;
14328 		    FFTW_REAL tim2_0_0;
14329 		    FFTW_REAL tre2_0_1;
14330 		    FFTW_REAL tim2_0_1;
14331 		    FFTW_REAL tre2_1_0;
14332 		    FFTW_REAL tim2_1_0;
14333 		    FFTW_REAL tre2_1_1;
14334 		    FFTW_REAL tim2_1_1;
14335 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
14336 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
14337 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
14338 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
14339 		    {
14340 			 FFTW_REAL tre3_0_0;
14341 			 FFTW_REAL tim3_0_0;
14342 			 FFTW_REAL tre3_1_0;
14343 			 FFTW_REAL tim3_1_0;
14344 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
14345 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
14346 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
14347 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
14348 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
14349 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
14350 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
14351 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
14352 		    }
14353 		    tre0_1_0 = tre2_0_0 + tre2_0_1;
14354 		    tim0_1_0 = tim2_0_0 + tim2_0_1;
14355 		    tre0_5_0 = tre2_0_0 - tre2_0_1;
14356 		    tim0_5_0 = tim2_0_0 - tim2_0_1;
14357 		    tre0_3_0 = tre2_1_0 + tim2_1_1;
14358 		    tim0_3_0 = tim2_1_0 - tre2_1_1;
14359 		    tre0_7_0 = tre2_1_0 - tim2_1_1;
14360 		    tim0_7_0 = tim2_1_0 + tre2_1_1;
14361 	       }
14362 	  }
14363 	  {
14364 	       FFTW_REAL tre1_0_0;
14365 	       FFTW_REAL tim1_0_0;
14366 	       FFTW_REAL tre1_0_1;
14367 	       FFTW_REAL tim1_0_1;
14368 	       FFTW_REAL tre1_0_2;
14369 	       FFTW_REAL tim1_0_2;
14370 	       FFTW_REAL tre1_0_3;
14371 	       FFTW_REAL tim1_0_3;
14372 	       FFTW_REAL tre1_1_0;
14373 	       FFTW_REAL tim1_1_0;
14374 	       FFTW_REAL tre1_1_1;
14375 	       FFTW_REAL tim1_1_1;
14376 	       FFTW_REAL tre1_1_2;
14377 	       FFTW_REAL tim1_1_2;
14378 	       FFTW_REAL tre1_1_3;
14379 	       FFTW_REAL tim1_1_3;
14380 	       {
14381 		    FFTW_REAL tre2_0_0;
14382 		    FFTW_REAL tim2_0_0;
14383 		    FFTW_REAL tre2_1_0;
14384 		    FFTW_REAL tim2_1_0;
14385 		    {
14386 			 FFTW_REAL tr;
14387 			 FFTW_REAL ti;
14388 			 FFTW_REAL twr;
14389 			 FFTW_REAL twi;
14390 			 tr = c_re(inout[stride]);
14391 			 ti = c_im(inout[stride]);
14392 			 twr = c_re(W[0]);
14393 			 twi = c_im(W[0]);
14394 			 tre2_0_0 = (tr * twr) - (ti * twi);
14395 			 tim2_0_0 = (tr * twi) + (ti * twr);
14396 		    }
14397 		    {
14398 			 FFTW_REAL tr;
14399 			 FFTW_REAL ti;
14400 			 FFTW_REAL twr;
14401 			 FFTW_REAL twi;
14402 			 tr = c_re(inout[33 * stride]);
14403 			 ti = c_im(inout[33 * stride]);
14404 			 twr = c_re(W[32]);
14405 			 twi = c_im(W[32]);
14406 			 tre2_1_0 = (tr * twr) - (ti * twi);
14407 			 tim2_1_0 = (tr * twi) + (ti * twr);
14408 		    }
14409 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
14410 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
14411 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
14412 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
14413 	       }
14414 	       {
14415 		    FFTW_REAL tre2_0_0;
14416 		    FFTW_REAL tim2_0_0;
14417 		    FFTW_REAL tre2_1_0;
14418 		    FFTW_REAL tim2_1_0;
14419 		    {
14420 			 FFTW_REAL tr;
14421 			 FFTW_REAL ti;
14422 			 FFTW_REAL twr;
14423 			 FFTW_REAL twi;
14424 			 tr = c_re(inout[9 * stride]);
14425 			 ti = c_im(inout[9 * stride]);
14426 			 twr = c_re(W[8]);
14427 			 twi = c_im(W[8]);
14428 			 tre2_0_0 = (tr * twr) - (ti * twi);
14429 			 tim2_0_0 = (tr * twi) + (ti * twr);
14430 		    }
14431 		    {
14432 			 FFTW_REAL tr;
14433 			 FFTW_REAL ti;
14434 			 FFTW_REAL twr;
14435 			 FFTW_REAL twi;
14436 			 tr = c_re(inout[41 * stride]);
14437 			 ti = c_im(inout[41 * stride]);
14438 			 twr = c_re(W[40]);
14439 			 twi = c_im(W[40]);
14440 			 tre2_1_0 = (tr * twr) - (ti * twi);
14441 			 tim2_1_0 = (tr * twi) + (ti * twr);
14442 		    }
14443 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
14444 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
14445 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
14446 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
14447 	       }
14448 	       {
14449 		    FFTW_REAL tre2_0_0;
14450 		    FFTW_REAL tim2_0_0;
14451 		    FFTW_REAL tre2_1_0;
14452 		    FFTW_REAL tim2_1_0;
14453 		    {
14454 			 FFTW_REAL tr;
14455 			 FFTW_REAL ti;
14456 			 FFTW_REAL twr;
14457 			 FFTW_REAL twi;
14458 			 tr = c_re(inout[17 * stride]);
14459 			 ti = c_im(inout[17 * stride]);
14460 			 twr = c_re(W[16]);
14461 			 twi = c_im(W[16]);
14462 			 tre2_0_0 = (tr * twr) - (ti * twi);
14463 			 tim2_0_0 = (tr * twi) + (ti * twr);
14464 		    }
14465 		    {
14466 			 FFTW_REAL tr;
14467 			 FFTW_REAL ti;
14468 			 FFTW_REAL twr;
14469 			 FFTW_REAL twi;
14470 			 tr = c_re(inout[49 * stride]);
14471 			 ti = c_im(inout[49 * stride]);
14472 			 twr = c_re(W[48]);
14473 			 twi = c_im(W[48]);
14474 			 tre2_1_0 = (tr * twr) - (ti * twi);
14475 			 tim2_1_0 = (tr * twi) + (ti * twr);
14476 		    }
14477 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
14478 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
14479 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
14480 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
14481 	       }
14482 	       {
14483 		    FFTW_REAL tre2_0_0;
14484 		    FFTW_REAL tim2_0_0;
14485 		    FFTW_REAL tre2_1_0;
14486 		    FFTW_REAL tim2_1_0;
14487 		    {
14488 			 FFTW_REAL tr;
14489 			 FFTW_REAL ti;
14490 			 FFTW_REAL twr;
14491 			 FFTW_REAL twi;
14492 			 tr = c_re(inout[25 * stride]);
14493 			 ti = c_im(inout[25 * stride]);
14494 			 twr = c_re(W[24]);
14495 			 twi = c_im(W[24]);
14496 			 tre2_0_0 = (tr * twr) - (ti * twi);
14497 			 tim2_0_0 = (tr * twi) + (ti * twr);
14498 		    }
14499 		    {
14500 			 FFTW_REAL tr;
14501 			 FFTW_REAL ti;
14502 			 FFTW_REAL twr;
14503 			 FFTW_REAL twi;
14504 			 tr = c_re(inout[57 * stride]);
14505 			 ti = c_im(inout[57 * stride]);
14506 			 twr = c_re(W[56]);
14507 			 twi = c_im(W[56]);
14508 			 tre2_1_0 = (tr * twr) - (ti * twi);
14509 			 tim2_1_0 = (tr * twi) + (ti * twr);
14510 		    }
14511 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
14512 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
14513 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
14514 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
14515 	       }
14516 	       {
14517 		    FFTW_REAL tre2_0_0;
14518 		    FFTW_REAL tim2_0_0;
14519 		    FFTW_REAL tre2_0_1;
14520 		    FFTW_REAL tim2_0_1;
14521 		    FFTW_REAL tre2_1_0;
14522 		    FFTW_REAL tim2_1_0;
14523 		    FFTW_REAL tre2_1_1;
14524 		    FFTW_REAL tim2_1_1;
14525 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
14526 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
14527 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
14528 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
14529 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
14530 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
14531 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
14532 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
14533 		    tre0_0_1 = tre2_0_0 + tre2_0_1;
14534 		    tim0_0_1 = tim2_0_0 + tim2_0_1;
14535 		    tre0_4_1 = tre2_0_0 - tre2_0_1;
14536 		    tim0_4_1 = tim2_0_0 - tim2_0_1;
14537 		    tre0_2_1 = tre2_1_0 + tim2_1_1;
14538 		    tim0_2_1 = tim2_1_0 - tre2_1_1;
14539 		    tre0_6_1 = tre2_1_0 - tim2_1_1;
14540 		    tim0_6_1 = tim2_1_0 + tre2_1_1;
14541 	       }
14542 	       {
14543 		    FFTW_REAL tre2_0_0;
14544 		    FFTW_REAL tim2_0_0;
14545 		    FFTW_REAL tre2_0_1;
14546 		    FFTW_REAL tim2_0_1;
14547 		    FFTW_REAL tre2_1_0;
14548 		    FFTW_REAL tim2_1_0;
14549 		    FFTW_REAL tre2_1_1;
14550 		    FFTW_REAL tim2_1_1;
14551 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
14552 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
14553 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
14554 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
14555 		    {
14556 			 FFTW_REAL tre3_0_0;
14557 			 FFTW_REAL tim3_0_0;
14558 			 FFTW_REAL tre3_1_0;
14559 			 FFTW_REAL tim3_1_0;
14560 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
14561 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
14562 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
14563 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
14564 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
14565 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
14566 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
14567 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
14568 		    }
14569 		    tre0_1_1 = tre2_0_0 + tre2_0_1;
14570 		    tim0_1_1 = tim2_0_0 + tim2_0_1;
14571 		    tre0_5_1 = tre2_0_0 - tre2_0_1;
14572 		    tim0_5_1 = tim2_0_0 - tim2_0_1;
14573 		    tre0_3_1 = tre2_1_0 + tim2_1_1;
14574 		    tim0_3_1 = tim2_1_0 - tre2_1_1;
14575 		    tre0_7_1 = tre2_1_0 - tim2_1_1;
14576 		    tim0_7_1 = tim2_1_0 + tre2_1_1;
14577 	       }
14578 	  }
14579 	  {
14580 	       FFTW_REAL tre1_0_0;
14581 	       FFTW_REAL tim1_0_0;
14582 	       FFTW_REAL tre1_0_1;
14583 	       FFTW_REAL tim1_0_1;
14584 	       FFTW_REAL tre1_0_2;
14585 	       FFTW_REAL tim1_0_2;
14586 	       FFTW_REAL tre1_0_3;
14587 	       FFTW_REAL tim1_0_3;
14588 	       FFTW_REAL tre1_1_0;
14589 	       FFTW_REAL tim1_1_0;
14590 	       FFTW_REAL tre1_1_1;
14591 	       FFTW_REAL tim1_1_1;
14592 	       FFTW_REAL tre1_1_2;
14593 	       FFTW_REAL tim1_1_2;
14594 	       FFTW_REAL tre1_1_3;
14595 	       FFTW_REAL tim1_1_3;
14596 	       {
14597 		    FFTW_REAL tre2_0_0;
14598 		    FFTW_REAL tim2_0_0;
14599 		    FFTW_REAL tre2_1_0;
14600 		    FFTW_REAL tim2_1_0;
14601 		    {
14602 			 FFTW_REAL tr;
14603 			 FFTW_REAL ti;
14604 			 FFTW_REAL twr;
14605 			 FFTW_REAL twi;
14606 			 tr = c_re(inout[2 * stride]);
14607 			 ti = c_im(inout[2 * stride]);
14608 			 twr = c_re(W[1]);
14609 			 twi = c_im(W[1]);
14610 			 tre2_0_0 = (tr * twr) - (ti * twi);
14611 			 tim2_0_0 = (tr * twi) + (ti * twr);
14612 		    }
14613 		    {
14614 			 FFTW_REAL tr;
14615 			 FFTW_REAL ti;
14616 			 FFTW_REAL twr;
14617 			 FFTW_REAL twi;
14618 			 tr = c_re(inout[34 * stride]);
14619 			 ti = c_im(inout[34 * stride]);
14620 			 twr = c_re(W[33]);
14621 			 twi = c_im(W[33]);
14622 			 tre2_1_0 = (tr * twr) - (ti * twi);
14623 			 tim2_1_0 = (tr * twi) + (ti * twr);
14624 		    }
14625 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
14626 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
14627 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
14628 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
14629 	       }
14630 	       {
14631 		    FFTW_REAL tre2_0_0;
14632 		    FFTW_REAL tim2_0_0;
14633 		    FFTW_REAL tre2_1_0;
14634 		    FFTW_REAL tim2_1_0;
14635 		    {
14636 			 FFTW_REAL tr;
14637 			 FFTW_REAL ti;
14638 			 FFTW_REAL twr;
14639 			 FFTW_REAL twi;
14640 			 tr = c_re(inout[10 * stride]);
14641 			 ti = c_im(inout[10 * stride]);
14642 			 twr = c_re(W[9]);
14643 			 twi = c_im(W[9]);
14644 			 tre2_0_0 = (tr * twr) - (ti * twi);
14645 			 tim2_0_0 = (tr * twi) + (ti * twr);
14646 		    }
14647 		    {
14648 			 FFTW_REAL tr;
14649 			 FFTW_REAL ti;
14650 			 FFTW_REAL twr;
14651 			 FFTW_REAL twi;
14652 			 tr = c_re(inout[42 * stride]);
14653 			 ti = c_im(inout[42 * stride]);
14654 			 twr = c_re(W[41]);
14655 			 twi = c_im(W[41]);
14656 			 tre2_1_0 = (tr * twr) - (ti * twi);
14657 			 tim2_1_0 = (tr * twi) + (ti * twr);
14658 		    }
14659 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
14660 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
14661 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
14662 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
14663 	       }
14664 	       {
14665 		    FFTW_REAL tre2_0_0;
14666 		    FFTW_REAL tim2_0_0;
14667 		    FFTW_REAL tre2_1_0;
14668 		    FFTW_REAL tim2_1_0;
14669 		    {
14670 			 FFTW_REAL tr;
14671 			 FFTW_REAL ti;
14672 			 FFTW_REAL twr;
14673 			 FFTW_REAL twi;
14674 			 tr = c_re(inout[18 * stride]);
14675 			 ti = c_im(inout[18 * stride]);
14676 			 twr = c_re(W[17]);
14677 			 twi = c_im(W[17]);
14678 			 tre2_0_0 = (tr * twr) - (ti * twi);
14679 			 tim2_0_0 = (tr * twi) + (ti * twr);
14680 		    }
14681 		    {
14682 			 FFTW_REAL tr;
14683 			 FFTW_REAL ti;
14684 			 FFTW_REAL twr;
14685 			 FFTW_REAL twi;
14686 			 tr = c_re(inout[50 * stride]);
14687 			 ti = c_im(inout[50 * stride]);
14688 			 twr = c_re(W[49]);
14689 			 twi = c_im(W[49]);
14690 			 tre2_1_0 = (tr * twr) - (ti * twi);
14691 			 tim2_1_0 = (tr * twi) + (ti * twr);
14692 		    }
14693 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
14694 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
14695 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
14696 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
14697 	       }
14698 	       {
14699 		    FFTW_REAL tre2_0_0;
14700 		    FFTW_REAL tim2_0_0;
14701 		    FFTW_REAL tre2_1_0;
14702 		    FFTW_REAL tim2_1_0;
14703 		    {
14704 			 FFTW_REAL tr;
14705 			 FFTW_REAL ti;
14706 			 FFTW_REAL twr;
14707 			 FFTW_REAL twi;
14708 			 tr = c_re(inout[26 * stride]);
14709 			 ti = c_im(inout[26 * stride]);
14710 			 twr = c_re(W[25]);
14711 			 twi = c_im(W[25]);
14712 			 tre2_0_0 = (tr * twr) - (ti * twi);
14713 			 tim2_0_0 = (tr * twi) + (ti * twr);
14714 		    }
14715 		    {
14716 			 FFTW_REAL tr;
14717 			 FFTW_REAL ti;
14718 			 FFTW_REAL twr;
14719 			 FFTW_REAL twi;
14720 			 tr = c_re(inout[58 * stride]);
14721 			 ti = c_im(inout[58 * stride]);
14722 			 twr = c_re(W[57]);
14723 			 twi = c_im(W[57]);
14724 			 tre2_1_0 = (tr * twr) - (ti * twi);
14725 			 tim2_1_0 = (tr * twi) + (ti * twr);
14726 		    }
14727 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
14728 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
14729 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
14730 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
14731 	       }
14732 	       {
14733 		    FFTW_REAL tre2_0_0;
14734 		    FFTW_REAL tim2_0_0;
14735 		    FFTW_REAL tre2_0_1;
14736 		    FFTW_REAL tim2_0_1;
14737 		    FFTW_REAL tre2_1_0;
14738 		    FFTW_REAL tim2_1_0;
14739 		    FFTW_REAL tre2_1_1;
14740 		    FFTW_REAL tim2_1_1;
14741 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
14742 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
14743 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
14744 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
14745 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
14746 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
14747 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
14748 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
14749 		    tre0_0_2 = tre2_0_0 + tre2_0_1;
14750 		    tim0_0_2 = tim2_0_0 + tim2_0_1;
14751 		    tre0_4_2 = tre2_0_0 - tre2_0_1;
14752 		    tim0_4_2 = tim2_0_0 - tim2_0_1;
14753 		    tre0_2_2 = tre2_1_0 + tim2_1_1;
14754 		    tim0_2_2 = tim2_1_0 - tre2_1_1;
14755 		    tre0_6_2 = tre2_1_0 - tim2_1_1;
14756 		    tim0_6_2 = tim2_1_0 + tre2_1_1;
14757 	       }
14758 	       {
14759 		    FFTW_REAL tre2_0_0;
14760 		    FFTW_REAL tim2_0_0;
14761 		    FFTW_REAL tre2_0_1;
14762 		    FFTW_REAL tim2_0_1;
14763 		    FFTW_REAL tre2_1_0;
14764 		    FFTW_REAL tim2_1_0;
14765 		    FFTW_REAL tre2_1_1;
14766 		    FFTW_REAL tim2_1_1;
14767 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
14768 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
14769 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
14770 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
14771 		    {
14772 			 FFTW_REAL tre3_0_0;
14773 			 FFTW_REAL tim3_0_0;
14774 			 FFTW_REAL tre3_1_0;
14775 			 FFTW_REAL tim3_1_0;
14776 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
14777 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
14778 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
14779 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
14780 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
14781 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
14782 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
14783 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
14784 		    }
14785 		    tre0_1_2 = tre2_0_0 + tre2_0_1;
14786 		    tim0_1_2 = tim2_0_0 + tim2_0_1;
14787 		    tre0_5_2 = tre2_0_0 - tre2_0_1;
14788 		    tim0_5_2 = tim2_0_0 - tim2_0_1;
14789 		    tre0_3_2 = tre2_1_0 + tim2_1_1;
14790 		    tim0_3_2 = tim2_1_0 - tre2_1_1;
14791 		    tre0_7_2 = tre2_1_0 - tim2_1_1;
14792 		    tim0_7_2 = tim2_1_0 + tre2_1_1;
14793 	       }
14794 	  }
14795 	  {
14796 	       FFTW_REAL tre1_0_0;
14797 	       FFTW_REAL tim1_0_0;
14798 	       FFTW_REAL tre1_0_1;
14799 	       FFTW_REAL tim1_0_1;
14800 	       FFTW_REAL tre1_0_2;
14801 	       FFTW_REAL tim1_0_2;
14802 	       FFTW_REAL tre1_0_3;
14803 	       FFTW_REAL tim1_0_3;
14804 	       FFTW_REAL tre1_1_0;
14805 	       FFTW_REAL tim1_1_0;
14806 	       FFTW_REAL tre1_1_1;
14807 	       FFTW_REAL tim1_1_1;
14808 	       FFTW_REAL tre1_1_2;
14809 	       FFTW_REAL tim1_1_2;
14810 	       FFTW_REAL tre1_1_3;
14811 	       FFTW_REAL tim1_1_3;
14812 	       {
14813 		    FFTW_REAL tre2_0_0;
14814 		    FFTW_REAL tim2_0_0;
14815 		    FFTW_REAL tre2_1_0;
14816 		    FFTW_REAL tim2_1_0;
14817 		    {
14818 			 FFTW_REAL tr;
14819 			 FFTW_REAL ti;
14820 			 FFTW_REAL twr;
14821 			 FFTW_REAL twi;
14822 			 tr = c_re(inout[3 * stride]);
14823 			 ti = c_im(inout[3 * stride]);
14824 			 twr = c_re(W[2]);
14825 			 twi = c_im(W[2]);
14826 			 tre2_0_0 = (tr * twr) - (ti * twi);
14827 			 tim2_0_0 = (tr * twi) + (ti * twr);
14828 		    }
14829 		    {
14830 			 FFTW_REAL tr;
14831 			 FFTW_REAL ti;
14832 			 FFTW_REAL twr;
14833 			 FFTW_REAL twi;
14834 			 tr = c_re(inout[35 * stride]);
14835 			 ti = c_im(inout[35 * stride]);
14836 			 twr = c_re(W[34]);
14837 			 twi = c_im(W[34]);
14838 			 tre2_1_0 = (tr * twr) - (ti * twi);
14839 			 tim2_1_0 = (tr * twi) + (ti * twr);
14840 		    }
14841 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
14842 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
14843 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
14844 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
14845 	       }
14846 	       {
14847 		    FFTW_REAL tre2_0_0;
14848 		    FFTW_REAL tim2_0_0;
14849 		    FFTW_REAL tre2_1_0;
14850 		    FFTW_REAL tim2_1_0;
14851 		    {
14852 			 FFTW_REAL tr;
14853 			 FFTW_REAL ti;
14854 			 FFTW_REAL twr;
14855 			 FFTW_REAL twi;
14856 			 tr = c_re(inout[11 * stride]);
14857 			 ti = c_im(inout[11 * stride]);
14858 			 twr = c_re(W[10]);
14859 			 twi = c_im(W[10]);
14860 			 tre2_0_0 = (tr * twr) - (ti * twi);
14861 			 tim2_0_0 = (tr * twi) + (ti * twr);
14862 		    }
14863 		    {
14864 			 FFTW_REAL tr;
14865 			 FFTW_REAL ti;
14866 			 FFTW_REAL twr;
14867 			 FFTW_REAL twi;
14868 			 tr = c_re(inout[43 * stride]);
14869 			 ti = c_im(inout[43 * stride]);
14870 			 twr = c_re(W[42]);
14871 			 twi = c_im(W[42]);
14872 			 tre2_1_0 = (tr * twr) - (ti * twi);
14873 			 tim2_1_0 = (tr * twi) + (ti * twr);
14874 		    }
14875 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
14876 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
14877 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
14878 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
14879 	       }
14880 	       {
14881 		    FFTW_REAL tre2_0_0;
14882 		    FFTW_REAL tim2_0_0;
14883 		    FFTW_REAL tre2_1_0;
14884 		    FFTW_REAL tim2_1_0;
14885 		    {
14886 			 FFTW_REAL tr;
14887 			 FFTW_REAL ti;
14888 			 FFTW_REAL twr;
14889 			 FFTW_REAL twi;
14890 			 tr = c_re(inout[19 * stride]);
14891 			 ti = c_im(inout[19 * stride]);
14892 			 twr = c_re(W[18]);
14893 			 twi = c_im(W[18]);
14894 			 tre2_0_0 = (tr * twr) - (ti * twi);
14895 			 tim2_0_0 = (tr * twi) + (ti * twr);
14896 		    }
14897 		    {
14898 			 FFTW_REAL tr;
14899 			 FFTW_REAL ti;
14900 			 FFTW_REAL twr;
14901 			 FFTW_REAL twi;
14902 			 tr = c_re(inout[51 * stride]);
14903 			 ti = c_im(inout[51 * stride]);
14904 			 twr = c_re(W[50]);
14905 			 twi = c_im(W[50]);
14906 			 tre2_1_0 = (tr * twr) - (ti * twi);
14907 			 tim2_1_0 = (tr * twi) + (ti * twr);
14908 		    }
14909 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
14910 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
14911 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
14912 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
14913 	       }
14914 	       {
14915 		    FFTW_REAL tre2_0_0;
14916 		    FFTW_REAL tim2_0_0;
14917 		    FFTW_REAL tre2_1_0;
14918 		    FFTW_REAL tim2_1_0;
14919 		    {
14920 			 FFTW_REAL tr;
14921 			 FFTW_REAL ti;
14922 			 FFTW_REAL twr;
14923 			 FFTW_REAL twi;
14924 			 tr = c_re(inout[27 * stride]);
14925 			 ti = c_im(inout[27 * stride]);
14926 			 twr = c_re(W[26]);
14927 			 twi = c_im(W[26]);
14928 			 tre2_0_0 = (tr * twr) - (ti * twi);
14929 			 tim2_0_0 = (tr * twi) + (ti * twr);
14930 		    }
14931 		    {
14932 			 FFTW_REAL tr;
14933 			 FFTW_REAL ti;
14934 			 FFTW_REAL twr;
14935 			 FFTW_REAL twi;
14936 			 tr = c_re(inout[59 * stride]);
14937 			 ti = c_im(inout[59 * stride]);
14938 			 twr = c_re(W[58]);
14939 			 twi = c_im(W[58]);
14940 			 tre2_1_0 = (tr * twr) - (ti * twi);
14941 			 tim2_1_0 = (tr * twi) + (ti * twr);
14942 		    }
14943 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
14944 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
14945 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
14946 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
14947 	       }
14948 	       {
14949 		    FFTW_REAL tre2_0_0;
14950 		    FFTW_REAL tim2_0_0;
14951 		    FFTW_REAL tre2_0_1;
14952 		    FFTW_REAL tim2_0_1;
14953 		    FFTW_REAL tre2_1_0;
14954 		    FFTW_REAL tim2_1_0;
14955 		    FFTW_REAL tre2_1_1;
14956 		    FFTW_REAL tim2_1_1;
14957 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
14958 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
14959 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
14960 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
14961 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
14962 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
14963 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
14964 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
14965 		    tre0_0_3 = tre2_0_0 + tre2_0_1;
14966 		    tim0_0_3 = tim2_0_0 + tim2_0_1;
14967 		    tre0_4_3 = tre2_0_0 - tre2_0_1;
14968 		    tim0_4_3 = tim2_0_0 - tim2_0_1;
14969 		    tre0_2_3 = tre2_1_0 + tim2_1_1;
14970 		    tim0_2_3 = tim2_1_0 - tre2_1_1;
14971 		    tre0_6_3 = tre2_1_0 - tim2_1_1;
14972 		    tim0_6_3 = tim2_1_0 + tre2_1_1;
14973 	       }
14974 	       {
14975 		    FFTW_REAL tre2_0_0;
14976 		    FFTW_REAL tim2_0_0;
14977 		    FFTW_REAL tre2_0_1;
14978 		    FFTW_REAL tim2_0_1;
14979 		    FFTW_REAL tre2_1_0;
14980 		    FFTW_REAL tim2_1_0;
14981 		    FFTW_REAL tre2_1_1;
14982 		    FFTW_REAL tim2_1_1;
14983 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
14984 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
14985 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
14986 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
14987 		    {
14988 			 FFTW_REAL tre3_0_0;
14989 			 FFTW_REAL tim3_0_0;
14990 			 FFTW_REAL tre3_1_0;
14991 			 FFTW_REAL tim3_1_0;
14992 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
14993 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
14994 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
14995 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
14996 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
14997 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
14998 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
14999 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15000 		    }
15001 		    tre0_1_3 = tre2_0_0 + tre2_0_1;
15002 		    tim0_1_3 = tim2_0_0 + tim2_0_1;
15003 		    tre0_5_3 = tre2_0_0 - tre2_0_1;
15004 		    tim0_5_3 = tim2_0_0 - tim2_0_1;
15005 		    tre0_3_3 = tre2_1_0 + tim2_1_1;
15006 		    tim0_3_3 = tim2_1_0 - tre2_1_1;
15007 		    tre0_7_3 = tre2_1_0 - tim2_1_1;
15008 		    tim0_7_3 = tim2_1_0 + tre2_1_1;
15009 	       }
15010 	  }
15011 	  {
15012 	       FFTW_REAL tre1_0_0;
15013 	       FFTW_REAL tim1_0_0;
15014 	       FFTW_REAL tre1_0_1;
15015 	       FFTW_REAL tim1_0_1;
15016 	       FFTW_REAL tre1_0_2;
15017 	       FFTW_REAL tim1_0_2;
15018 	       FFTW_REAL tre1_0_3;
15019 	       FFTW_REAL tim1_0_3;
15020 	       FFTW_REAL tre1_1_0;
15021 	       FFTW_REAL tim1_1_0;
15022 	       FFTW_REAL tre1_1_1;
15023 	       FFTW_REAL tim1_1_1;
15024 	       FFTW_REAL tre1_1_2;
15025 	       FFTW_REAL tim1_1_2;
15026 	       FFTW_REAL tre1_1_3;
15027 	       FFTW_REAL tim1_1_3;
15028 	       {
15029 		    FFTW_REAL tre2_0_0;
15030 		    FFTW_REAL tim2_0_0;
15031 		    FFTW_REAL tre2_1_0;
15032 		    FFTW_REAL tim2_1_0;
15033 		    {
15034 			 FFTW_REAL tr;
15035 			 FFTW_REAL ti;
15036 			 FFTW_REAL twr;
15037 			 FFTW_REAL twi;
15038 			 tr = c_re(inout[4 * stride]);
15039 			 ti = c_im(inout[4 * stride]);
15040 			 twr = c_re(W[3]);
15041 			 twi = c_im(W[3]);
15042 			 tre2_0_0 = (tr * twr) - (ti * twi);
15043 			 tim2_0_0 = (tr * twi) + (ti * twr);
15044 		    }
15045 		    {
15046 			 FFTW_REAL tr;
15047 			 FFTW_REAL ti;
15048 			 FFTW_REAL twr;
15049 			 FFTW_REAL twi;
15050 			 tr = c_re(inout[36 * stride]);
15051 			 ti = c_im(inout[36 * stride]);
15052 			 twr = c_re(W[35]);
15053 			 twi = c_im(W[35]);
15054 			 tre2_1_0 = (tr * twr) - (ti * twi);
15055 			 tim2_1_0 = (tr * twi) + (ti * twr);
15056 		    }
15057 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
15058 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
15059 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
15060 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
15061 	       }
15062 	       {
15063 		    FFTW_REAL tre2_0_0;
15064 		    FFTW_REAL tim2_0_0;
15065 		    FFTW_REAL tre2_1_0;
15066 		    FFTW_REAL tim2_1_0;
15067 		    {
15068 			 FFTW_REAL tr;
15069 			 FFTW_REAL ti;
15070 			 FFTW_REAL twr;
15071 			 FFTW_REAL twi;
15072 			 tr = c_re(inout[12 * stride]);
15073 			 ti = c_im(inout[12 * stride]);
15074 			 twr = c_re(W[11]);
15075 			 twi = c_im(W[11]);
15076 			 tre2_0_0 = (tr * twr) - (ti * twi);
15077 			 tim2_0_0 = (tr * twi) + (ti * twr);
15078 		    }
15079 		    {
15080 			 FFTW_REAL tr;
15081 			 FFTW_REAL ti;
15082 			 FFTW_REAL twr;
15083 			 FFTW_REAL twi;
15084 			 tr = c_re(inout[44 * stride]);
15085 			 ti = c_im(inout[44 * stride]);
15086 			 twr = c_re(W[43]);
15087 			 twi = c_im(W[43]);
15088 			 tre2_1_0 = (tr * twr) - (ti * twi);
15089 			 tim2_1_0 = (tr * twi) + (ti * twr);
15090 		    }
15091 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
15092 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
15093 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
15094 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
15095 	       }
15096 	       {
15097 		    FFTW_REAL tre2_0_0;
15098 		    FFTW_REAL tim2_0_0;
15099 		    FFTW_REAL tre2_1_0;
15100 		    FFTW_REAL tim2_1_0;
15101 		    {
15102 			 FFTW_REAL tr;
15103 			 FFTW_REAL ti;
15104 			 FFTW_REAL twr;
15105 			 FFTW_REAL twi;
15106 			 tr = c_re(inout[20 * stride]);
15107 			 ti = c_im(inout[20 * stride]);
15108 			 twr = c_re(W[19]);
15109 			 twi = c_im(W[19]);
15110 			 tre2_0_0 = (tr * twr) - (ti * twi);
15111 			 tim2_0_0 = (tr * twi) + (ti * twr);
15112 		    }
15113 		    {
15114 			 FFTW_REAL tr;
15115 			 FFTW_REAL ti;
15116 			 FFTW_REAL twr;
15117 			 FFTW_REAL twi;
15118 			 tr = c_re(inout[52 * stride]);
15119 			 ti = c_im(inout[52 * stride]);
15120 			 twr = c_re(W[51]);
15121 			 twi = c_im(W[51]);
15122 			 tre2_1_0 = (tr * twr) - (ti * twi);
15123 			 tim2_1_0 = (tr * twi) + (ti * twr);
15124 		    }
15125 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
15126 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
15127 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
15128 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
15129 	       }
15130 	       {
15131 		    FFTW_REAL tre2_0_0;
15132 		    FFTW_REAL tim2_0_0;
15133 		    FFTW_REAL tre2_1_0;
15134 		    FFTW_REAL tim2_1_0;
15135 		    {
15136 			 FFTW_REAL tr;
15137 			 FFTW_REAL ti;
15138 			 FFTW_REAL twr;
15139 			 FFTW_REAL twi;
15140 			 tr = c_re(inout[28 * stride]);
15141 			 ti = c_im(inout[28 * stride]);
15142 			 twr = c_re(W[27]);
15143 			 twi = c_im(W[27]);
15144 			 tre2_0_0 = (tr * twr) - (ti * twi);
15145 			 tim2_0_0 = (tr * twi) + (ti * twr);
15146 		    }
15147 		    {
15148 			 FFTW_REAL tr;
15149 			 FFTW_REAL ti;
15150 			 FFTW_REAL twr;
15151 			 FFTW_REAL twi;
15152 			 tr = c_re(inout[60 * stride]);
15153 			 ti = c_im(inout[60 * stride]);
15154 			 twr = c_re(W[59]);
15155 			 twi = c_im(W[59]);
15156 			 tre2_1_0 = (tr * twr) - (ti * twi);
15157 			 tim2_1_0 = (tr * twi) + (ti * twr);
15158 		    }
15159 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
15160 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
15161 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
15162 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
15163 	       }
15164 	       {
15165 		    FFTW_REAL tre2_0_0;
15166 		    FFTW_REAL tim2_0_0;
15167 		    FFTW_REAL tre2_0_1;
15168 		    FFTW_REAL tim2_0_1;
15169 		    FFTW_REAL tre2_1_0;
15170 		    FFTW_REAL tim2_1_0;
15171 		    FFTW_REAL tre2_1_1;
15172 		    FFTW_REAL tim2_1_1;
15173 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
15174 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
15175 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
15176 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
15177 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
15178 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
15179 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
15180 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
15181 		    tre0_0_4 = tre2_0_0 + tre2_0_1;
15182 		    tim0_0_4 = tim2_0_0 + tim2_0_1;
15183 		    tre0_4_4 = tre2_0_0 - tre2_0_1;
15184 		    tim0_4_4 = tim2_0_0 - tim2_0_1;
15185 		    tre0_2_4 = tre2_1_0 + tim2_1_1;
15186 		    tim0_2_4 = tim2_1_0 - tre2_1_1;
15187 		    tre0_6_4 = tre2_1_0 - tim2_1_1;
15188 		    tim0_6_4 = tim2_1_0 + tre2_1_1;
15189 	       }
15190 	       {
15191 		    FFTW_REAL tre2_0_0;
15192 		    FFTW_REAL tim2_0_0;
15193 		    FFTW_REAL tre2_0_1;
15194 		    FFTW_REAL tim2_0_1;
15195 		    FFTW_REAL tre2_1_0;
15196 		    FFTW_REAL tim2_1_0;
15197 		    FFTW_REAL tre2_1_1;
15198 		    FFTW_REAL tim2_1_1;
15199 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
15200 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
15201 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
15202 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
15203 		    {
15204 			 FFTW_REAL tre3_0_0;
15205 			 FFTW_REAL tim3_0_0;
15206 			 FFTW_REAL tre3_1_0;
15207 			 FFTW_REAL tim3_1_0;
15208 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
15209 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
15210 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
15211 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
15212 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
15213 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
15214 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
15215 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15216 		    }
15217 		    tre0_1_4 = tre2_0_0 + tre2_0_1;
15218 		    tim0_1_4 = tim2_0_0 + tim2_0_1;
15219 		    tre0_5_4 = tre2_0_0 - tre2_0_1;
15220 		    tim0_5_4 = tim2_0_0 - tim2_0_1;
15221 		    tre0_3_4 = tre2_1_0 + tim2_1_1;
15222 		    tim0_3_4 = tim2_1_0 - tre2_1_1;
15223 		    tre0_7_4 = tre2_1_0 - tim2_1_1;
15224 		    tim0_7_4 = tim2_1_0 + tre2_1_1;
15225 	       }
15226 	  }
15227 	  {
15228 	       FFTW_REAL tre1_0_0;
15229 	       FFTW_REAL tim1_0_0;
15230 	       FFTW_REAL tre1_0_1;
15231 	       FFTW_REAL tim1_0_1;
15232 	       FFTW_REAL tre1_0_2;
15233 	       FFTW_REAL tim1_0_2;
15234 	       FFTW_REAL tre1_0_3;
15235 	       FFTW_REAL tim1_0_3;
15236 	       FFTW_REAL tre1_1_0;
15237 	       FFTW_REAL tim1_1_0;
15238 	       FFTW_REAL tre1_1_1;
15239 	       FFTW_REAL tim1_1_1;
15240 	       FFTW_REAL tre1_1_2;
15241 	       FFTW_REAL tim1_1_2;
15242 	       FFTW_REAL tre1_1_3;
15243 	       FFTW_REAL tim1_1_3;
15244 	       {
15245 		    FFTW_REAL tre2_0_0;
15246 		    FFTW_REAL tim2_0_0;
15247 		    FFTW_REAL tre2_1_0;
15248 		    FFTW_REAL tim2_1_0;
15249 		    {
15250 			 FFTW_REAL tr;
15251 			 FFTW_REAL ti;
15252 			 FFTW_REAL twr;
15253 			 FFTW_REAL twi;
15254 			 tr = c_re(inout[5 * stride]);
15255 			 ti = c_im(inout[5 * stride]);
15256 			 twr = c_re(W[4]);
15257 			 twi = c_im(W[4]);
15258 			 tre2_0_0 = (tr * twr) - (ti * twi);
15259 			 tim2_0_0 = (tr * twi) + (ti * twr);
15260 		    }
15261 		    {
15262 			 FFTW_REAL tr;
15263 			 FFTW_REAL ti;
15264 			 FFTW_REAL twr;
15265 			 FFTW_REAL twi;
15266 			 tr = c_re(inout[37 * stride]);
15267 			 ti = c_im(inout[37 * stride]);
15268 			 twr = c_re(W[36]);
15269 			 twi = c_im(W[36]);
15270 			 tre2_1_0 = (tr * twr) - (ti * twi);
15271 			 tim2_1_0 = (tr * twi) + (ti * twr);
15272 		    }
15273 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
15274 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
15275 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
15276 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
15277 	       }
15278 	       {
15279 		    FFTW_REAL tre2_0_0;
15280 		    FFTW_REAL tim2_0_0;
15281 		    FFTW_REAL tre2_1_0;
15282 		    FFTW_REAL tim2_1_0;
15283 		    {
15284 			 FFTW_REAL tr;
15285 			 FFTW_REAL ti;
15286 			 FFTW_REAL twr;
15287 			 FFTW_REAL twi;
15288 			 tr = c_re(inout[13 * stride]);
15289 			 ti = c_im(inout[13 * stride]);
15290 			 twr = c_re(W[12]);
15291 			 twi = c_im(W[12]);
15292 			 tre2_0_0 = (tr * twr) - (ti * twi);
15293 			 tim2_0_0 = (tr * twi) + (ti * twr);
15294 		    }
15295 		    {
15296 			 FFTW_REAL tr;
15297 			 FFTW_REAL ti;
15298 			 FFTW_REAL twr;
15299 			 FFTW_REAL twi;
15300 			 tr = c_re(inout[45 * stride]);
15301 			 ti = c_im(inout[45 * stride]);
15302 			 twr = c_re(W[44]);
15303 			 twi = c_im(W[44]);
15304 			 tre2_1_0 = (tr * twr) - (ti * twi);
15305 			 tim2_1_0 = (tr * twi) + (ti * twr);
15306 		    }
15307 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
15308 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
15309 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
15310 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
15311 	       }
15312 	       {
15313 		    FFTW_REAL tre2_0_0;
15314 		    FFTW_REAL tim2_0_0;
15315 		    FFTW_REAL tre2_1_0;
15316 		    FFTW_REAL tim2_1_0;
15317 		    {
15318 			 FFTW_REAL tr;
15319 			 FFTW_REAL ti;
15320 			 FFTW_REAL twr;
15321 			 FFTW_REAL twi;
15322 			 tr = c_re(inout[21 * stride]);
15323 			 ti = c_im(inout[21 * stride]);
15324 			 twr = c_re(W[20]);
15325 			 twi = c_im(W[20]);
15326 			 tre2_0_0 = (tr * twr) - (ti * twi);
15327 			 tim2_0_0 = (tr * twi) + (ti * twr);
15328 		    }
15329 		    {
15330 			 FFTW_REAL tr;
15331 			 FFTW_REAL ti;
15332 			 FFTW_REAL twr;
15333 			 FFTW_REAL twi;
15334 			 tr = c_re(inout[53 * stride]);
15335 			 ti = c_im(inout[53 * stride]);
15336 			 twr = c_re(W[52]);
15337 			 twi = c_im(W[52]);
15338 			 tre2_1_0 = (tr * twr) - (ti * twi);
15339 			 tim2_1_0 = (tr * twi) + (ti * twr);
15340 		    }
15341 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
15342 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
15343 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
15344 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
15345 	       }
15346 	       {
15347 		    FFTW_REAL tre2_0_0;
15348 		    FFTW_REAL tim2_0_0;
15349 		    FFTW_REAL tre2_1_0;
15350 		    FFTW_REAL tim2_1_0;
15351 		    {
15352 			 FFTW_REAL tr;
15353 			 FFTW_REAL ti;
15354 			 FFTW_REAL twr;
15355 			 FFTW_REAL twi;
15356 			 tr = c_re(inout[29 * stride]);
15357 			 ti = c_im(inout[29 * stride]);
15358 			 twr = c_re(W[28]);
15359 			 twi = c_im(W[28]);
15360 			 tre2_0_0 = (tr * twr) - (ti * twi);
15361 			 tim2_0_0 = (tr * twi) + (ti * twr);
15362 		    }
15363 		    {
15364 			 FFTW_REAL tr;
15365 			 FFTW_REAL ti;
15366 			 FFTW_REAL twr;
15367 			 FFTW_REAL twi;
15368 			 tr = c_re(inout[61 * stride]);
15369 			 ti = c_im(inout[61 * stride]);
15370 			 twr = c_re(W[60]);
15371 			 twi = c_im(W[60]);
15372 			 tre2_1_0 = (tr * twr) - (ti * twi);
15373 			 tim2_1_0 = (tr * twi) + (ti * twr);
15374 		    }
15375 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
15376 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
15377 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
15378 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
15379 	       }
15380 	       {
15381 		    FFTW_REAL tre2_0_0;
15382 		    FFTW_REAL tim2_0_0;
15383 		    FFTW_REAL tre2_0_1;
15384 		    FFTW_REAL tim2_0_1;
15385 		    FFTW_REAL tre2_1_0;
15386 		    FFTW_REAL tim2_1_0;
15387 		    FFTW_REAL tre2_1_1;
15388 		    FFTW_REAL tim2_1_1;
15389 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
15390 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
15391 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
15392 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
15393 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
15394 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
15395 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
15396 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
15397 		    tre0_0_5 = tre2_0_0 + tre2_0_1;
15398 		    tim0_0_5 = tim2_0_0 + tim2_0_1;
15399 		    tre0_4_5 = tre2_0_0 - tre2_0_1;
15400 		    tim0_4_5 = tim2_0_0 - tim2_0_1;
15401 		    tre0_2_5 = tre2_1_0 + tim2_1_1;
15402 		    tim0_2_5 = tim2_1_0 - tre2_1_1;
15403 		    tre0_6_5 = tre2_1_0 - tim2_1_1;
15404 		    tim0_6_5 = tim2_1_0 + tre2_1_1;
15405 	       }
15406 	       {
15407 		    FFTW_REAL tre2_0_0;
15408 		    FFTW_REAL tim2_0_0;
15409 		    FFTW_REAL tre2_0_1;
15410 		    FFTW_REAL tim2_0_1;
15411 		    FFTW_REAL tre2_1_0;
15412 		    FFTW_REAL tim2_1_0;
15413 		    FFTW_REAL tre2_1_1;
15414 		    FFTW_REAL tim2_1_1;
15415 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
15416 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
15417 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
15418 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
15419 		    {
15420 			 FFTW_REAL tre3_0_0;
15421 			 FFTW_REAL tim3_0_0;
15422 			 FFTW_REAL tre3_1_0;
15423 			 FFTW_REAL tim3_1_0;
15424 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
15425 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
15426 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
15427 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
15428 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
15429 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
15430 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
15431 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15432 		    }
15433 		    tre0_1_5 = tre2_0_0 + tre2_0_1;
15434 		    tim0_1_5 = tim2_0_0 + tim2_0_1;
15435 		    tre0_5_5 = tre2_0_0 - tre2_0_1;
15436 		    tim0_5_5 = tim2_0_0 - tim2_0_1;
15437 		    tre0_3_5 = tre2_1_0 + tim2_1_1;
15438 		    tim0_3_5 = tim2_1_0 - tre2_1_1;
15439 		    tre0_7_5 = tre2_1_0 - tim2_1_1;
15440 		    tim0_7_5 = tim2_1_0 + tre2_1_1;
15441 	       }
15442 	  }
15443 	  {
15444 	       FFTW_REAL tre1_0_0;
15445 	       FFTW_REAL tim1_0_0;
15446 	       FFTW_REAL tre1_0_1;
15447 	       FFTW_REAL tim1_0_1;
15448 	       FFTW_REAL tre1_0_2;
15449 	       FFTW_REAL tim1_0_2;
15450 	       FFTW_REAL tre1_0_3;
15451 	       FFTW_REAL tim1_0_3;
15452 	       FFTW_REAL tre1_1_0;
15453 	       FFTW_REAL tim1_1_0;
15454 	       FFTW_REAL tre1_1_1;
15455 	       FFTW_REAL tim1_1_1;
15456 	       FFTW_REAL tre1_1_2;
15457 	       FFTW_REAL tim1_1_2;
15458 	       FFTW_REAL tre1_1_3;
15459 	       FFTW_REAL tim1_1_3;
15460 	       {
15461 		    FFTW_REAL tre2_0_0;
15462 		    FFTW_REAL tim2_0_0;
15463 		    FFTW_REAL tre2_1_0;
15464 		    FFTW_REAL tim2_1_0;
15465 		    {
15466 			 FFTW_REAL tr;
15467 			 FFTW_REAL ti;
15468 			 FFTW_REAL twr;
15469 			 FFTW_REAL twi;
15470 			 tr = c_re(inout[6 * stride]);
15471 			 ti = c_im(inout[6 * stride]);
15472 			 twr = c_re(W[5]);
15473 			 twi = c_im(W[5]);
15474 			 tre2_0_0 = (tr * twr) - (ti * twi);
15475 			 tim2_0_0 = (tr * twi) + (ti * twr);
15476 		    }
15477 		    {
15478 			 FFTW_REAL tr;
15479 			 FFTW_REAL ti;
15480 			 FFTW_REAL twr;
15481 			 FFTW_REAL twi;
15482 			 tr = c_re(inout[38 * stride]);
15483 			 ti = c_im(inout[38 * stride]);
15484 			 twr = c_re(W[37]);
15485 			 twi = c_im(W[37]);
15486 			 tre2_1_0 = (tr * twr) - (ti * twi);
15487 			 tim2_1_0 = (tr * twi) + (ti * twr);
15488 		    }
15489 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
15490 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
15491 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
15492 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
15493 	       }
15494 	       {
15495 		    FFTW_REAL tre2_0_0;
15496 		    FFTW_REAL tim2_0_0;
15497 		    FFTW_REAL tre2_1_0;
15498 		    FFTW_REAL tim2_1_0;
15499 		    {
15500 			 FFTW_REAL tr;
15501 			 FFTW_REAL ti;
15502 			 FFTW_REAL twr;
15503 			 FFTW_REAL twi;
15504 			 tr = c_re(inout[14 * stride]);
15505 			 ti = c_im(inout[14 * stride]);
15506 			 twr = c_re(W[13]);
15507 			 twi = c_im(W[13]);
15508 			 tre2_0_0 = (tr * twr) - (ti * twi);
15509 			 tim2_0_0 = (tr * twi) + (ti * twr);
15510 		    }
15511 		    {
15512 			 FFTW_REAL tr;
15513 			 FFTW_REAL ti;
15514 			 FFTW_REAL twr;
15515 			 FFTW_REAL twi;
15516 			 tr = c_re(inout[46 * stride]);
15517 			 ti = c_im(inout[46 * stride]);
15518 			 twr = c_re(W[45]);
15519 			 twi = c_im(W[45]);
15520 			 tre2_1_0 = (tr * twr) - (ti * twi);
15521 			 tim2_1_0 = (tr * twi) + (ti * twr);
15522 		    }
15523 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
15524 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
15525 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
15526 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
15527 	       }
15528 	       {
15529 		    FFTW_REAL tre2_0_0;
15530 		    FFTW_REAL tim2_0_0;
15531 		    FFTW_REAL tre2_1_0;
15532 		    FFTW_REAL tim2_1_0;
15533 		    {
15534 			 FFTW_REAL tr;
15535 			 FFTW_REAL ti;
15536 			 FFTW_REAL twr;
15537 			 FFTW_REAL twi;
15538 			 tr = c_re(inout[22 * stride]);
15539 			 ti = c_im(inout[22 * stride]);
15540 			 twr = c_re(W[21]);
15541 			 twi = c_im(W[21]);
15542 			 tre2_0_0 = (tr * twr) - (ti * twi);
15543 			 tim2_0_0 = (tr * twi) + (ti * twr);
15544 		    }
15545 		    {
15546 			 FFTW_REAL tr;
15547 			 FFTW_REAL ti;
15548 			 FFTW_REAL twr;
15549 			 FFTW_REAL twi;
15550 			 tr = c_re(inout[54 * stride]);
15551 			 ti = c_im(inout[54 * stride]);
15552 			 twr = c_re(W[53]);
15553 			 twi = c_im(W[53]);
15554 			 tre2_1_0 = (tr * twr) - (ti * twi);
15555 			 tim2_1_0 = (tr * twi) + (ti * twr);
15556 		    }
15557 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
15558 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
15559 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
15560 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
15561 	       }
15562 	       {
15563 		    FFTW_REAL tre2_0_0;
15564 		    FFTW_REAL tim2_0_0;
15565 		    FFTW_REAL tre2_1_0;
15566 		    FFTW_REAL tim2_1_0;
15567 		    {
15568 			 FFTW_REAL tr;
15569 			 FFTW_REAL ti;
15570 			 FFTW_REAL twr;
15571 			 FFTW_REAL twi;
15572 			 tr = c_re(inout[30 * stride]);
15573 			 ti = c_im(inout[30 * stride]);
15574 			 twr = c_re(W[29]);
15575 			 twi = c_im(W[29]);
15576 			 tre2_0_0 = (tr * twr) - (ti * twi);
15577 			 tim2_0_0 = (tr * twi) + (ti * twr);
15578 		    }
15579 		    {
15580 			 FFTW_REAL tr;
15581 			 FFTW_REAL ti;
15582 			 FFTW_REAL twr;
15583 			 FFTW_REAL twi;
15584 			 tr = c_re(inout[62 * stride]);
15585 			 ti = c_im(inout[62 * stride]);
15586 			 twr = c_re(W[61]);
15587 			 twi = c_im(W[61]);
15588 			 tre2_1_0 = (tr * twr) - (ti * twi);
15589 			 tim2_1_0 = (tr * twi) + (ti * twr);
15590 		    }
15591 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
15592 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
15593 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
15594 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
15595 	       }
15596 	       {
15597 		    FFTW_REAL tre2_0_0;
15598 		    FFTW_REAL tim2_0_0;
15599 		    FFTW_REAL tre2_0_1;
15600 		    FFTW_REAL tim2_0_1;
15601 		    FFTW_REAL tre2_1_0;
15602 		    FFTW_REAL tim2_1_0;
15603 		    FFTW_REAL tre2_1_1;
15604 		    FFTW_REAL tim2_1_1;
15605 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
15606 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
15607 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
15608 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
15609 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
15610 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
15611 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
15612 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
15613 		    tre0_0_6 = tre2_0_0 + tre2_0_1;
15614 		    tim0_0_6 = tim2_0_0 + tim2_0_1;
15615 		    tre0_4_6 = tre2_0_0 - tre2_0_1;
15616 		    tim0_4_6 = tim2_0_0 - tim2_0_1;
15617 		    tre0_2_6 = tre2_1_0 + tim2_1_1;
15618 		    tim0_2_6 = tim2_1_0 - tre2_1_1;
15619 		    tre0_6_6 = tre2_1_0 - tim2_1_1;
15620 		    tim0_6_6 = tim2_1_0 + tre2_1_1;
15621 	       }
15622 	       {
15623 		    FFTW_REAL tre2_0_0;
15624 		    FFTW_REAL tim2_0_0;
15625 		    FFTW_REAL tre2_0_1;
15626 		    FFTW_REAL tim2_0_1;
15627 		    FFTW_REAL tre2_1_0;
15628 		    FFTW_REAL tim2_1_0;
15629 		    FFTW_REAL tre2_1_1;
15630 		    FFTW_REAL tim2_1_1;
15631 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
15632 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
15633 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
15634 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
15635 		    {
15636 			 FFTW_REAL tre3_0_0;
15637 			 FFTW_REAL tim3_0_0;
15638 			 FFTW_REAL tre3_1_0;
15639 			 FFTW_REAL tim3_1_0;
15640 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
15641 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
15642 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
15643 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
15644 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
15645 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
15646 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
15647 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15648 		    }
15649 		    tre0_1_6 = tre2_0_0 + tre2_0_1;
15650 		    tim0_1_6 = tim2_0_0 + tim2_0_1;
15651 		    tre0_5_6 = tre2_0_0 - tre2_0_1;
15652 		    tim0_5_6 = tim2_0_0 - tim2_0_1;
15653 		    tre0_3_6 = tre2_1_0 + tim2_1_1;
15654 		    tim0_3_6 = tim2_1_0 - tre2_1_1;
15655 		    tre0_7_6 = tre2_1_0 - tim2_1_1;
15656 		    tim0_7_6 = tim2_1_0 + tre2_1_1;
15657 	       }
15658 	  }
15659 	  {
15660 	       FFTW_REAL tre1_0_0;
15661 	       FFTW_REAL tim1_0_0;
15662 	       FFTW_REAL tre1_0_1;
15663 	       FFTW_REAL tim1_0_1;
15664 	       FFTW_REAL tre1_0_2;
15665 	       FFTW_REAL tim1_0_2;
15666 	       FFTW_REAL tre1_0_3;
15667 	       FFTW_REAL tim1_0_3;
15668 	       FFTW_REAL tre1_1_0;
15669 	       FFTW_REAL tim1_1_0;
15670 	       FFTW_REAL tre1_1_1;
15671 	       FFTW_REAL tim1_1_1;
15672 	       FFTW_REAL tre1_1_2;
15673 	       FFTW_REAL tim1_1_2;
15674 	       FFTW_REAL tre1_1_3;
15675 	       FFTW_REAL tim1_1_3;
15676 	       {
15677 		    FFTW_REAL tre2_0_0;
15678 		    FFTW_REAL tim2_0_0;
15679 		    FFTW_REAL tre2_1_0;
15680 		    FFTW_REAL tim2_1_0;
15681 		    {
15682 			 FFTW_REAL tr;
15683 			 FFTW_REAL ti;
15684 			 FFTW_REAL twr;
15685 			 FFTW_REAL twi;
15686 			 tr = c_re(inout[7 * stride]);
15687 			 ti = c_im(inout[7 * stride]);
15688 			 twr = c_re(W[6]);
15689 			 twi = c_im(W[6]);
15690 			 tre2_0_0 = (tr * twr) - (ti * twi);
15691 			 tim2_0_0 = (tr * twi) + (ti * twr);
15692 		    }
15693 		    {
15694 			 FFTW_REAL tr;
15695 			 FFTW_REAL ti;
15696 			 FFTW_REAL twr;
15697 			 FFTW_REAL twi;
15698 			 tr = c_re(inout[39 * stride]);
15699 			 ti = c_im(inout[39 * stride]);
15700 			 twr = c_re(W[38]);
15701 			 twi = c_im(W[38]);
15702 			 tre2_1_0 = (tr * twr) - (ti * twi);
15703 			 tim2_1_0 = (tr * twi) + (ti * twr);
15704 		    }
15705 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
15706 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
15707 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
15708 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
15709 	       }
15710 	       {
15711 		    FFTW_REAL tre2_0_0;
15712 		    FFTW_REAL tim2_0_0;
15713 		    FFTW_REAL tre2_1_0;
15714 		    FFTW_REAL tim2_1_0;
15715 		    {
15716 			 FFTW_REAL tr;
15717 			 FFTW_REAL ti;
15718 			 FFTW_REAL twr;
15719 			 FFTW_REAL twi;
15720 			 tr = c_re(inout[15 * stride]);
15721 			 ti = c_im(inout[15 * stride]);
15722 			 twr = c_re(W[14]);
15723 			 twi = c_im(W[14]);
15724 			 tre2_0_0 = (tr * twr) - (ti * twi);
15725 			 tim2_0_0 = (tr * twi) + (ti * twr);
15726 		    }
15727 		    {
15728 			 FFTW_REAL tr;
15729 			 FFTW_REAL ti;
15730 			 FFTW_REAL twr;
15731 			 FFTW_REAL twi;
15732 			 tr = c_re(inout[47 * stride]);
15733 			 ti = c_im(inout[47 * stride]);
15734 			 twr = c_re(W[46]);
15735 			 twi = c_im(W[46]);
15736 			 tre2_1_0 = (tr * twr) - (ti * twi);
15737 			 tim2_1_0 = (tr * twi) + (ti * twr);
15738 		    }
15739 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
15740 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
15741 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
15742 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
15743 	       }
15744 	       {
15745 		    FFTW_REAL tre2_0_0;
15746 		    FFTW_REAL tim2_0_0;
15747 		    FFTW_REAL tre2_1_0;
15748 		    FFTW_REAL tim2_1_0;
15749 		    {
15750 			 FFTW_REAL tr;
15751 			 FFTW_REAL ti;
15752 			 FFTW_REAL twr;
15753 			 FFTW_REAL twi;
15754 			 tr = c_re(inout[23 * stride]);
15755 			 ti = c_im(inout[23 * stride]);
15756 			 twr = c_re(W[22]);
15757 			 twi = c_im(W[22]);
15758 			 tre2_0_0 = (tr * twr) - (ti * twi);
15759 			 tim2_0_0 = (tr * twi) + (ti * twr);
15760 		    }
15761 		    {
15762 			 FFTW_REAL tr;
15763 			 FFTW_REAL ti;
15764 			 FFTW_REAL twr;
15765 			 FFTW_REAL twi;
15766 			 tr = c_re(inout[55 * stride]);
15767 			 ti = c_im(inout[55 * stride]);
15768 			 twr = c_re(W[54]);
15769 			 twi = c_im(W[54]);
15770 			 tre2_1_0 = (tr * twr) - (ti * twi);
15771 			 tim2_1_0 = (tr * twi) + (ti * twr);
15772 		    }
15773 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
15774 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
15775 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
15776 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
15777 	       }
15778 	       {
15779 		    FFTW_REAL tre2_0_0;
15780 		    FFTW_REAL tim2_0_0;
15781 		    FFTW_REAL tre2_1_0;
15782 		    FFTW_REAL tim2_1_0;
15783 		    {
15784 			 FFTW_REAL tr;
15785 			 FFTW_REAL ti;
15786 			 FFTW_REAL twr;
15787 			 FFTW_REAL twi;
15788 			 tr = c_re(inout[31 * stride]);
15789 			 ti = c_im(inout[31 * stride]);
15790 			 twr = c_re(W[30]);
15791 			 twi = c_im(W[30]);
15792 			 tre2_0_0 = (tr * twr) - (ti * twi);
15793 			 tim2_0_0 = (tr * twi) + (ti * twr);
15794 		    }
15795 		    {
15796 			 FFTW_REAL tr;
15797 			 FFTW_REAL ti;
15798 			 FFTW_REAL twr;
15799 			 FFTW_REAL twi;
15800 			 tr = c_re(inout[63 * stride]);
15801 			 ti = c_im(inout[63 * stride]);
15802 			 twr = c_re(W[62]);
15803 			 twi = c_im(W[62]);
15804 			 tre2_1_0 = (tr * twr) - (ti * twi);
15805 			 tim2_1_0 = (tr * twi) + (ti * twr);
15806 		    }
15807 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
15808 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
15809 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
15810 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
15811 	       }
15812 	       {
15813 		    FFTW_REAL tre2_0_0;
15814 		    FFTW_REAL tim2_0_0;
15815 		    FFTW_REAL tre2_0_1;
15816 		    FFTW_REAL tim2_0_1;
15817 		    FFTW_REAL tre2_1_0;
15818 		    FFTW_REAL tim2_1_0;
15819 		    FFTW_REAL tre2_1_1;
15820 		    FFTW_REAL tim2_1_1;
15821 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
15822 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
15823 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
15824 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
15825 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
15826 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
15827 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
15828 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
15829 		    tre0_0_7 = tre2_0_0 + tre2_0_1;
15830 		    tim0_0_7 = tim2_0_0 + tim2_0_1;
15831 		    tre0_4_7 = tre2_0_0 - tre2_0_1;
15832 		    tim0_4_7 = tim2_0_0 - tim2_0_1;
15833 		    tre0_2_7 = tre2_1_0 + tim2_1_1;
15834 		    tim0_2_7 = tim2_1_0 - tre2_1_1;
15835 		    tre0_6_7 = tre2_1_0 - tim2_1_1;
15836 		    tim0_6_7 = tim2_1_0 + tre2_1_1;
15837 	       }
15838 	       {
15839 		    FFTW_REAL tre2_0_0;
15840 		    FFTW_REAL tim2_0_0;
15841 		    FFTW_REAL tre2_0_1;
15842 		    FFTW_REAL tim2_0_1;
15843 		    FFTW_REAL tre2_1_0;
15844 		    FFTW_REAL tim2_1_0;
15845 		    FFTW_REAL tre2_1_1;
15846 		    FFTW_REAL tim2_1_1;
15847 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
15848 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
15849 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
15850 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
15851 		    {
15852 			 FFTW_REAL tre3_0_0;
15853 			 FFTW_REAL tim3_0_0;
15854 			 FFTW_REAL tre3_1_0;
15855 			 FFTW_REAL tim3_1_0;
15856 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
15857 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
15858 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
15859 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
15860 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
15861 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
15862 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
15863 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15864 		    }
15865 		    tre0_1_7 = tre2_0_0 + tre2_0_1;
15866 		    tim0_1_7 = tim2_0_0 + tim2_0_1;
15867 		    tre0_5_7 = tre2_0_0 - tre2_0_1;
15868 		    tim0_5_7 = tim2_0_0 - tim2_0_1;
15869 		    tre0_3_7 = tre2_1_0 + tim2_1_1;
15870 		    tim0_3_7 = tim2_1_0 - tre2_1_1;
15871 		    tre0_7_7 = tre2_1_0 - tim2_1_1;
15872 		    tim0_7_7 = tim2_1_0 + tre2_1_1;
15873 	       }
15874 	  }
15875 	  {
15876 	       FFTW_REAL tre1_0_0;
15877 	       FFTW_REAL tim1_0_0;
15878 	       FFTW_REAL tre1_0_1;
15879 	       FFTW_REAL tim1_0_1;
15880 	       FFTW_REAL tre1_0_2;
15881 	       FFTW_REAL tim1_0_2;
15882 	       FFTW_REAL tre1_0_3;
15883 	       FFTW_REAL tim1_0_3;
15884 	       FFTW_REAL tre1_1_0;
15885 	       FFTW_REAL tim1_1_0;
15886 	       FFTW_REAL tre1_1_1;
15887 	       FFTW_REAL tim1_1_1;
15888 	       FFTW_REAL tre1_1_2;
15889 	       FFTW_REAL tim1_1_2;
15890 	       FFTW_REAL tre1_1_3;
15891 	       FFTW_REAL tim1_1_3;
15892 	       tre1_0_0 = tre0_0_0 + tre0_0_4;
15893 	       tim1_0_0 = tim0_0_0 + tim0_0_4;
15894 	       tre1_1_0 = tre0_0_0 - tre0_0_4;
15895 	       tim1_1_0 = tim0_0_0 - tim0_0_4;
15896 	       tre1_0_1 = tre0_0_1 + tre0_0_5;
15897 	       tim1_0_1 = tim0_0_1 + tim0_0_5;
15898 	       tre1_1_1 = tre0_0_1 - tre0_0_5;
15899 	       tim1_1_1 = tim0_0_1 - tim0_0_5;
15900 	       tre1_0_2 = tre0_0_2 + tre0_0_6;
15901 	       tim1_0_2 = tim0_0_2 + tim0_0_6;
15902 	       tre1_1_2 = tre0_0_2 - tre0_0_6;
15903 	       tim1_1_2 = tim0_0_2 - tim0_0_6;
15904 	       tre1_0_3 = tre0_0_3 + tre0_0_7;
15905 	       tim1_0_3 = tim0_0_3 + tim0_0_7;
15906 	       tre1_1_3 = tre0_0_3 - tre0_0_7;
15907 	       tim1_1_3 = tim0_0_3 - tim0_0_7;
15908 	       {
15909 		    FFTW_REAL tre2_0_0;
15910 		    FFTW_REAL tim2_0_0;
15911 		    FFTW_REAL tre2_0_1;
15912 		    FFTW_REAL tim2_0_1;
15913 		    FFTW_REAL tre2_1_0;
15914 		    FFTW_REAL tim2_1_0;
15915 		    FFTW_REAL tre2_1_1;
15916 		    FFTW_REAL tim2_1_1;
15917 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
15918 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
15919 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
15920 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
15921 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
15922 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
15923 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
15924 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
15925 		    c_re(inout[0]) = tre2_0_0 + tre2_0_1;
15926 		    c_im(inout[0]) = tim2_0_0 + tim2_0_1;
15927 		    c_re(inout[32 * stride]) = tre2_0_0 - tre2_0_1;
15928 		    c_im(inout[32 * stride]) = tim2_0_0 - tim2_0_1;
15929 		    c_re(inout[16 * stride]) = tre2_1_0 + tim2_1_1;
15930 		    c_im(inout[16 * stride]) = tim2_1_0 - tre2_1_1;
15931 		    c_re(inout[48 * stride]) = tre2_1_0 - tim2_1_1;
15932 		    c_im(inout[48 * stride]) = tim2_1_0 + tre2_1_1;
15933 	       }
15934 	       {
15935 		    FFTW_REAL tre2_0_0;
15936 		    FFTW_REAL tim2_0_0;
15937 		    FFTW_REAL tre2_0_1;
15938 		    FFTW_REAL tim2_0_1;
15939 		    FFTW_REAL tre2_1_0;
15940 		    FFTW_REAL tim2_1_0;
15941 		    FFTW_REAL tre2_1_1;
15942 		    FFTW_REAL tim2_1_1;
15943 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
15944 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
15945 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
15946 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
15947 		    {
15948 			 FFTW_REAL tre3_0_0;
15949 			 FFTW_REAL tim3_0_0;
15950 			 FFTW_REAL tre3_1_0;
15951 			 FFTW_REAL tim3_1_0;
15952 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
15953 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
15954 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
15955 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
15956 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
15957 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
15958 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
15959 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
15960 		    }
15961 		    c_re(inout[8 * stride]) = tre2_0_0 + tre2_0_1;
15962 		    c_im(inout[8 * stride]) = tim2_0_0 + tim2_0_1;
15963 		    c_re(inout[40 * stride]) = tre2_0_0 - tre2_0_1;
15964 		    c_im(inout[40 * stride]) = tim2_0_0 - tim2_0_1;
15965 		    c_re(inout[24 * stride]) = tre2_1_0 + tim2_1_1;
15966 		    c_im(inout[24 * stride]) = tim2_1_0 - tre2_1_1;
15967 		    c_re(inout[56 * stride]) = tre2_1_0 - tim2_1_1;
15968 		    c_im(inout[56 * stride]) = tim2_1_0 + tre2_1_1;
15969 	       }
15970 	  }
15971 	  {
15972 	       FFTW_REAL tre1_0_0;
15973 	       FFTW_REAL tim1_0_0;
15974 	       FFTW_REAL tre1_0_1;
15975 	       FFTW_REAL tim1_0_1;
15976 	       FFTW_REAL tre1_0_2;
15977 	       FFTW_REAL tim1_0_2;
15978 	       FFTW_REAL tre1_0_3;
15979 	       FFTW_REAL tim1_0_3;
15980 	       FFTW_REAL tre1_1_0;
15981 	       FFTW_REAL tim1_1_0;
15982 	       FFTW_REAL tre1_1_1;
15983 	       FFTW_REAL tim1_1_1;
15984 	       FFTW_REAL tre1_1_2;
15985 	       FFTW_REAL tim1_1_2;
15986 	       FFTW_REAL tre1_1_3;
15987 	       FFTW_REAL tim1_1_3;
15988 	       {
15989 		    FFTW_REAL tre2_1_0;
15990 		    FFTW_REAL tim2_1_0;
15991 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_4) + (((FFTW_REAL) FFTW_K382683432) * tim0_1_4);
15992 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_4) - (((FFTW_REAL) FFTW_K382683432) * tre0_1_4);
15993 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
15994 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
15995 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
15996 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
15997 	       }
15998 	       {
15999 		    FFTW_REAL tre2_0_0;
16000 		    FFTW_REAL tim2_0_0;
16001 		    FFTW_REAL tre2_1_0;
16002 		    FFTW_REAL tim2_1_0;
16003 		    tre2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tre0_1_1) + (((FFTW_REAL) FFTW_K098017140) * tim0_1_1);
16004 		    tim2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tim0_1_1) - (((FFTW_REAL) FFTW_K098017140) * tre0_1_1);
16005 		    tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_1_5) + (((FFTW_REAL) FFTW_K471396736) * tim0_1_5);
16006 		    tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_1_5) - (((FFTW_REAL) FFTW_K471396736) * tre0_1_5);
16007 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16008 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
16009 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16010 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
16011 	       }
16012 	       {
16013 		    FFTW_REAL tre2_0_0;
16014 		    FFTW_REAL tim2_0_0;
16015 		    FFTW_REAL tre2_1_0;
16016 		    FFTW_REAL tim2_1_0;
16017 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_2) + (((FFTW_REAL) FFTW_K195090322) * tim0_1_2);
16018 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_2) - (((FFTW_REAL) FFTW_K195090322) * tre0_1_2);
16019 		    tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_6) + (((FFTW_REAL) FFTW_K555570233) * tim0_1_6);
16020 		    tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_6) - (((FFTW_REAL) FFTW_K555570233) * tre0_1_6);
16021 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
16022 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
16023 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
16024 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
16025 	       }
16026 	       {
16027 		    FFTW_REAL tre2_0_0;
16028 		    FFTW_REAL tim2_0_0;
16029 		    FFTW_REAL tre2_1_0;
16030 		    FFTW_REAL tim2_1_0;
16031 		    tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_1_3) + (((FFTW_REAL) FFTW_K290284677) * tim0_1_3);
16032 		    tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_1_3) - (((FFTW_REAL) FFTW_K290284677) * tre0_1_3);
16033 		    tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_1_7) + (((FFTW_REAL) FFTW_K634393284) * tim0_1_7);
16034 		    tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_1_7) - (((FFTW_REAL) FFTW_K634393284) * tre0_1_7);
16035 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
16036 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
16037 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
16038 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
16039 	       }
16040 	       {
16041 		    FFTW_REAL tre2_0_0;
16042 		    FFTW_REAL tim2_0_0;
16043 		    FFTW_REAL tre2_0_1;
16044 		    FFTW_REAL tim2_0_1;
16045 		    FFTW_REAL tre2_1_0;
16046 		    FFTW_REAL tim2_1_0;
16047 		    FFTW_REAL tre2_1_1;
16048 		    FFTW_REAL tim2_1_1;
16049 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16050 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16051 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16052 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16053 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16054 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16055 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16056 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16057 		    c_re(inout[stride]) = tre2_0_0 + tre2_0_1;
16058 		    c_im(inout[stride]) = tim2_0_0 + tim2_0_1;
16059 		    c_re(inout[33 * stride]) = tre2_0_0 - tre2_0_1;
16060 		    c_im(inout[33 * stride]) = tim2_0_0 - tim2_0_1;
16061 		    c_re(inout[17 * stride]) = tre2_1_0 + tim2_1_1;
16062 		    c_im(inout[17 * stride]) = tim2_1_0 - tre2_1_1;
16063 		    c_re(inout[49 * stride]) = tre2_1_0 - tim2_1_1;
16064 		    c_im(inout[49 * stride]) = tim2_1_0 + tre2_1_1;
16065 	       }
16066 	       {
16067 		    FFTW_REAL tre2_0_0;
16068 		    FFTW_REAL tim2_0_0;
16069 		    FFTW_REAL tre2_0_1;
16070 		    FFTW_REAL tim2_0_1;
16071 		    FFTW_REAL tre2_1_0;
16072 		    FFTW_REAL tim2_1_0;
16073 		    FFTW_REAL tre2_1_1;
16074 		    FFTW_REAL tim2_1_1;
16075 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16076 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16077 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16078 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16079 		    {
16080 			 FFTW_REAL tre3_0_0;
16081 			 FFTW_REAL tim3_0_0;
16082 			 FFTW_REAL tre3_1_0;
16083 			 FFTW_REAL tim3_1_0;
16084 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16085 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16086 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16087 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16088 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16089 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16090 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16091 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16092 		    }
16093 		    c_re(inout[9 * stride]) = tre2_0_0 + tre2_0_1;
16094 		    c_im(inout[9 * stride]) = tim2_0_0 + tim2_0_1;
16095 		    c_re(inout[41 * stride]) = tre2_0_0 - tre2_0_1;
16096 		    c_im(inout[41 * stride]) = tim2_0_0 - tim2_0_1;
16097 		    c_re(inout[25 * stride]) = tre2_1_0 + tim2_1_1;
16098 		    c_im(inout[25 * stride]) = tim2_1_0 - tre2_1_1;
16099 		    c_re(inout[57 * stride]) = tre2_1_0 - tim2_1_1;
16100 		    c_im(inout[57 * stride]) = tim2_1_0 + tre2_1_1;
16101 	       }
16102 	  }
16103 	  {
16104 	       FFTW_REAL tre1_0_0;
16105 	       FFTW_REAL tim1_0_0;
16106 	       FFTW_REAL tre1_0_1;
16107 	       FFTW_REAL tim1_0_1;
16108 	       FFTW_REAL tre1_0_2;
16109 	       FFTW_REAL tim1_0_2;
16110 	       FFTW_REAL tre1_0_3;
16111 	       FFTW_REAL tim1_0_3;
16112 	       FFTW_REAL tre1_1_0;
16113 	       FFTW_REAL tim1_1_0;
16114 	       FFTW_REAL tre1_1_1;
16115 	       FFTW_REAL tim1_1_1;
16116 	       FFTW_REAL tre1_1_2;
16117 	       FFTW_REAL tim1_1_2;
16118 	       FFTW_REAL tre1_1_3;
16119 	       FFTW_REAL tim1_1_3;
16120 	       {
16121 		    FFTW_REAL tre2_1_0;
16122 		    FFTW_REAL tim2_1_0;
16123 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_4 + tim0_2_4);
16124 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_4 - tre0_2_4);
16125 		    tre1_0_0 = tre0_2_0 + tre2_1_0;
16126 		    tim1_0_0 = tim0_2_0 + tim2_1_0;
16127 		    tre1_1_0 = tre0_2_0 - tre2_1_0;
16128 		    tim1_1_0 = tim0_2_0 - tim2_1_0;
16129 	       }
16130 	       {
16131 		    FFTW_REAL tre2_0_0;
16132 		    FFTW_REAL tim2_0_0;
16133 		    FFTW_REAL tre2_1_0;
16134 		    FFTW_REAL tim2_1_0;
16135 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_2_1) + (((FFTW_REAL) FFTW_K195090322) * tim0_2_1);
16136 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_2_1) - (((FFTW_REAL) FFTW_K195090322) * tre0_2_1);
16137 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_2_5) + (((FFTW_REAL) FFTW_K831469612) * tim0_2_5);
16138 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_2_5) - (((FFTW_REAL) FFTW_K831469612) * tre0_2_5);
16139 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16140 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
16141 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16142 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
16143 	       }
16144 	       {
16145 		    FFTW_REAL tre2_0_0;
16146 		    FFTW_REAL tim2_0_0;
16147 		    FFTW_REAL tre2_1_0;
16148 		    FFTW_REAL tim2_1_0;
16149 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_2) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_2);
16150 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_2) - (((FFTW_REAL) FFTW_K382683432) * tre0_2_2);
16151 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_6) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_6);
16152 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_2_6);
16153 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
16154 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
16155 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
16156 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
16157 	       }
16158 	       {
16159 		    FFTW_REAL tre2_0_0;
16160 		    FFTW_REAL tim2_0_0;
16161 		    FFTW_REAL tre2_1_0;
16162 		    FFTW_REAL tim2_1_0;
16163 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_2_3) + (((FFTW_REAL) FFTW_K555570233) * tim0_2_3);
16164 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_2_3) - (((FFTW_REAL) FFTW_K555570233) * tre0_2_3);
16165 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_2_7) + (((FFTW_REAL) FFTW_K980785280) * tim0_2_7);
16166 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_2_7) - (((FFTW_REAL) FFTW_K980785280) * tre0_2_7);
16167 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
16168 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
16169 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
16170 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
16171 	       }
16172 	       {
16173 		    FFTW_REAL tre2_0_0;
16174 		    FFTW_REAL tim2_0_0;
16175 		    FFTW_REAL tre2_0_1;
16176 		    FFTW_REAL tim2_0_1;
16177 		    FFTW_REAL tre2_1_0;
16178 		    FFTW_REAL tim2_1_0;
16179 		    FFTW_REAL tre2_1_1;
16180 		    FFTW_REAL tim2_1_1;
16181 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16182 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16183 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16184 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16185 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16186 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16187 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16188 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16189 		    c_re(inout[2 * stride]) = tre2_0_0 + tre2_0_1;
16190 		    c_im(inout[2 * stride]) = tim2_0_0 + tim2_0_1;
16191 		    c_re(inout[34 * stride]) = tre2_0_0 - tre2_0_1;
16192 		    c_im(inout[34 * stride]) = tim2_0_0 - tim2_0_1;
16193 		    c_re(inout[18 * stride]) = tre2_1_0 + tim2_1_1;
16194 		    c_im(inout[18 * stride]) = tim2_1_0 - tre2_1_1;
16195 		    c_re(inout[50 * stride]) = tre2_1_0 - tim2_1_1;
16196 		    c_im(inout[50 * stride]) = tim2_1_0 + tre2_1_1;
16197 	       }
16198 	       {
16199 		    FFTW_REAL tre2_0_0;
16200 		    FFTW_REAL tim2_0_0;
16201 		    FFTW_REAL tre2_0_1;
16202 		    FFTW_REAL tim2_0_1;
16203 		    FFTW_REAL tre2_1_0;
16204 		    FFTW_REAL tim2_1_0;
16205 		    FFTW_REAL tre2_1_1;
16206 		    FFTW_REAL tim2_1_1;
16207 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16208 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16209 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16210 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16211 		    {
16212 			 FFTW_REAL tre3_0_0;
16213 			 FFTW_REAL tim3_0_0;
16214 			 FFTW_REAL tre3_1_0;
16215 			 FFTW_REAL tim3_1_0;
16216 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16217 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16218 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16219 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16220 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16221 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16222 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16223 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16224 		    }
16225 		    c_re(inout[10 * stride]) = tre2_0_0 + tre2_0_1;
16226 		    c_im(inout[10 * stride]) = tim2_0_0 + tim2_0_1;
16227 		    c_re(inout[42 * stride]) = tre2_0_0 - tre2_0_1;
16228 		    c_im(inout[42 * stride]) = tim2_0_0 - tim2_0_1;
16229 		    c_re(inout[26 * stride]) = tre2_1_0 + tim2_1_1;
16230 		    c_im(inout[26 * stride]) = tim2_1_0 - tre2_1_1;
16231 		    c_re(inout[58 * stride]) = tre2_1_0 - tim2_1_1;
16232 		    c_im(inout[58 * stride]) = tim2_1_0 + tre2_1_1;
16233 	       }
16234 	  }
16235 	  {
16236 	       FFTW_REAL tre1_0_0;
16237 	       FFTW_REAL tim1_0_0;
16238 	       FFTW_REAL tre1_0_1;
16239 	       FFTW_REAL tim1_0_1;
16240 	       FFTW_REAL tre1_0_2;
16241 	       FFTW_REAL tim1_0_2;
16242 	       FFTW_REAL tre1_0_3;
16243 	       FFTW_REAL tim1_0_3;
16244 	       FFTW_REAL tre1_1_0;
16245 	       FFTW_REAL tim1_1_0;
16246 	       FFTW_REAL tre1_1_1;
16247 	       FFTW_REAL tim1_1_1;
16248 	       FFTW_REAL tre1_1_2;
16249 	       FFTW_REAL tim1_1_2;
16250 	       FFTW_REAL tre1_1_3;
16251 	       FFTW_REAL tim1_1_3;
16252 	       {
16253 		    FFTW_REAL tre2_1_0;
16254 		    FFTW_REAL tim2_1_0;
16255 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_4) + (((FFTW_REAL) FFTW_K923879532) * tim0_3_4);
16256 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_4) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_4);
16257 		    tre1_0_0 = tre0_3_0 + tre2_1_0;
16258 		    tim1_0_0 = tim0_3_0 + tim2_1_0;
16259 		    tre1_1_0 = tre0_3_0 - tre2_1_0;
16260 		    tim1_1_0 = tim0_3_0 - tim2_1_0;
16261 	       }
16262 	       {
16263 		    FFTW_REAL tre2_0_0;
16264 		    FFTW_REAL tim2_0_0;
16265 		    FFTW_REAL tre2_1_0;
16266 		    FFTW_REAL tim2_1_0;
16267 		    tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_3_1) + (((FFTW_REAL) FFTW_K290284677) * tim0_3_1);
16268 		    tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_3_1) - (((FFTW_REAL) FFTW_K290284677) * tre0_3_1);
16269 		    tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_3_5) + (((FFTW_REAL) FFTW_K995184726) * tim0_3_5);
16270 		    tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_3_5) - (((FFTW_REAL) FFTW_K995184726) * tre0_3_5);
16271 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16272 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
16273 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16274 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
16275 	       }
16276 	       {
16277 		    FFTW_REAL tre2_0_0;
16278 		    FFTW_REAL tim2_0_0;
16279 		    FFTW_REAL tre2_1_0;
16280 		    FFTW_REAL tim2_1_0;
16281 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_2) + (((FFTW_REAL) FFTW_K555570233) * tim0_3_2);
16282 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_2) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_2);
16283 		    tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_3_6) - (((FFTW_REAL) FFTW_K195090322) * tre0_3_6);
16284 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_3_6) + (((FFTW_REAL) FFTW_K980785280) * tre0_3_6);
16285 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
16286 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
16287 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
16288 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
16289 	       }
16290 	       {
16291 		    FFTW_REAL tre2_0_0;
16292 		    FFTW_REAL tim2_0_0;
16293 		    FFTW_REAL tre2_1_0;
16294 		    FFTW_REAL tim2_1_0;
16295 		    tre2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_3_3) + (((FFTW_REAL) FFTW_K773010453) * tim0_3_3);
16296 		    tim2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_3_3) - (((FFTW_REAL) FFTW_K773010453) * tre0_3_3);
16297 		    tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_3_7) - (((FFTW_REAL) FFTW_K471396736) * tre0_3_7);
16298 		    tim2_1_0 = (((FFTW_REAL) FFTW_K471396736) * tim0_3_7) + (((FFTW_REAL) FFTW_K881921264) * tre0_3_7);
16299 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
16300 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
16301 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
16302 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
16303 	       }
16304 	       {
16305 		    FFTW_REAL tre2_0_0;
16306 		    FFTW_REAL tim2_0_0;
16307 		    FFTW_REAL tre2_0_1;
16308 		    FFTW_REAL tim2_0_1;
16309 		    FFTW_REAL tre2_1_0;
16310 		    FFTW_REAL tim2_1_0;
16311 		    FFTW_REAL tre2_1_1;
16312 		    FFTW_REAL tim2_1_1;
16313 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16314 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16315 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16316 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16317 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16318 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16319 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16320 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16321 		    c_re(inout[3 * stride]) = tre2_0_0 + tre2_0_1;
16322 		    c_im(inout[3 * stride]) = tim2_0_0 + tim2_0_1;
16323 		    c_re(inout[35 * stride]) = tre2_0_0 - tre2_0_1;
16324 		    c_im(inout[35 * stride]) = tim2_0_0 - tim2_0_1;
16325 		    c_re(inout[19 * stride]) = tre2_1_0 + tim2_1_1;
16326 		    c_im(inout[19 * stride]) = tim2_1_0 - tre2_1_1;
16327 		    c_re(inout[51 * stride]) = tre2_1_0 - tim2_1_1;
16328 		    c_im(inout[51 * stride]) = tim2_1_0 + tre2_1_1;
16329 	       }
16330 	       {
16331 		    FFTW_REAL tre2_0_0;
16332 		    FFTW_REAL tim2_0_0;
16333 		    FFTW_REAL tre2_0_1;
16334 		    FFTW_REAL tim2_0_1;
16335 		    FFTW_REAL tre2_1_0;
16336 		    FFTW_REAL tim2_1_0;
16337 		    FFTW_REAL tre2_1_1;
16338 		    FFTW_REAL tim2_1_1;
16339 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16340 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16341 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16342 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16343 		    {
16344 			 FFTW_REAL tre3_0_0;
16345 			 FFTW_REAL tim3_0_0;
16346 			 FFTW_REAL tre3_1_0;
16347 			 FFTW_REAL tim3_1_0;
16348 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16349 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16350 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16351 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16352 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16353 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16354 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16355 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16356 		    }
16357 		    c_re(inout[11 * stride]) = tre2_0_0 + tre2_0_1;
16358 		    c_im(inout[11 * stride]) = tim2_0_0 + tim2_0_1;
16359 		    c_re(inout[43 * stride]) = tre2_0_0 - tre2_0_1;
16360 		    c_im(inout[43 * stride]) = tim2_0_0 - tim2_0_1;
16361 		    c_re(inout[27 * stride]) = tre2_1_0 + tim2_1_1;
16362 		    c_im(inout[27 * stride]) = tim2_1_0 - tre2_1_1;
16363 		    c_re(inout[59 * stride]) = tre2_1_0 - tim2_1_1;
16364 		    c_im(inout[59 * stride]) = tim2_1_0 + tre2_1_1;
16365 	       }
16366 	  }
16367 	  {
16368 	       FFTW_REAL tre1_0_0;
16369 	       FFTW_REAL tim1_0_0;
16370 	       FFTW_REAL tre1_0_1;
16371 	       FFTW_REAL tim1_0_1;
16372 	       FFTW_REAL tre1_0_2;
16373 	       FFTW_REAL tim1_0_2;
16374 	       FFTW_REAL tre1_0_3;
16375 	       FFTW_REAL tim1_0_3;
16376 	       FFTW_REAL tre1_1_0;
16377 	       FFTW_REAL tim1_1_0;
16378 	       FFTW_REAL tre1_1_1;
16379 	       FFTW_REAL tim1_1_1;
16380 	       FFTW_REAL tre1_1_2;
16381 	       FFTW_REAL tim1_1_2;
16382 	       FFTW_REAL tre1_1_3;
16383 	       FFTW_REAL tim1_1_3;
16384 	       tre1_0_0 = tre0_4_0 + tim0_4_4;
16385 	       tim1_0_0 = tim0_4_0 - tre0_4_4;
16386 	       tre1_1_0 = tre0_4_0 - tim0_4_4;
16387 	       tim1_1_0 = tim0_4_0 + tre0_4_4;
16388 	       {
16389 		    FFTW_REAL tre2_0_0;
16390 		    FFTW_REAL tim2_0_0;
16391 		    FFTW_REAL tre2_1_0;
16392 		    FFTW_REAL tim2_1_0;
16393 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_1) + (((FFTW_REAL) FFTW_K382683432) * tim0_4_1);
16394 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_1) - (((FFTW_REAL) FFTW_K382683432) * tre0_4_1);
16395 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_5) - (((FFTW_REAL) FFTW_K382683432) * tre0_4_5);
16396 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_5) + (((FFTW_REAL) FFTW_K923879532) * tre0_4_5);
16397 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16398 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
16399 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16400 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
16401 	       }
16402 	       {
16403 		    FFTW_REAL tre2_0_0;
16404 		    FFTW_REAL tim2_0_0;
16405 		    FFTW_REAL tre2_1_0;
16406 		    FFTW_REAL tim2_1_0;
16407 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_2 + tim0_4_2);
16408 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_2 - tre0_4_2);
16409 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_6 - tre0_4_6);
16410 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_6 + tre0_4_6);
16411 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
16412 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
16413 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
16414 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
16415 	       }
16416 	       {
16417 		    FFTW_REAL tre2_0_0;
16418 		    FFTW_REAL tim2_0_0;
16419 		    FFTW_REAL tre2_1_0;
16420 		    FFTW_REAL tim2_1_0;
16421 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_3) + (((FFTW_REAL) FFTW_K923879532) * tim0_4_3);
16422 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_4_3);
16423 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_7) - (((FFTW_REAL) FFTW_K923879532) * tre0_4_7);
16424 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_7) + (((FFTW_REAL) FFTW_K382683432) * tre0_4_7);
16425 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
16426 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
16427 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
16428 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
16429 	       }
16430 	       {
16431 		    FFTW_REAL tre2_0_0;
16432 		    FFTW_REAL tim2_0_0;
16433 		    FFTW_REAL tre2_0_1;
16434 		    FFTW_REAL tim2_0_1;
16435 		    FFTW_REAL tre2_1_0;
16436 		    FFTW_REAL tim2_1_0;
16437 		    FFTW_REAL tre2_1_1;
16438 		    FFTW_REAL tim2_1_1;
16439 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16440 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16441 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16442 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16443 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16444 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16445 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16446 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16447 		    c_re(inout[4 * stride]) = tre2_0_0 + tre2_0_1;
16448 		    c_im(inout[4 * stride]) = tim2_0_0 + tim2_0_1;
16449 		    c_re(inout[36 * stride]) = tre2_0_0 - tre2_0_1;
16450 		    c_im(inout[36 * stride]) = tim2_0_0 - tim2_0_1;
16451 		    c_re(inout[20 * stride]) = tre2_1_0 + tim2_1_1;
16452 		    c_im(inout[20 * stride]) = tim2_1_0 - tre2_1_1;
16453 		    c_re(inout[52 * stride]) = tre2_1_0 - tim2_1_1;
16454 		    c_im(inout[52 * stride]) = tim2_1_0 + tre2_1_1;
16455 	       }
16456 	       {
16457 		    FFTW_REAL tre2_0_0;
16458 		    FFTW_REAL tim2_0_0;
16459 		    FFTW_REAL tre2_0_1;
16460 		    FFTW_REAL tim2_0_1;
16461 		    FFTW_REAL tre2_1_0;
16462 		    FFTW_REAL tim2_1_0;
16463 		    FFTW_REAL tre2_1_1;
16464 		    FFTW_REAL tim2_1_1;
16465 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16466 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16467 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16468 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16469 		    {
16470 			 FFTW_REAL tre3_0_0;
16471 			 FFTW_REAL tim3_0_0;
16472 			 FFTW_REAL tre3_1_0;
16473 			 FFTW_REAL tim3_1_0;
16474 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16475 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16476 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16477 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16478 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16479 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16480 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16481 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16482 		    }
16483 		    c_re(inout[12 * stride]) = tre2_0_0 + tre2_0_1;
16484 		    c_im(inout[12 * stride]) = tim2_0_0 + tim2_0_1;
16485 		    c_re(inout[44 * stride]) = tre2_0_0 - tre2_0_1;
16486 		    c_im(inout[44 * stride]) = tim2_0_0 - tim2_0_1;
16487 		    c_re(inout[28 * stride]) = tre2_1_0 + tim2_1_1;
16488 		    c_im(inout[28 * stride]) = tim2_1_0 - tre2_1_1;
16489 		    c_re(inout[60 * stride]) = tre2_1_0 - tim2_1_1;
16490 		    c_im(inout[60 * stride]) = tim2_1_0 + tre2_1_1;
16491 	       }
16492 	  }
16493 	  {
16494 	       FFTW_REAL tre1_0_0;
16495 	       FFTW_REAL tim1_0_0;
16496 	       FFTW_REAL tre1_0_1;
16497 	       FFTW_REAL tim1_0_1;
16498 	       FFTW_REAL tre1_0_2;
16499 	       FFTW_REAL tim1_0_2;
16500 	       FFTW_REAL tre1_0_3;
16501 	       FFTW_REAL tim1_0_3;
16502 	       FFTW_REAL tre1_1_0;
16503 	       FFTW_REAL tim1_1_0;
16504 	       FFTW_REAL tre1_1_1;
16505 	       FFTW_REAL tim1_1_1;
16506 	       FFTW_REAL tre1_1_2;
16507 	       FFTW_REAL tim1_1_2;
16508 	       FFTW_REAL tre1_1_3;
16509 	       FFTW_REAL tim1_1_3;
16510 	       {
16511 		    FFTW_REAL tre2_1_0;
16512 		    FFTW_REAL tim2_1_0;
16513 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_5_4) - (((FFTW_REAL) FFTW_K382683432) * tre0_5_4);
16514 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_5_4) + (((FFTW_REAL) FFTW_K923879532) * tre0_5_4);
16515 		    tre1_0_0 = tre0_5_0 + tre2_1_0;
16516 		    tim1_0_0 = tim0_5_0 - tim2_1_0;
16517 		    tre1_1_0 = tre0_5_0 - tre2_1_0;
16518 		    tim1_1_0 = tim0_5_0 + tim2_1_0;
16519 	       }
16520 	       {
16521 		    FFTW_REAL tre2_0_0;
16522 		    FFTW_REAL tim2_0_0;
16523 		    FFTW_REAL tre2_1_0;
16524 		    FFTW_REAL tim2_1_0;
16525 		    tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_5_1) + (((FFTW_REAL) FFTW_K471396736) * tim0_5_1);
16526 		    tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_5_1) - (((FFTW_REAL) FFTW_K471396736) * tre0_5_1);
16527 		    tre2_1_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_5_5) - (((FFTW_REAL) FFTW_K773010453) * tre0_5_5);
16528 		    tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_5_5) + (((FFTW_REAL) FFTW_K634393284) * tre0_5_5);
16529 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16530 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
16531 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16532 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
16533 	       }
16534 	       {
16535 		    FFTW_REAL tre2_0_0;
16536 		    FFTW_REAL tim2_0_0;
16537 		    FFTW_REAL tre2_1_0;
16538 		    FFTW_REAL tim2_1_0;
16539 		    tre2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_5_2) + (((FFTW_REAL) FFTW_K831469612) * tim0_5_2);
16540 		    tim2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_5_2) - (((FFTW_REAL) FFTW_K831469612) * tre0_5_2);
16541 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_5_6) - (((FFTW_REAL) FFTW_K980785280) * tre0_5_6);
16542 		    tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_5_6) + (((FFTW_REAL) FFTW_K195090322) * tre0_5_6);
16543 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
16544 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
16545 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
16546 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
16547 	       }
16548 	       {
16549 		    FFTW_REAL tre2_0_0;
16550 		    FFTW_REAL tim2_0_0;
16551 		    FFTW_REAL tre2_1_0;
16552 		    FFTW_REAL tim2_1_0;
16553 		    tre2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_5_3) + (((FFTW_REAL) FFTW_K995184726) * tim0_5_3);
16554 		    tim2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_5_3) - (((FFTW_REAL) FFTW_K995184726) * tre0_5_3);
16555 		    tre2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_5_7) + (((FFTW_REAL) FFTW_K290284677) * tim0_5_7);
16556 		    tim2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tre0_5_7) - (((FFTW_REAL) FFTW_K956940335) * tim0_5_7);
16557 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
16558 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
16559 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
16560 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
16561 	       }
16562 	       {
16563 		    FFTW_REAL tre2_0_0;
16564 		    FFTW_REAL tim2_0_0;
16565 		    FFTW_REAL tre2_0_1;
16566 		    FFTW_REAL tim2_0_1;
16567 		    FFTW_REAL tre2_1_0;
16568 		    FFTW_REAL tim2_1_0;
16569 		    FFTW_REAL tre2_1_1;
16570 		    FFTW_REAL tim2_1_1;
16571 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16572 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16573 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16574 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16575 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16576 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16577 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16578 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16579 		    c_re(inout[5 * stride]) = tre2_0_0 + tre2_0_1;
16580 		    c_im(inout[5 * stride]) = tim2_0_0 + tim2_0_1;
16581 		    c_re(inout[37 * stride]) = tre2_0_0 - tre2_0_1;
16582 		    c_im(inout[37 * stride]) = tim2_0_0 - tim2_0_1;
16583 		    c_re(inout[21 * stride]) = tre2_1_0 + tim2_1_1;
16584 		    c_im(inout[21 * stride]) = tim2_1_0 - tre2_1_1;
16585 		    c_re(inout[53 * stride]) = tre2_1_0 - tim2_1_1;
16586 		    c_im(inout[53 * stride]) = tim2_1_0 + tre2_1_1;
16587 	       }
16588 	       {
16589 		    FFTW_REAL tre2_0_0;
16590 		    FFTW_REAL tim2_0_0;
16591 		    FFTW_REAL tre2_0_1;
16592 		    FFTW_REAL tim2_0_1;
16593 		    FFTW_REAL tre2_1_0;
16594 		    FFTW_REAL tim2_1_0;
16595 		    FFTW_REAL tre2_1_1;
16596 		    FFTW_REAL tim2_1_1;
16597 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16598 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16599 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16600 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16601 		    {
16602 			 FFTW_REAL tre3_0_0;
16603 			 FFTW_REAL tim3_0_0;
16604 			 FFTW_REAL tre3_1_0;
16605 			 FFTW_REAL tim3_1_0;
16606 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16607 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16608 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16609 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16610 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16611 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16612 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16613 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16614 		    }
16615 		    c_re(inout[13 * stride]) = tre2_0_0 + tre2_0_1;
16616 		    c_im(inout[13 * stride]) = tim2_0_0 + tim2_0_1;
16617 		    c_re(inout[45 * stride]) = tre2_0_0 - tre2_0_1;
16618 		    c_im(inout[45 * stride]) = tim2_0_0 - tim2_0_1;
16619 		    c_re(inout[29 * stride]) = tre2_1_0 + tim2_1_1;
16620 		    c_im(inout[29 * stride]) = tim2_1_0 - tre2_1_1;
16621 		    c_re(inout[61 * stride]) = tre2_1_0 - tim2_1_1;
16622 		    c_im(inout[61 * stride]) = tim2_1_0 + tre2_1_1;
16623 	       }
16624 	  }
16625 	  {
16626 	       FFTW_REAL tre1_0_0;
16627 	       FFTW_REAL tim1_0_0;
16628 	       FFTW_REAL tre1_0_1;
16629 	       FFTW_REAL tim1_0_1;
16630 	       FFTW_REAL tre1_0_2;
16631 	       FFTW_REAL tim1_0_2;
16632 	       FFTW_REAL tre1_0_3;
16633 	       FFTW_REAL tim1_0_3;
16634 	       FFTW_REAL tre1_1_0;
16635 	       FFTW_REAL tim1_1_0;
16636 	       FFTW_REAL tre1_1_1;
16637 	       FFTW_REAL tim1_1_1;
16638 	       FFTW_REAL tre1_1_2;
16639 	       FFTW_REAL tim1_1_2;
16640 	       FFTW_REAL tre1_1_3;
16641 	       FFTW_REAL tim1_1_3;
16642 	       {
16643 		    FFTW_REAL tre2_1_0;
16644 		    FFTW_REAL tim2_1_0;
16645 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_6_4 - tre0_6_4);
16646 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_6_4 + tre0_6_4);
16647 		    tre1_0_0 = tre0_6_0 + tre2_1_0;
16648 		    tim1_0_0 = tim0_6_0 - tim2_1_0;
16649 		    tre1_1_0 = tre0_6_0 - tre2_1_0;
16650 		    tim1_1_0 = tim0_6_0 + tim2_1_0;
16651 	       }
16652 	       {
16653 		    FFTW_REAL tre2_0_0;
16654 		    FFTW_REAL tim2_0_0;
16655 		    FFTW_REAL tre2_1_0;
16656 		    FFTW_REAL tim2_1_0;
16657 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_1) + (((FFTW_REAL) FFTW_K555570233) * tim0_6_1);
16658 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_1) - (((FFTW_REAL) FFTW_K555570233) * tre0_6_1);
16659 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_6_5) - (((FFTW_REAL) FFTW_K980785280) * tre0_6_5);
16660 		    tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_6_5) + (((FFTW_REAL) FFTW_K195090322) * tre0_6_5);
16661 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
16662 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
16663 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
16664 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
16665 	       }
16666 	       {
16667 		    FFTW_REAL tre2_0_0;
16668 		    FFTW_REAL tim2_0_0;
16669 		    FFTW_REAL tre2_1_0;
16670 		    FFTW_REAL tim2_1_0;
16671 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_2) + (((FFTW_REAL) FFTW_K923879532) * tim0_6_2);
16672 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_2) - (((FFTW_REAL) FFTW_K923879532) * tre0_6_2);
16673 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_6_6) + (((FFTW_REAL) FFTW_K382683432) * tim0_6_6);
16674 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_6_6);
16675 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
16676 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
16677 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
16678 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
16679 	       }
16680 	       {
16681 		    FFTW_REAL tre2_0_0;
16682 		    FFTW_REAL tim2_0_0;
16683 		    FFTW_REAL tre2_1_0;
16684 		    FFTW_REAL tim2_1_0;
16685 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_6_3) - (((FFTW_REAL) FFTW_K195090322) * tre0_6_3);
16686 		    tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_6_3) + (((FFTW_REAL) FFTW_K980785280) * tre0_6_3);
16687 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_6_7) + (((FFTW_REAL) FFTW_K831469612) * tim0_6_7);
16688 		    tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_7) - (((FFTW_REAL) FFTW_K555570233) * tim0_6_7);
16689 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
16690 		    tim1_0_3 = tim2_1_0 - tim2_0_0;
16691 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
16692 		    tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
16693 	       }
16694 	       {
16695 		    FFTW_REAL tre2_0_0;
16696 		    FFTW_REAL tim2_0_0;
16697 		    FFTW_REAL tre2_0_1;
16698 		    FFTW_REAL tim2_0_1;
16699 		    FFTW_REAL tre2_1_0;
16700 		    FFTW_REAL tim2_1_0;
16701 		    FFTW_REAL tre2_1_1;
16702 		    FFTW_REAL tim2_1_1;
16703 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16704 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16705 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16706 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16707 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16708 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16709 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16710 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16711 		    c_re(inout[6 * stride]) = tre2_0_0 + tre2_0_1;
16712 		    c_im(inout[6 * stride]) = tim2_0_0 + tim2_0_1;
16713 		    c_re(inout[38 * stride]) = tre2_0_0 - tre2_0_1;
16714 		    c_im(inout[38 * stride]) = tim2_0_0 - tim2_0_1;
16715 		    c_re(inout[22 * stride]) = tre2_1_0 + tim2_1_1;
16716 		    c_im(inout[22 * stride]) = tim2_1_0 - tre2_1_1;
16717 		    c_re(inout[54 * stride]) = tre2_1_0 - tim2_1_1;
16718 		    c_im(inout[54 * stride]) = tim2_1_0 + tre2_1_1;
16719 	       }
16720 	       {
16721 		    FFTW_REAL tre2_0_0;
16722 		    FFTW_REAL tim2_0_0;
16723 		    FFTW_REAL tre2_0_1;
16724 		    FFTW_REAL tim2_0_1;
16725 		    FFTW_REAL tre2_1_0;
16726 		    FFTW_REAL tim2_1_0;
16727 		    FFTW_REAL tre2_1_1;
16728 		    FFTW_REAL tim2_1_1;
16729 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16730 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16731 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16732 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16733 		    {
16734 			 FFTW_REAL tre3_0_0;
16735 			 FFTW_REAL tim3_0_0;
16736 			 FFTW_REAL tre3_1_0;
16737 			 FFTW_REAL tim3_1_0;
16738 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16739 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16740 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16741 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16742 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16743 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16744 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16745 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16746 		    }
16747 		    c_re(inout[14 * stride]) = tre2_0_0 + tre2_0_1;
16748 		    c_im(inout[14 * stride]) = tim2_0_0 + tim2_0_1;
16749 		    c_re(inout[46 * stride]) = tre2_0_0 - tre2_0_1;
16750 		    c_im(inout[46 * stride]) = tim2_0_0 - tim2_0_1;
16751 		    c_re(inout[30 * stride]) = tre2_1_0 + tim2_1_1;
16752 		    c_im(inout[30 * stride]) = tim2_1_0 - tre2_1_1;
16753 		    c_re(inout[62 * stride]) = tre2_1_0 - tim2_1_1;
16754 		    c_im(inout[62 * stride]) = tim2_1_0 + tre2_1_1;
16755 	       }
16756 	  }
16757 	  {
16758 	       FFTW_REAL tre1_0_0;
16759 	       FFTW_REAL tim1_0_0;
16760 	       FFTW_REAL tre1_0_1;
16761 	       FFTW_REAL tim1_0_1;
16762 	       FFTW_REAL tre1_0_2;
16763 	       FFTW_REAL tim1_0_2;
16764 	       FFTW_REAL tre1_0_3;
16765 	       FFTW_REAL tim1_0_3;
16766 	       FFTW_REAL tre1_1_0;
16767 	       FFTW_REAL tim1_1_0;
16768 	       FFTW_REAL tre1_1_1;
16769 	       FFTW_REAL tim1_1_1;
16770 	       FFTW_REAL tre1_1_2;
16771 	       FFTW_REAL tim1_1_2;
16772 	       FFTW_REAL tre1_1_3;
16773 	       FFTW_REAL tim1_1_3;
16774 	       {
16775 		    FFTW_REAL tre2_1_0;
16776 		    FFTW_REAL tim2_1_0;
16777 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_7_4) - (((FFTW_REAL) FFTW_K923879532) * tre0_7_4);
16778 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_7_4) + (((FFTW_REAL) FFTW_K382683432) * tre0_7_4);
16779 		    tre1_0_0 = tre0_7_0 + tre2_1_0;
16780 		    tim1_0_0 = tim0_7_0 - tim2_1_0;
16781 		    tre1_1_0 = tre0_7_0 - tre2_1_0;
16782 		    tim1_1_0 = tim0_7_0 + tim2_1_0;
16783 	       }
16784 	       {
16785 		    FFTW_REAL tre2_0_0;
16786 		    FFTW_REAL tim2_0_0;
16787 		    FFTW_REAL tre2_1_0;
16788 		    FFTW_REAL tim2_1_0;
16789 		    tre2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_7_1) + (((FFTW_REAL) FFTW_K634393284) * tim0_7_1);
16790 		    tim2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_7_1) - (((FFTW_REAL) FFTW_K634393284) * tre0_7_1);
16791 		    tre2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_7_5) + (((FFTW_REAL) FFTW_K290284677) * tim0_7_5);
16792 		    tim2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tre0_7_5) - (((FFTW_REAL) FFTW_K956940335) * tim0_7_5);
16793 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
16794 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
16795 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
16796 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
16797 	       }
16798 	       {
16799 		    FFTW_REAL tre2_0_0;
16800 		    FFTW_REAL tim2_0_0;
16801 		    FFTW_REAL tre2_1_0;
16802 		    FFTW_REAL tim2_1_0;
16803 		    tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_7_2) + (((FFTW_REAL) FFTW_K980785280) * tim0_7_2);
16804 		    tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_7_2) - (((FFTW_REAL) FFTW_K980785280) * tre0_7_2);
16805 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_7_6) + (((FFTW_REAL) FFTW_K831469612) * tim0_7_6);
16806 		    tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_7_6) - (((FFTW_REAL) FFTW_K555570233) * tim0_7_6);
16807 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
16808 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
16809 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
16810 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
16811 	       }
16812 	       {
16813 		    FFTW_REAL tre2_0_0;
16814 		    FFTW_REAL tim2_0_0;
16815 		    FFTW_REAL tre2_1_0;
16816 		    FFTW_REAL tim2_1_0;
16817 		    tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_7_3) - (((FFTW_REAL) FFTW_K471396736) * tre0_7_3);
16818 		    tim2_0_0 = (((FFTW_REAL) FFTW_K471396736) * tim0_7_3) + (((FFTW_REAL) FFTW_K881921264) * tre0_7_3);
16819 		    tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_7_7) - (((FFTW_REAL) FFTW_K995184726) * tim0_7_7);
16820 		    tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_7_7) + (((FFTW_REAL) FFTW_K995184726) * tre0_7_7);
16821 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
16822 		    tim1_0_3 = tim2_1_0 - tim2_0_0;
16823 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
16824 		    tim1_1_3 = (-(tim2_0_0 + tim2_1_0));
16825 	       }
16826 	       {
16827 		    FFTW_REAL tre2_0_0;
16828 		    FFTW_REAL tim2_0_0;
16829 		    FFTW_REAL tre2_0_1;
16830 		    FFTW_REAL tim2_0_1;
16831 		    FFTW_REAL tre2_1_0;
16832 		    FFTW_REAL tim2_1_0;
16833 		    FFTW_REAL tre2_1_1;
16834 		    FFTW_REAL tim2_1_1;
16835 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
16836 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
16837 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
16838 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
16839 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
16840 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
16841 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
16842 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
16843 		    c_re(inout[7 * stride]) = tre2_0_0 + tre2_0_1;
16844 		    c_im(inout[7 * stride]) = tim2_0_0 + tim2_0_1;
16845 		    c_re(inout[39 * stride]) = tre2_0_0 - tre2_0_1;
16846 		    c_im(inout[39 * stride]) = tim2_0_0 - tim2_0_1;
16847 		    c_re(inout[23 * stride]) = tre2_1_0 + tim2_1_1;
16848 		    c_im(inout[23 * stride]) = tim2_1_0 - tre2_1_1;
16849 		    c_re(inout[55 * stride]) = tre2_1_0 - tim2_1_1;
16850 		    c_im(inout[55 * stride]) = tim2_1_0 + tre2_1_1;
16851 	       }
16852 	       {
16853 		    FFTW_REAL tre2_0_0;
16854 		    FFTW_REAL tim2_0_0;
16855 		    FFTW_REAL tre2_0_1;
16856 		    FFTW_REAL tim2_0_1;
16857 		    FFTW_REAL tre2_1_0;
16858 		    FFTW_REAL tim2_1_0;
16859 		    FFTW_REAL tre2_1_1;
16860 		    FFTW_REAL tim2_1_1;
16861 		    tre2_0_0 = tre1_1_0 + tim1_1_2;
16862 		    tim2_0_0 = tim1_1_0 - tre1_1_2;
16863 		    tre2_1_0 = tre1_1_0 - tim1_1_2;
16864 		    tim2_1_0 = tim1_1_0 + tre1_1_2;
16865 		    {
16866 			 FFTW_REAL tre3_0_0;
16867 			 FFTW_REAL tim3_0_0;
16868 			 FFTW_REAL tre3_1_0;
16869 			 FFTW_REAL tim3_1_0;
16870 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 + tim1_1_1);
16871 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 - tre1_1_1);
16872 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 - tre1_1_3);
16873 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_3 + tre1_1_3);
16874 			 tre2_0_1 = tre3_0_0 + tre3_1_0;
16875 			 tim2_0_1 = tim3_0_0 - tim3_1_0;
16876 			 tre2_1_1 = tre3_0_0 - tre3_1_0;
16877 			 tim2_1_1 = tim3_0_0 + tim3_1_0;
16878 		    }
16879 		    c_re(inout[15 * stride]) = tre2_0_0 + tre2_0_1;
16880 		    c_im(inout[15 * stride]) = tim2_0_0 + tim2_0_1;
16881 		    c_re(inout[47 * stride]) = tre2_0_0 - tre2_0_1;
16882 		    c_im(inout[47 * stride]) = tim2_0_0 - tim2_0_1;
16883 		    c_re(inout[31 * stride]) = tre2_1_0 + tim2_1_1;
16884 		    c_im(inout[31 * stride]) = tim2_1_0 - tre2_1_1;
16885 		    c_re(inout[63 * stride]) = tre2_1_0 - tim2_1_1;
16886 		    c_im(inout[63 * stride]) = tim2_1_0 + tre2_1_1;
16887 	       }
16888 	  }
16889      }
16890 }
16891 
16892 /* This function contains 102 FP additions and 60 FP multiplications */
16893 
/*
 * fftw_twiddle_7: in-place 7-point butterfly preceded by a twiddle multiply.
 *
 *   A      - first element of the data; it is both read and overwritten
 *   W      - twiddle factors; six consecutive entries are used per group
 *   stride - distance, in elements, between the seven points of one group
 *   m      - number of groups to process (the loop body never runs if m <= 0)
 *   dist   - distance, in elements, from the start of one group to the next
 *
 * The FFTW_K* constants are defined outside this function.  Judging by their
 * names they hold the cosines/sines of multiples of 2*pi/7 -- NOTE(review):
 * confirm against the header that defines them.
 */
static void fftw_twiddle_7(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     /* The six multipliers, converted to the working precision once. */
     const FFTW_REAL k623 = (FFTW_REAL) FFTW_K623489801;
     const FFTW_REAL k900 = (FFTW_REAL) FFTW_K900968867;
     const FFTW_REAL k222 = (FFTW_REAL) FFTW_K222520933;
     const FFTW_REAL k781 = (FFTW_REAL) FFTW_K781831482;
     const FFTW_REAL k974 = (FFTW_REAL) FFTW_K974927912;
     const FFTW_REAL k433 = (FFTW_REAL) FFTW_K433883739;
     FFTW_COMPLEX *io = A;
     int grp;

     for (grp = 0; grp < m; ++grp, io += dist, W += 6) {
	  FFTW_REAL xr[7];	/* real parts of the seven (twiddled) inputs */
	  FFTW_REAL xi[7];	/* imaginary parts of the same */
	  int k;

	  /*
	   * Every input is fetched before any output is stored, because the
	   * results go back into the very slots the inputs came from.
	   * Point 0 is used unchanged; point k (1..6) is multiplied by
	   * the complex twiddle W[k - 1].
	   */
	  xr[0] = c_re(io[0]);
	  xi[0] = c_im(io[0]);
	  for (k = 1; k < 7; ++k) {
	       const FFTW_REAL dr = c_re(io[k * stride]);
	       const FFTW_REAL di = c_im(io[k * stride]);
	       const FFTW_REAL wr = c_re(W[k - 1]);
	       const FFTW_REAL wi = c_im(W[k - 1]);

	       xr[k] = (dr * wr) - (di * wi);
	       xi[k] = (dr * wi) + (di * wr);
	  }

	  /* Output 0 is the plain sum of the seven points, added in index order. */
	  c_re(io[0]) = xr[0] + xr[1] + xr[2] + xr[3] + xr[4] + xr[5] + xr[6];
	  c_im(io[0]) = xi[0] + xi[1] + xi[2] + xi[3] + xi[4] + xi[5] + xi[6];

	  /*
	   * The remaining outputs come in pairs (1,6), (2,5), (3,4).  Each
	   * pair shares a "sum" term built from x[j] + x[7-j] and a "diff"
	   * term built from x[j] - x[7-j]; the first output of the pair is
	   * sum + diff, the second is sum - diff.  The operand order inside
	   * every expression is kept exactly as written so that rounding is
	   * unchanged.
	   */

	  /* Outputs 1 and 6. */
	  {
	       FFTW_REAL sum_re, diff_re, sum_im, diff_im;

	       sum_re = xr[0] + (k623 * (xr[1] + xr[6]))
		    - (k900 * (xr[3] + xr[4]))
		    - (k222 * (xr[2] + xr[5]));
	       diff_re = (k781 * (xi[1] - xi[6]))
		    + (k974 * (xi[2] - xi[5]))
		    + (k433 * (xi[3] - xi[4]));
	       c_re(io[stride]) = sum_re + diff_re;
	       c_re(io[6 * stride]) = sum_re - diff_re;

	       sum_im = xi[0] + (k623 * (xi[1] + xi[6]))
		    - (k900 * (xi[3] + xi[4]))
		    - (k222 * (xi[2] + xi[5]));
	       diff_im = (k781 * (xr[6] - xr[1]))
		    + (k974 * (xr[5] - xr[2]))
		    + (k433 * (xr[4] - xr[3]));
	       c_im(io[stride]) = sum_im + diff_im;
	       c_im(io[6 * stride]) = sum_im - diff_im;
	  }

	  /* Outputs 2 and 5. */
	  {
	       FFTW_REAL sum_re, diff_re, sum_im, diff_im;

	       sum_re = xr[0] + (k623 * (xr[3] + xr[4]))
		    - (k900 * (xr[2] + xr[5]))
		    - (k222 * (xr[1] + xr[6]));
	       diff_re = (k974 * (xi[1] - xi[6]))
		    + (k433 * (xi[5] - xi[2]))
		    + (k781 * (xi[4] - xi[3]));
	       c_re(io[2 * stride]) = sum_re + diff_re;
	       c_re(io[5 * stride]) = sum_re - diff_re;

	       sum_im = xi[0] + (k623 * (xi[3] + xi[4]))
		    - (k900 * (xi[2] + xi[5]))
		    - (k222 * (xi[1] + xi[6]));
	       diff_im = (k974 * (xr[6] - xr[1]))
		    + (k433 * (xr[2] - xr[5]))
		    + (k781 * (xr[3] - xr[4]));
	       c_im(io[2 * stride]) = sum_im + diff_im;
	       c_im(io[5 * stride]) = sum_im - diff_im;
	  }

	  /* Outputs 3 and 4. */
	  {
	       FFTW_REAL sum_re, diff_re, sum_im, diff_im;

	       sum_re = xr[0] + (k623 * (xr[2] + xr[5]))
		    - (k222 * (xr[3] + xr[4]))
		    - (k900 * (xr[1] + xr[6]));
	       diff_re = (k433 * (xi[1] - xi[6]))
		    + (k781 * (xi[5] - xi[2]))
		    + (k974 * (xi[3] - xi[4]));
	       c_re(io[3 * stride]) = sum_re + diff_re;
	       c_re(io[4 * stride]) = sum_re - diff_re;

	       sum_im = xi[0] + (k623 * (xi[2] + xi[5]))
		    - (k222 * (xi[3] + xi[4]))
		    - (k900 * (xi[1] + xi[6]));
	       diff_im = (k433 * (xr[6] - xr[1]))
		    + (k781 * (xr[2] - xr[5]))
		    + (k974 * (xr[4] - xr[3]));
	       c_im(io[3 * stride]) = sum_im + diff_im;
	       c_im(io[4 * stride]) = sum_im - diff_im;
	  }
     }
}
17040 
17041 /* This function contains 66 FP additions and 32 FP multiplications */
17042 
/*
 * fftw_twiddle_8 -- in-place radix-8 twiddle pass.
 *
 *   A       first complex element of the first group
 *   W       twiddle table; 7 consecutive entries are consumed per group
 *   stride  distance, in elements, between the 8 members of one group
 *   m       number of groups to process
 *   dist    distance, in elements, from one group to the next
 *
 * Within a group, member k (k = 1..7) is multiplied by W[k - 1] using a
 * plain complex product; member 0 is taken unchanged.  The eight values
 * then go through four 2-point butterflies on the pairs (k, k + 4),
 * followed by one 4-point stage producing the even-numbered outputs and
 * one producing the odd-numbered outputs.  Every load of a group happens
 * before its first store, and results land in the same eight slots.
 */

/* (out_re, out_im) = src * twf, plain complex multiplication. */
#define FFTW_TW8_CMUL(out_re, out_im, src, twf)                      \
     do {                                                            \
          const FFTW_REAL xr_ = c_re(src);                           \
          const FFTW_REAL xi_ = c_im(src);                           \
          const FFTW_REAL wr_ = c_re(twf);                           \
          const FFTW_REAL wi_ = c_im(twf);                           \
          (out_re) = (xr_ * wr_) - (xi_ * wi_);                      \
          (out_im) = (xr_ * wi_) + (xi_ * wr_);                      \
     } while (0)

static void fftw_twiddle_8(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     const FFTW_REAL k_sqrt_half = (FFTW_REAL) FFTW_K707106781;
     FFTW_COMPLEX *io = A;
     const FFTW_COMPLEX *tw = W;
     int group;

     for (group = 0; group < m; ++group, io += dist, tw += 7) {
          /* sK = u + v and dK = u - v for the pair (K, K + 4) */
          FFTW_REAL s0r, s0i, s1r, s1i, s2r, s2i, s3r, s3i;
          FFTW_REAL d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i;
          FFTW_REAL ur, ui, vr, vi;

          /* pair (0, 4): member 0 carries no twiddle */
          ur = c_re(io[0]);
          ui = c_im(io[0]);
          FFTW_TW8_CMUL(vr, vi, io[4 * stride], tw[3]);
          s0r = ur + vr;
          s0i = ui + vi;
          d0r = ur - vr;
          d0i = ui - vi;

          /* pair (1, 5) */
          FFTW_TW8_CMUL(ur, ui, io[stride], tw[0]);
          FFTW_TW8_CMUL(vr, vi, io[5 * stride], tw[4]);
          s1r = ur + vr;
          s1i = ui + vi;
          d1r = ur - vr;
          d1i = ui - vi;

          /* pair (2, 6) */
          FFTW_TW8_CMUL(ur, ui, io[2 * stride], tw[1]);
          FFTW_TW8_CMUL(vr, vi, io[6 * stride], tw[5]);
          s2r = ur + vr;
          s2i = ui + vi;
          d2r = ur - vr;
          d2i = ui - vi;

          /* pair (3, 7) */
          FFTW_TW8_CMUL(ur, ui, io[3 * stride], tw[2]);
          FFTW_TW8_CMUL(vr, vi, io[7 * stride], tw[6]);
          s3r = ur + vr;
          s3i = ui + vi;
          d3r = ur - vr;
          d3i = ui - vi;

          {
               /* even outputs (0, 4, 2, 6): 4-point stage on the sums */
               const FFTW_REAL e0r = s0r + s2r;
               const FFTW_REAL e0i = s0i + s2i;
               const FFTW_REAL e1r = s0r - s2r;
               const FFTW_REAL e1i = s0i - s2i;
               const FFTW_REAL f0r = s1r + s3r;
               const FFTW_REAL f0i = s1i + s3i;
               const FFTW_REAL f1r = s1r - s3r;
               const FFTW_REAL f1i = s1i - s3i;

               c_re(io[0]) = e0r + f0r;
               c_im(io[0]) = e0i + f0i;
               c_re(io[4 * stride]) = e0r - f0r;
               c_im(io[4 * stride]) = e0i - f0i;
               c_re(io[2 * stride]) = e1r + f1i;
               c_im(io[2 * stride]) = e1i - f1r;
               c_re(io[6 * stride]) = e1r - f1i;
               c_im(io[6 * stride]) = e1i + f1r;
          }

          {
               /*
                * odd outputs (1, 5, 3, 7): the differences d1 and d3 are
                * rotated by multiples of 45 degrees (hence the 0.7071...
                * factor), d2 by 90 degrees (a re/im swap with a sign).
                */
               const FFTW_REAL g0r = d0r + d2i;
               const FFTW_REAL g0i = d0i - d2r;
               const FFTW_REAL g1r = d0r - d2i;
               const FFTW_REAL g1i = d0i + d2r;
               const FFTW_REAL p_r = k_sqrt_half * (d1r + d1i);
               const FFTW_REAL p_i = k_sqrt_half * (d1i - d1r);
               const FFTW_REAL q_r = k_sqrt_half * (d3i - d3r);
               const FFTW_REAL q_i = k_sqrt_half * (d3i + d3r);
               const FFTW_REAL h0r = p_r + q_r;
               const FFTW_REAL h0i = p_i - q_i;
               const FFTW_REAL h1r = p_r - q_r;
               const FFTW_REAL h1i = p_i + q_i;

               c_re(io[stride]) = g0r + h0r;
               c_im(io[stride]) = g0i + h0i;
               c_re(io[5 * stride]) = g0r - h0r;
               c_im(io[5 * stride]) = g0i - h0i;
               c_re(io[3 * stride]) = g1r + h1i;
               c_im(io[3 * stride]) = g1i - h1r;
               c_re(io[7 * stride]) = g1r - h1i;
               c_im(io[7 * stride]) = g1i + h1r;
          }
     }
}

#undef FFTW_TW8_CMUL
17255 
17256 /* This function contains 108 FP additions and 72 FP multiplications */
17257 
/*
 * fftw_twiddle_9 -- in-place radix-9 twiddle pass.
 *
 *   A       first complex element of the first group
 *   W       twiddle table; 8 consecutive entries are consumed per group
 *   stride  distance, in elements, between the 9 members of one group
 *   m       number of groups to process
 *   dist    distance, in elements, from one group to the next
 *
 * Within a group, member k (k = 1..8) is multiplied by W[k - 1] using a
 * plain complex product; member 0 is taken unchanged.  The first stage
 * runs a 3-point butterfly over each triple (c, c + 3, c + 6), c = 0..2.
 * The second stage combines, for each first-stage output row, the three
 * column results (rows 1 and 2 rotate columns 1 and 2 by fixed constants
 * first) with another 3-point butterfly and stores to the same nine
 * slots.  Every load of a group happens before its first store.
 */

/* (out_re, out_im) = src * twf, plain complex multiplication. */
#define FFTW_TW9_CMUL(out_re, out_im, src, twf)                      \
     do {                                                            \
          const FFTW_REAL xr_ = c_re(src);                           \
          const FFTW_REAL xi_ = c_im(src);                           \
          const FFTW_REAL wr_ = c_re(twf);                           \
          const FFTW_REAL wi_ = c_im(twf);                           \
          (out_re) = (xr_ * wr_) - (xi_ * wi_);                      \
          (out_im) = (xr_ * wi_) + (xi_ * wr_);                      \
     } while (0)

static void fftw_twiddle_9(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     const FFTW_REAL k_half = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k_866 = (FFTW_REAL) FFTW_K866025403;
     const FFTW_REAL k_766 = (FFTW_REAL) FFTW_K766044443;
     const FFTW_REAL k_642 = (FFTW_REAL) FFTW_K642787609;
     const FFTW_REAL k_173 = (FFTW_REAL) FFTW_K173648177;
     const FFTW_REAL k_984 = (FFTW_REAL) FFTW_K984807753;
     const FFTW_REAL k_342 = (FFTW_REAL) FFTW_K342020143;
     const FFTW_REAL k_939 = (FFTW_REAL) FFTW_K939692620;
     FFTW_COMPLEX *io = A;
     const FFTW_COMPLEX *tw = W;
     int group;

     for (group = 0; group < m; ++group, io += dist, tw += 8) {
          /* yRC: output row R of the first-stage butterfly on column C */
          FFTW_REAL y00r, y00i, y01r, y01i, y02r, y02i;
          FFTW_REAL y10r, y10i, y11r, y11i, y12r, y12i;
          FFTW_REAL y20r, y20i, y21r, y21i, y22r, y22i;
          FFTW_REAL ar, ai, br, bi, cr, ci;
          FFTW_REAL base, skew;

          /* column 0: members 0, 3, 6 (member 0 carries no twiddle) */
          ar = c_re(io[0]);
          ai = c_im(io[0]);
          FFTW_TW9_CMUL(br, bi, io[3 * stride], tw[2]);
          FFTW_TW9_CMUL(cr, ci, io[6 * stride], tw[5]);
          y00r = ar + br + cr;
          y00i = ai + bi + ci;
          base = ar - (k_half * (br + cr));
          skew = k_866 * (bi - ci);
          y10r = base + skew;
          y20r = base - skew;
          base = ai - (k_half * (bi + ci));
          skew = k_866 * (cr - br);
          y10i = base + skew;
          y20i = base - skew;

          /* column 1: members 1, 4, 7 */
          FFTW_TW9_CMUL(ar, ai, io[stride], tw[0]);
          FFTW_TW9_CMUL(br, bi, io[4 * stride], tw[3]);
          FFTW_TW9_CMUL(cr, ci, io[7 * stride], tw[6]);
          y01r = ar + br + cr;
          y01i = ai + bi + ci;
          base = ar - (k_half * (br + cr));
          skew = k_866 * (bi - ci);
          y11r = base + skew;
          y21r = base - skew;
          base = ai - (k_half * (bi + ci));
          skew = k_866 * (cr - br);
          y11i = base + skew;
          y21i = base - skew;

          /* column 2: members 2, 5, 8 */
          FFTW_TW9_CMUL(ar, ai, io[2 * stride], tw[1]);
          FFTW_TW9_CMUL(br, bi, io[5 * stride], tw[4]);
          FFTW_TW9_CMUL(cr, ci, io[8 * stride], tw[7]);
          y02r = ar + br + cr;
          y02i = ai + bi + ci;
          base = ar - (k_half * (br + cr));
          skew = k_866 * (bi - ci);
          y12r = base + skew;
          y22r = base - skew;
          base = ai - (k_half * (bi + ci));
          skew = k_866 * (cr - br);
          y12i = base + skew;
          y22i = base - skew;

          /* row 0 -> outputs 0, 3, 6 (no extra rotation needed) */
          c_re(io[0]) = y00r + y01r + y02r;
          c_im(io[0]) = y00i + y01i + y02i;
          base = y00r - (k_half * (y01r + y02r));
          skew = k_866 * (y01i - y02i);
          c_re(io[3 * stride]) = base + skew;
          c_re(io[6 * stride]) = base - skew;
          base = y00i - (k_half * (y01i + y02i));
          skew = k_866 * (y02r - y01r);
          c_im(io[3 * stride]) = base + skew;
          c_im(io[6 * stride]) = base - skew;

          {
               /* row 1 -> outputs 1, 4, 7: rotate columns 1 and 2 first */
               const FFTW_REAL pr = (k_766 * y11r) + (k_642 * y11i);
               const FFTW_REAL pi = (k_766 * y11i) - (k_642 * y11r);
               const FFTW_REAL qr = (k_173 * y12r) + (k_984 * y12i);
               const FFTW_REAL qi = (k_173 * y12i) - (k_984 * y12r);

               c_re(io[stride]) = y10r + pr + qr;
               c_im(io[stride]) = y10i + pi + qi;
               base = y10r - (k_half * (pr + qr));
               skew = k_866 * (pi - qi);
               c_re(io[4 * stride]) = base + skew;
               c_re(io[7 * stride]) = base - skew;
               base = y10i - (k_half * (pi + qi));
               skew = k_866 * (qr - pr);
               c_im(io[4 * stride]) = base + skew;
               c_im(io[7 * stride]) = base - skew;
          }

          {
               /*
                * row 2 -> outputs 2, 5, 8.  Here qi holds the NEGATED
                * imaginary part of the rotated column-2 value, which is
                * why it enters the formulas below with flipped signs.
                */
               const FFTW_REAL pr = (k_173 * y21r) + (k_984 * y21i);
               const FFTW_REAL pi = (k_173 * y21i) - (k_984 * y21r);
               const FFTW_REAL qr = (k_342 * y22i) - (k_939 * y22r);
               const FFTW_REAL qi = (k_939 * y22i) + (k_342 * y22r);

               c_re(io[2 * stride]) = y20r + pr + qr;
               c_im(io[2 * stride]) = y20i + pi - qi;
               base = y20r - (k_half * (pr + qr));
               skew = k_866 * (pi + qi);
               c_re(io[5 * stride]) = base + skew;
               c_re(io[8 * stride]) = base - skew;
               base = y20i + (k_half * (qi - pi));
               skew = k_866 * (qr - pr);
               c_im(io[5 * stride]) = base + skew;
               c_im(io[8 * stride]) = base - skew;
          }
     }
}

#undef FFTW_TW9_CMUL
17534 
17535 /* This function contains 126 FP additions and 68 FP multiplications */
17536 
/*
 * fftwi_twiddle_10 -- in-place radix-10 twiddle pass, conjugate-twiddle
 * variant.
 *
 *   A       first complex element of the first group
 *   W       twiddle table; 9 consecutive entries are consumed per group
 *   stride  distance, in elements, between the 10 members of one group
 *   m       number of groups to process
 *   dist    distance, in elements, from one group to the next
 *
 * Within a group, member k (k = 1..9) is multiplied by the complex
 * conjugate of W[k - 1]; member 0 is taken unchanged.  The first stage
 * forms sum and difference of five member pairs that lie 5 apart
 * ((0,5), (2,7), (4,9), (6,1), (8,3)).  The five sums then feed a 5-point
 * butterfly written to the even slots, and the five differences feed an
 * identical butterfly written to the odd slots.  Every load of a group
 * happens before its first store.
 */

/* (out_re, out_im) = src * conj(twf). */
#define FFTWI_TW10_CMULJ(out_re, out_im, src, twf)                   \
     do {                                                            \
          const FFTW_REAL xr_ = c_re(src);                           \
          const FFTW_REAL xi_ = c_im(src);                           \
          const FFTW_REAL wr_ = c_re(twf);                           \
          const FFTW_REAL wi_ = c_im(twf);                           \
          (out_re) = (xr_ * wr_) + (xi_ * wi_);                      \
          (out_im) = (xi_ * wr_) - (xr_ * wi_);                      \
     } while (0)

static void fftwi_twiddle_10(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     const FFTW_REAL k_309 = (FFTW_REAL) FFTW_K309016994;
     const FFTW_REAL k_809 = (FFTW_REAL) FFTW_K809016994;
     const FFTW_REAL k_951 = (FFTW_REAL) FFTW_K951056516;
     const FFTW_REAL k_587 = (FFTW_REAL) FFTW_K587785252;
     FFTW_COMPLEX *io = A;
     const FFTW_COMPLEX *tw = W;
     int group;

     for (group = 0; group < m; ++group, io += dist, tw += 9) {
          /* sK = u + v and dK = u - v for first-stage pair number K */
          FFTW_REAL s0r, s0i, s1r, s1i, s2r, s2i, s3r, s3i, s4r, s4i;
          FFTW_REAL d0r, d0i, d1r, d1i, d2r, d2i, d3r, d3i, d4r, d4i;
          FFTW_REAL ur, ui, vr, vi;
          FFTW_REAL base, skew;

          /* pair 0: members 0 and 5 (member 0 carries no twiddle) */
          ur = c_re(io[0]);
          ui = c_im(io[0]);
          FFTWI_TW10_CMULJ(vr, vi, io[5 * stride], tw[4]);
          s0r = ur + vr;
          s0i = ui + vi;
          d0r = ur - vr;
          d0i = ui - vi;

          /* pair 1: members 2 and 7 */
          FFTWI_TW10_CMULJ(ur, ui, io[2 * stride], tw[1]);
          FFTWI_TW10_CMULJ(vr, vi, io[7 * stride], tw[6]);
          s1r = ur + vr;
          s1i = ui + vi;
          d1r = ur - vr;
          d1i = ui - vi;

          /* pair 2: members 4 and 9 */
          FFTWI_TW10_CMULJ(ur, ui, io[4 * stride], tw[3]);
          FFTWI_TW10_CMULJ(vr, vi, io[9 * stride], tw[8]);
          s2r = ur + vr;
          s2i = ui + vi;
          d2r = ur - vr;
          d2i = ui - vi;

          /* pair 3: members 6 and 1 */
          FFTWI_TW10_CMULJ(ur, ui, io[6 * stride], tw[5]);
          FFTWI_TW10_CMULJ(vr, vi, io[stride], tw[0]);
          s3r = ur + vr;
          s3i = ui + vi;
          d3r = ur - vr;
          d3i = ui - vi;

          /* pair 4: members 8 and 3 */
          FFTWI_TW10_CMULJ(ur, ui, io[8 * stride], tw[7]);
          FFTWI_TW10_CMULJ(vr, vi, io[3 * stride], tw[2]);
          s4r = ur + vr;
          s4i = ui + vi;
          d4r = ur - vr;
          d4i = ui - vi;

          /* 5-point butterfly on the sums -> slots 0, 6, 4, 2, 8 */
          c_re(io[0]) = s0r + s1r + s2r + s3r + s4r;
          c_im(io[0]) = s0i + s1i + s2i + s3i + s4i;

          base = s0r + (k_309 * (s1r + s4r)) - (k_809 * (s2r + s3r));
          skew = (k_951 * (s4i - s1i)) + (k_587 * (s3i - s2i));
          c_re(io[6 * stride]) = base + skew;
          c_re(io[4 * stride]) = base - skew;

          base = s0i + (k_309 * (s1i + s4i)) - (k_809 * (s2i + s3i));
          skew = (k_951 * (s1r - s4r)) + (k_587 * (s2r - s3r));
          c_im(io[6 * stride]) = base + skew;
          c_im(io[4 * stride]) = base - skew;

          base = s0r + (k_309 * (s2r + s3r)) - (k_809 * (s1r + s4r));
          skew = (k_587 * (s4i - s1i)) + (k_951 * (s2i - s3i));
          c_re(io[2 * stride]) = base + skew;
          c_re(io[8 * stride]) = base - skew;

          base = s0i + (k_309 * (s2i + s3i)) - (k_809 * (s1i + s4i));
          skew = (k_587 * (s1r - s4r)) + (k_951 * (s3r - s2r));
          c_im(io[2 * stride]) = base + skew;
          c_im(io[8 * stride]) = base - skew;

          /* same butterfly on the differences -> slots 5, 1, 9, 7, 3 */
          c_re(io[5 * stride]) = d0r + d1r + d2r + d3r + d4r;
          c_im(io[5 * stride]) = d0i + d1i + d2i + d3i + d4i;

          base = d0r + (k_309 * (d1r + d4r)) - (k_809 * (d2r + d3r));
          skew = (k_951 * (d4i - d1i)) + (k_587 * (d3i - d2i));
          c_re(io[stride]) = base + skew;
          c_re(io[9 * stride]) = base - skew;

          base = d0i + (k_309 * (d1i + d4i)) - (k_809 * (d2i + d3i));
          skew = (k_951 * (d1r - d4r)) + (k_587 * (d2r - d3r));
          c_im(io[stride]) = base + skew;
          c_im(io[9 * stride]) = base - skew;

          base = d0r + (k_309 * (d2r + d3r)) - (k_809 * (d1r + d4r));
          skew = (k_587 * (d4i - d1i)) + (k_951 * (d2i - d3i));
          c_re(io[7 * stride]) = base + skew;
          c_re(io[3 * stride]) = base - skew;

          base = d0i + (k_309 * (d2i + d3i)) - (k_809 * (d1i + d4i));
          skew = (k_587 * (d1r - d4r)) + (k_951 * (d3r - d2r));
          c_im(io[7 * stride]) = base + skew;
          c_im(io[3 * stride]) = base - skew;
     }
}

#undef FFTWI_TW10_CMULJ
17793 
17794 /* This function contains 174 FP additions and 84 FP multiplications */
17795 
fftwi_twiddle_16(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)17796 static void fftwi_twiddle_16(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
17797 {
17798      int i;
17799      FFTW_COMPLEX *inout;
17800      inout = A;
17801      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 15) {
17802 	  FFTW_REAL tre0_0_0;
17803 	  FFTW_REAL tim0_0_0;
17804 	  FFTW_REAL tre0_0_1;
17805 	  FFTW_REAL tim0_0_1;
17806 	  FFTW_REAL tre0_0_2;
17807 	  FFTW_REAL tim0_0_2;
17808 	  FFTW_REAL tre0_0_3;
17809 	  FFTW_REAL tim0_0_3;
17810 	  FFTW_REAL tre0_1_0;
17811 	  FFTW_REAL tim0_1_0;
17812 	  FFTW_REAL tre0_1_1;
17813 	  FFTW_REAL tim0_1_1;
17814 	  FFTW_REAL tre0_1_2;
17815 	  FFTW_REAL tim0_1_2;
17816 	  FFTW_REAL tre0_1_3;
17817 	  FFTW_REAL tim0_1_3;
17818 	  FFTW_REAL tre0_2_0;
17819 	  FFTW_REAL tim0_2_0;
17820 	  FFTW_REAL tre0_2_1;
17821 	  FFTW_REAL tim0_2_1;
17822 	  FFTW_REAL tre0_2_2;
17823 	  FFTW_REAL tim0_2_2;
17824 	  FFTW_REAL tre0_2_3;
17825 	  FFTW_REAL tim0_2_3;
17826 	  FFTW_REAL tre0_3_0;
17827 	  FFTW_REAL tim0_3_0;
17828 	  FFTW_REAL tre0_3_1;
17829 	  FFTW_REAL tim0_3_1;
17830 	  FFTW_REAL tre0_3_2;
17831 	  FFTW_REAL tim0_3_2;
17832 	  FFTW_REAL tre0_3_3;
17833 	  FFTW_REAL tim0_3_3;
17834 	  {
17835 	       FFTW_REAL tre1_0_0;
17836 	       FFTW_REAL tim1_0_0;
17837 	       FFTW_REAL tre1_0_1;
17838 	       FFTW_REAL tim1_0_1;
17839 	       FFTW_REAL tre1_1_0;
17840 	       FFTW_REAL tim1_1_0;
17841 	       FFTW_REAL tre1_1_1;
17842 	       FFTW_REAL tim1_1_1;
17843 	       {
17844 		    FFTW_REAL tre2_0_0;
17845 		    FFTW_REAL tim2_0_0;
17846 		    FFTW_REAL tre2_1_0;
17847 		    FFTW_REAL tim2_1_0;
17848 		    tre2_0_0 = c_re(inout[0]);
17849 		    tim2_0_0 = c_im(inout[0]);
17850 		    {
17851 			 FFTW_REAL tr;
17852 			 FFTW_REAL ti;
17853 			 FFTW_REAL twr;
17854 			 FFTW_REAL twi;
17855 			 tr = c_re(inout[8 * stride]);
17856 			 ti = c_im(inout[8 * stride]);
17857 			 twr = c_re(W[7]);
17858 			 twi = c_im(W[7]);
17859 			 tre2_1_0 = (tr * twr) + (ti * twi);
17860 			 tim2_1_0 = (ti * twr) - (tr * twi);
17861 		    }
17862 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
17863 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
17864 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
17865 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
17866 	       }
17867 	       {
17868 		    FFTW_REAL tre2_0_0;
17869 		    FFTW_REAL tim2_0_0;
17870 		    FFTW_REAL tre2_1_0;
17871 		    FFTW_REAL tim2_1_0;
17872 		    {
17873 			 FFTW_REAL tr;
17874 			 FFTW_REAL ti;
17875 			 FFTW_REAL twr;
17876 			 FFTW_REAL twi;
17877 			 tr = c_re(inout[4 * stride]);
17878 			 ti = c_im(inout[4 * stride]);
17879 			 twr = c_re(W[3]);
17880 			 twi = c_im(W[3]);
17881 			 tre2_0_0 = (tr * twr) + (ti * twi);
17882 			 tim2_0_0 = (ti * twr) - (tr * twi);
17883 		    }
17884 		    {
17885 			 FFTW_REAL tr;
17886 			 FFTW_REAL ti;
17887 			 FFTW_REAL twr;
17888 			 FFTW_REAL twi;
17889 			 tr = c_re(inout[12 * stride]);
17890 			 ti = c_im(inout[12 * stride]);
17891 			 twr = c_re(W[11]);
17892 			 twi = c_im(W[11]);
17893 			 tre2_1_0 = (tr * twr) + (ti * twi);
17894 			 tim2_1_0 = (ti * twr) - (tr * twi);
17895 		    }
17896 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
17897 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
17898 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
17899 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
17900 	       }
17901 	       tre0_0_0 = tre1_0_0 + tre1_0_1;
17902 	       tim0_0_0 = tim1_0_0 + tim1_0_1;
17903 	       tre0_2_0 = tre1_0_0 - tre1_0_1;
17904 	       tim0_2_0 = tim1_0_0 - tim1_0_1;
17905 	       tre0_1_0 = tre1_1_0 - tim1_1_1;
17906 	       tim0_1_0 = tim1_1_0 + tre1_1_1;
17907 	       tre0_3_0 = tre1_1_0 + tim1_1_1;
17908 	       tim0_3_0 = tim1_1_0 - tre1_1_1;
17909 	  }
17910 	  {
17911 	       FFTW_REAL tre1_0_0;
17912 	       FFTW_REAL tim1_0_0;
17913 	       FFTW_REAL tre1_0_1;
17914 	       FFTW_REAL tim1_0_1;
17915 	       FFTW_REAL tre1_1_0;
17916 	       FFTW_REAL tim1_1_0;
17917 	       FFTW_REAL tre1_1_1;
17918 	       FFTW_REAL tim1_1_1;
17919 	       {
17920 		    FFTW_REAL tre2_0_0;
17921 		    FFTW_REAL tim2_0_0;
17922 		    FFTW_REAL tre2_1_0;
17923 		    FFTW_REAL tim2_1_0;
17924 		    {
17925 			 FFTW_REAL tr;
17926 			 FFTW_REAL ti;
17927 			 FFTW_REAL twr;
17928 			 FFTW_REAL twi;
17929 			 tr = c_re(inout[stride]);
17930 			 ti = c_im(inout[stride]);
17931 			 twr = c_re(W[0]);
17932 			 twi = c_im(W[0]);
17933 			 tre2_0_0 = (tr * twr) + (ti * twi);
17934 			 tim2_0_0 = (ti * twr) - (tr * twi);
17935 		    }
17936 		    {
17937 			 FFTW_REAL tr;
17938 			 FFTW_REAL ti;
17939 			 FFTW_REAL twr;
17940 			 FFTW_REAL twi;
17941 			 tr = c_re(inout[9 * stride]);
17942 			 ti = c_im(inout[9 * stride]);
17943 			 twr = c_re(W[8]);
17944 			 twi = c_im(W[8]);
17945 			 tre2_1_0 = (tr * twr) + (ti * twi);
17946 			 tim2_1_0 = (ti * twr) - (tr * twi);
17947 		    }
17948 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
17949 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
17950 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
17951 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
17952 	       }
17953 	       {
17954 		    FFTW_REAL tre2_0_0;
17955 		    FFTW_REAL tim2_0_0;
17956 		    FFTW_REAL tre2_1_0;
17957 		    FFTW_REAL tim2_1_0;
17958 		    {
17959 			 FFTW_REAL tr;
17960 			 FFTW_REAL ti;
17961 			 FFTW_REAL twr;
17962 			 FFTW_REAL twi;
17963 			 tr = c_re(inout[5 * stride]);
17964 			 ti = c_im(inout[5 * stride]);
17965 			 twr = c_re(W[4]);
17966 			 twi = c_im(W[4]);
17967 			 tre2_0_0 = (tr * twr) + (ti * twi);
17968 			 tim2_0_0 = (ti * twr) - (tr * twi);
17969 		    }
17970 		    {
17971 			 FFTW_REAL tr;
17972 			 FFTW_REAL ti;
17973 			 FFTW_REAL twr;
17974 			 FFTW_REAL twi;
17975 			 tr = c_re(inout[13 * stride]);
17976 			 ti = c_im(inout[13 * stride]);
17977 			 twr = c_re(W[12]);
17978 			 twi = c_im(W[12]);
17979 			 tre2_1_0 = (tr * twr) + (ti * twi);
17980 			 tim2_1_0 = (ti * twr) - (tr * twi);
17981 		    }
17982 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
17983 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
17984 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
17985 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
17986 	       }
17987 	       tre0_0_1 = tre1_0_0 + tre1_0_1;
17988 	       tim0_0_1 = tim1_0_0 + tim1_0_1;
17989 	       tre0_2_1 = tre1_0_0 - tre1_0_1;
17990 	       tim0_2_1 = tim1_0_0 - tim1_0_1;
17991 	       tre0_1_1 = tre1_1_0 - tim1_1_1;
17992 	       tim0_1_1 = tim1_1_0 + tre1_1_1;
17993 	       tre0_3_1 = tre1_1_0 + tim1_1_1;
17994 	       tim0_3_1 = tim1_1_0 - tre1_1_1;
17995 	  }
17996 	  {
17997 	       FFTW_REAL tre1_0_0;
17998 	       FFTW_REAL tim1_0_0;
17999 	       FFTW_REAL tre1_0_1;
18000 	       FFTW_REAL tim1_0_1;
18001 	       FFTW_REAL tre1_1_0;
18002 	       FFTW_REAL tim1_1_0;
18003 	       FFTW_REAL tre1_1_1;
18004 	       FFTW_REAL tim1_1_1;
18005 	       {
18006 		    FFTW_REAL tre2_0_0;
18007 		    FFTW_REAL tim2_0_0;
18008 		    FFTW_REAL tre2_1_0;
18009 		    FFTW_REAL tim2_1_0;
18010 		    {
18011 			 FFTW_REAL tr;
18012 			 FFTW_REAL ti;
18013 			 FFTW_REAL twr;
18014 			 FFTW_REAL twi;
18015 			 tr = c_re(inout[2 * stride]);
18016 			 ti = c_im(inout[2 * stride]);
18017 			 twr = c_re(W[1]);
18018 			 twi = c_im(W[1]);
18019 			 tre2_0_0 = (tr * twr) + (ti * twi);
18020 			 tim2_0_0 = (ti * twr) - (tr * twi);
18021 		    }
18022 		    {
18023 			 FFTW_REAL tr;
18024 			 FFTW_REAL ti;
18025 			 FFTW_REAL twr;
18026 			 FFTW_REAL twi;
18027 			 tr = c_re(inout[10 * stride]);
18028 			 ti = c_im(inout[10 * stride]);
18029 			 twr = c_re(W[9]);
18030 			 twi = c_im(W[9]);
18031 			 tre2_1_0 = (tr * twr) + (ti * twi);
18032 			 tim2_1_0 = (ti * twr) - (tr * twi);
18033 		    }
18034 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18035 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18036 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18037 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18038 	       }
18039 	       {
18040 		    FFTW_REAL tre2_0_0;
18041 		    FFTW_REAL tim2_0_0;
18042 		    FFTW_REAL tre2_1_0;
18043 		    FFTW_REAL tim2_1_0;
18044 		    {
18045 			 FFTW_REAL tr;
18046 			 FFTW_REAL ti;
18047 			 FFTW_REAL twr;
18048 			 FFTW_REAL twi;
18049 			 tr = c_re(inout[6 * stride]);
18050 			 ti = c_im(inout[6 * stride]);
18051 			 twr = c_re(W[5]);
18052 			 twi = c_im(W[5]);
18053 			 tre2_0_0 = (tr * twr) + (ti * twi);
18054 			 tim2_0_0 = (ti * twr) - (tr * twi);
18055 		    }
18056 		    {
18057 			 FFTW_REAL tr;
18058 			 FFTW_REAL ti;
18059 			 FFTW_REAL twr;
18060 			 FFTW_REAL twi;
18061 			 tr = c_re(inout[14 * stride]);
18062 			 ti = c_im(inout[14 * stride]);
18063 			 twr = c_re(W[13]);
18064 			 twi = c_im(W[13]);
18065 			 tre2_1_0 = (tr * twr) + (ti * twi);
18066 			 tim2_1_0 = (ti * twr) - (tr * twi);
18067 		    }
18068 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18069 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18070 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18071 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18072 	       }
18073 	       tre0_0_2 = tre1_0_0 + tre1_0_1;
18074 	       tim0_0_2 = tim1_0_0 + tim1_0_1;
18075 	       tre0_2_2 = tre1_0_0 - tre1_0_1;
18076 	       tim0_2_2 = tim1_0_0 - tim1_0_1;
18077 	       tre0_1_2 = tre1_1_0 - tim1_1_1;
18078 	       tim0_1_2 = tim1_1_0 + tre1_1_1;
18079 	       tre0_3_2 = tre1_1_0 + tim1_1_1;
18080 	       tim0_3_2 = tim1_1_0 - tre1_1_1;
18081 	  }
18082 	  {
18083 	       FFTW_REAL tre1_0_0;
18084 	       FFTW_REAL tim1_0_0;
18085 	       FFTW_REAL tre1_0_1;
18086 	       FFTW_REAL tim1_0_1;
18087 	       FFTW_REAL tre1_1_0;
18088 	       FFTW_REAL tim1_1_0;
18089 	       FFTW_REAL tre1_1_1;
18090 	       FFTW_REAL tim1_1_1;
18091 	       {
18092 		    FFTW_REAL tre2_0_0;
18093 		    FFTW_REAL tim2_0_0;
18094 		    FFTW_REAL tre2_1_0;
18095 		    FFTW_REAL tim2_1_0;
18096 		    {
18097 			 FFTW_REAL tr;
18098 			 FFTW_REAL ti;
18099 			 FFTW_REAL twr;
18100 			 FFTW_REAL twi;
18101 			 tr = c_re(inout[3 * stride]);
18102 			 ti = c_im(inout[3 * stride]);
18103 			 twr = c_re(W[2]);
18104 			 twi = c_im(W[2]);
18105 			 tre2_0_0 = (tr * twr) + (ti * twi);
18106 			 tim2_0_0 = (ti * twr) - (tr * twi);
18107 		    }
18108 		    {
18109 			 FFTW_REAL tr;
18110 			 FFTW_REAL ti;
18111 			 FFTW_REAL twr;
18112 			 FFTW_REAL twi;
18113 			 tr = c_re(inout[11 * stride]);
18114 			 ti = c_im(inout[11 * stride]);
18115 			 twr = c_re(W[10]);
18116 			 twi = c_im(W[10]);
18117 			 tre2_1_0 = (tr * twr) + (ti * twi);
18118 			 tim2_1_0 = (ti * twr) - (tr * twi);
18119 		    }
18120 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18121 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18122 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18123 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18124 	       }
18125 	       {
18126 		    FFTW_REAL tre2_0_0;
18127 		    FFTW_REAL tim2_0_0;
18128 		    FFTW_REAL tre2_1_0;
18129 		    FFTW_REAL tim2_1_0;
18130 		    {
18131 			 FFTW_REAL tr;
18132 			 FFTW_REAL ti;
18133 			 FFTW_REAL twr;
18134 			 FFTW_REAL twi;
18135 			 tr = c_re(inout[7 * stride]);
18136 			 ti = c_im(inout[7 * stride]);
18137 			 twr = c_re(W[6]);
18138 			 twi = c_im(W[6]);
18139 			 tre2_0_0 = (tr * twr) + (ti * twi);
18140 			 tim2_0_0 = (ti * twr) - (tr * twi);
18141 		    }
18142 		    {
18143 			 FFTW_REAL tr;
18144 			 FFTW_REAL ti;
18145 			 FFTW_REAL twr;
18146 			 FFTW_REAL twi;
18147 			 tr = c_re(inout[15 * stride]);
18148 			 ti = c_im(inout[15 * stride]);
18149 			 twr = c_re(W[14]);
18150 			 twi = c_im(W[14]);
18151 			 tre2_1_0 = (tr * twr) + (ti * twi);
18152 			 tim2_1_0 = (ti * twr) - (tr * twi);
18153 		    }
18154 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18155 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18156 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18157 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18158 	       }
18159 	       tre0_0_3 = tre1_0_0 + tre1_0_1;
18160 	       tim0_0_3 = tim1_0_0 + tim1_0_1;
18161 	       tre0_2_3 = tre1_0_0 - tre1_0_1;
18162 	       tim0_2_3 = tim1_0_0 - tim1_0_1;
18163 	       tre0_1_3 = tre1_1_0 - tim1_1_1;
18164 	       tim0_1_3 = tim1_1_0 + tre1_1_1;
18165 	       tre0_3_3 = tre1_1_0 + tim1_1_1;
18166 	       tim0_3_3 = tim1_1_0 - tre1_1_1;
18167 	  }
18168 	  {
18169 	       FFTW_REAL tre1_0_0;
18170 	       FFTW_REAL tim1_0_0;
18171 	       FFTW_REAL tre1_0_1;
18172 	       FFTW_REAL tim1_0_1;
18173 	       FFTW_REAL tre1_1_0;
18174 	       FFTW_REAL tim1_1_0;
18175 	       FFTW_REAL tre1_1_1;
18176 	       FFTW_REAL tim1_1_1;
18177 	       tre1_0_0 = tre0_0_0 + tre0_0_2;
18178 	       tim1_0_0 = tim0_0_0 + tim0_0_2;
18179 	       tre1_1_0 = tre0_0_0 - tre0_0_2;
18180 	       tim1_1_0 = tim0_0_0 - tim0_0_2;
18181 	       tre1_0_1 = tre0_0_1 + tre0_0_3;
18182 	       tim1_0_1 = tim0_0_1 + tim0_0_3;
18183 	       tre1_1_1 = tre0_0_1 - tre0_0_3;
18184 	       tim1_1_1 = tim0_0_1 - tim0_0_3;
18185 	       c_re(inout[0]) = tre1_0_0 + tre1_0_1;
18186 	       c_im(inout[0]) = tim1_0_0 + tim1_0_1;
18187 	       c_re(inout[8 * stride]) = tre1_0_0 - tre1_0_1;
18188 	       c_im(inout[8 * stride]) = tim1_0_0 - tim1_0_1;
18189 	       c_re(inout[4 * stride]) = tre1_1_0 - tim1_1_1;
18190 	       c_im(inout[4 * stride]) = tim1_1_0 + tre1_1_1;
18191 	       c_re(inout[12 * stride]) = tre1_1_0 + tim1_1_1;
18192 	       c_im(inout[12 * stride]) = tim1_1_0 - tre1_1_1;
18193 	  }
18194 	  {
18195 	       FFTW_REAL tre1_0_0;
18196 	       FFTW_REAL tim1_0_0;
18197 	       FFTW_REAL tre1_0_1;
18198 	       FFTW_REAL tim1_0_1;
18199 	       FFTW_REAL tre1_1_0;
18200 	       FFTW_REAL tim1_1_0;
18201 	       FFTW_REAL tre1_1_1;
18202 	       FFTW_REAL tim1_1_1;
18203 	       {
18204 		    FFTW_REAL tre2_1_0;
18205 		    FFTW_REAL tim2_1_0;
18206 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_2 - tim0_1_2);
18207 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_2 + tre0_1_2);
18208 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
18209 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
18210 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
18211 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
18212 	       }
18213 	       {
18214 		    FFTW_REAL tre2_0_0;
18215 		    FFTW_REAL tim2_0_0;
18216 		    FFTW_REAL tre2_1_0;
18217 		    FFTW_REAL tim2_1_0;
18218 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_1) - (((FFTW_REAL) FFTW_K382683432) * tim0_1_1);
18219 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_1) + (((FFTW_REAL) FFTW_K382683432) * tre0_1_1);
18220 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_1_3);
18221 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_3) + (((FFTW_REAL) FFTW_K923879532) * tre0_1_3);
18222 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18223 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18224 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18225 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18226 	       }
18227 	       c_re(inout[stride]) = tre1_0_0 + tre1_0_1;
18228 	       c_im(inout[stride]) = tim1_0_0 + tim1_0_1;
18229 	       c_re(inout[9 * stride]) = tre1_0_0 - tre1_0_1;
18230 	       c_im(inout[9 * stride]) = tim1_0_0 - tim1_0_1;
18231 	       c_re(inout[5 * stride]) = tre1_1_0 - tim1_1_1;
18232 	       c_im(inout[5 * stride]) = tim1_1_0 + tre1_1_1;
18233 	       c_re(inout[13 * stride]) = tre1_1_0 + tim1_1_1;
18234 	       c_im(inout[13 * stride]) = tim1_1_0 - tre1_1_1;
18235 	  }
18236 	  {
18237 	       FFTW_REAL tre1_0_0;
18238 	       FFTW_REAL tim1_0_0;
18239 	       FFTW_REAL tre1_0_1;
18240 	       FFTW_REAL tim1_0_1;
18241 	       FFTW_REAL tre1_1_0;
18242 	       FFTW_REAL tim1_1_0;
18243 	       FFTW_REAL tre1_1_1;
18244 	       FFTW_REAL tim1_1_1;
18245 	       tre1_0_0 = tre0_2_0 - tim0_2_2;
18246 	       tim1_0_0 = tim0_2_0 + tre0_2_2;
18247 	       tre1_1_0 = tre0_2_0 + tim0_2_2;
18248 	       tim1_1_0 = tim0_2_0 - tre0_2_2;
18249 	       {
18250 		    FFTW_REAL tre2_0_0;
18251 		    FFTW_REAL tim2_0_0;
18252 		    FFTW_REAL tre2_1_0;
18253 		    FFTW_REAL tim2_1_0;
18254 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_1 - tim0_2_1);
18255 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_1 + tre0_2_1);
18256 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_3 + tim0_2_3);
18257 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_3 - tim0_2_3);
18258 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
18259 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18260 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
18261 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18262 	       }
18263 	       c_re(inout[2 * stride]) = tre1_0_0 + tre1_0_1;
18264 	       c_im(inout[2 * stride]) = tim1_0_0 + tim1_0_1;
18265 	       c_re(inout[10 * stride]) = tre1_0_0 - tre1_0_1;
18266 	       c_im(inout[10 * stride]) = tim1_0_0 - tim1_0_1;
18267 	       c_re(inout[6 * stride]) = tre1_1_0 - tim1_1_1;
18268 	       c_im(inout[6 * stride]) = tim1_1_0 + tre1_1_1;
18269 	       c_re(inout[14 * stride]) = tre1_1_0 + tim1_1_1;
18270 	       c_im(inout[14 * stride]) = tim1_1_0 - tre1_1_1;
18271 	  }
18272 	  {
18273 	       FFTW_REAL tre1_0_0;
18274 	       FFTW_REAL tim1_0_0;
18275 	       FFTW_REAL tre1_0_1;
18276 	       FFTW_REAL tim1_0_1;
18277 	       FFTW_REAL tre1_1_0;
18278 	       FFTW_REAL tim1_1_0;
18279 	       FFTW_REAL tre1_1_1;
18280 	       FFTW_REAL tim1_1_1;
18281 	       {
18282 		    FFTW_REAL tre2_1_0;
18283 		    FFTW_REAL tim2_1_0;
18284 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_2 + tim0_3_2);
18285 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_2 - tim0_3_2);
18286 		    tre1_0_0 = tre0_3_0 - tre2_1_0;
18287 		    tim1_0_0 = tim0_3_0 + tim2_1_0;
18288 		    tre1_1_0 = tre0_3_0 + tre2_1_0;
18289 		    tim1_1_0 = tim0_3_0 - tim2_1_0;
18290 	       }
18291 	       {
18292 		    FFTW_REAL tre2_0_0;
18293 		    FFTW_REAL tim2_0_0;
18294 		    FFTW_REAL tre2_1_0;
18295 		    FFTW_REAL tim2_1_0;
18296 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_1) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_1);
18297 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_1) + (((FFTW_REAL) FFTW_K923879532) * tre0_3_1);
18298 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_3) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_3);
18299 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_3_3) + (((FFTW_REAL) FFTW_K382683432) * tre0_3_3);
18300 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18301 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
18302 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18303 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
18304 	       }
18305 	       c_re(inout[3 * stride]) = tre1_0_0 + tre1_0_1;
18306 	       c_im(inout[3 * stride]) = tim1_0_0 + tim1_0_1;
18307 	       c_re(inout[11 * stride]) = tre1_0_0 - tre1_0_1;
18308 	       c_im(inout[11 * stride]) = tim1_0_0 - tim1_0_1;
18309 	       c_re(inout[7 * stride]) = tre1_1_0 - tim1_1_1;
18310 	       c_im(inout[7 * stride]) = tim1_1_0 + tre1_1_1;
18311 	       c_re(inout[15 * stride]) = tre1_1_0 + tim1_1_1;
18312 	       c_im(inout[15 * stride]) = tim1_1_0 - tre1_1_1;
18313 	  }
18314      }
18315 }
18316 
18317 /* This function contains 6 FP additions and 4 FP multiplications */
18318 
/*
 * Two-point twiddle codelet ("fftwi" variant), operating in place on A.
 *
 * Each of the m iterations reads the elements at offsets 0 and stride
 * from the current position, multiplies the second element by the complex
 * conjugate of W[0], and overwrites the pair with the sum (offset 0) and
 * the difference (offset stride) of the two values.  After every
 * iteration the data position moves forward by dist elements and W moves
 * forward by one twiddle factor.  Nothing is done when m <= 0.
 */
static void fftwi_twiddle_2(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *io = A;
     int k;

     for (k = 0; k < m; ++k) {
          /* All inputs are loaded before anything is stored back. */
          const FFTW_REAL a_re = c_re(io[0]);
          const FFTW_REAL a_im = c_im(io[0]);
          const FFTW_REAL b_re = c_re(io[stride]);
          const FFTW_REAL b_im = c_im(io[stride]);
          const FFTW_REAL w_re = c_re(W[0]);
          const FFTW_REAL w_im = c_im(W[0]);
          /* (b_re + i*b_im) * (w_re - i*w_im) */
          const FFTW_REAL t_re = (b_re * w_re) + (b_im * w_im);
          const FFTW_REAL t_im = (b_im * w_re) - (b_re * w_im);

          /* Store order is kept as re[0], im[0], re[stride], im[stride]. */
          c_re(io[0]) = a_re + t_re;
          c_im(io[0]) = a_im + t_im;
          c_re(io[stride]) = a_re - t_re;
          c_im(io[stride]) = a_im - t_im;

          io += dist;
          W += 1;
     }
}
18349 
18350 /* This function contains 18 FP additions and 12 FP multiplications */
18351 
/*
 * Three-point twiddle codelet ("fftwi" variant), operating in place on A.
 *
 * Each of the m iterations reads the elements at offsets 0, stride and
 * 2 * stride from the current position.  The second and third elements
 * are multiplied by the complex conjugates of W[0] and W[1] respectively.
 * The three values are then combined:
 *   - offset 0 receives their plain sum;
 *   - offsets stride and 2 * stride receive
 *       x0 - k499 * (t1 + t2)  +/-  k866 * (cross term),
 *     where the cross term for the real part is built from the imaginary
 *     parts and vice versa.
 * After every iteration the data position moves forward by dist elements
 * and W moves forward by two twiddle factors.  Nothing is done when
 * m <= 0.
 *
 * NOTE(review): FFTW_K499999999 and FFTW_K866025403 are defined outside
 * this block; presumably they approximate 1/2 and sqrt(3)/2 -- confirm
 * against their definitions.
 */
static void fftwi_twiddle_3(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     const FFTW_REAL k499 = (FFTW_REAL) FFTW_K499999999;
     const FFTW_REAL k866 = (FFTW_REAL) FFTW_K866025403;
     FFTW_COMPLEX *io = A;
     int k;

     for (k = 0; k < m; ++k) {
          /* Untouched first input. */
          const FFTW_REAL x0_re = c_re(io[0]);
          const FFTW_REAL x0_im = c_im(io[0]);
          /* Raw second and third inputs with their twiddle factors. */
          const FFTW_REAL x1_re = c_re(io[stride]);
          const FFTW_REAL x1_im = c_im(io[stride]);
          const FFTW_REAL w1_re = c_re(W[0]);
          const FFTW_REAL w1_im = c_im(W[0]);
          const FFTW_REAL x2_re = c_re(io[2 * stride]);
          const FFTW_REAL x2_im = c_im(io[2 * stride]);
          const FFTW_REAL w2_re = c_re(W[1]);
          const FFTW_REAL w2_im = c_im(W[1]);
          /* Inputs multiplied by the conjugated twiddle factors. */
          const FFTW_REAL t1_re = (x1_re * w1_re) + (x1_im * w1_im);
          const FFTW_REAL t1_im = (x1_im * w1_re) - (x1_re * w1_im);
          const FFTW_REAL t2_re = (x2_re * w2_re) + (x2_im * w2_im);
          const FFTW_REAL t2_im = (x2_im * w2_re) - (x2_re * w2_im);
          /* Shared and cross terms for the two remaining outputs. */
          const FFTW_REAL base_re = x0_re - (k499 * (t1_re + t2_re));
          const FFTW_REAL cross_re = k866 * (t2_im - t1_im);
          const FFTW_REAL base_im = x0_im - (k499 * (t1_im + t2_im));
          const FFTW_REAL cross_im = k866 * (t1_re - t2_re);

          /*
           * Store order is kept as: both parts of offset 0, then the
           * real parts of the other two outputs, then their imaginary
           * parts.
           */
          c_re(io[0]) = x0_re + t1_re + t2_re;
          c_im(io[0]) = x0_im + t1_im + t2_im;
          c_re(io[stride]) = base_re + cross_re;
          c_re(io[2 * stride]) = base_re - cross_re;
          c_im(io[stride]) = base_im + cross_im;
          c_im(io[2 * stride]) = base_im - cross_im;

          io += dist;
          W += 2;
     }
}
18410 
18411 /* This function contains 438 FP additions and 212 FP multiplications */
18412 
fftwi_twiddle_32(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)18413 static void fftwi_twiddle_32(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
18414 {
18415      int i;
18416      FFTW_COMPLEX *inout;
18417      inout = A;
18418      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 31) {
18419 	  FFTW_REAL tre0_0_0;
18420 	  FFTW_REAL tim0_0_0;
18421 	  FFTW_REAL tre0_0_1;
18422 	  FFTW_REAL tim0_0_1;
18423 	  FFTW_REAL tre0_0_2;
18424 	  FFTW_REAL tim0_0_2;
18425 	  FFTW_REAL tre0_0_3;
18426 	  FFTW_REAL tim0_0_3;
18427 	  FFTW_REAL tre0_0_4;
18428 	  FFTW_REAL tim0_0_4;
18429 	  FFTW_REAL tre0_0_5;
18430 	  FFTW_REAL tim0_0_5;
18431 	  FFTW_REAL tre0_0_6;
18432 	  FFTW_REAL tim0_0_6;
18433 	  FFTW_REAL tre0_0_7;
18434 	  FFTW_REAL tim0_0_7;
18435 	  FFTW_REAL tre0_1_0;
18436 	  FFTW_REAL tim0_1_0;
18437 	  FFTW_REAL tre0_1_1;
18438 	  FFTW_REAL tim0_1_1;
18439 	  FFTW_REAL tre0_1_2;
18440 	  FFTW_REAL tim0_1_2;
18441 	  FFTW_REAL tre0_1_3;
18442 	  FFTW_REAL tim0_1_3;
18443 	  FFTW_REAL tre0_1_4;
18444 	  FFTW_REAL tim0_1_4;
18445 	  FFTW_REAL tre0_1_5;
18446 	  FFTW_REAL tim0_1_5;
18447 	  FFTW_REAL tre0_1_6;
18448 	  FFTW_REAL tim0_1_6;
18449 	  FFTW_REAL tre0_1_7;
18450 	  FFTW_REAL tim0_1_7;
18451 	  FFTW_REAL tre0_2_0;
18452 	  FFTW_REAL tim0_2_0;
18453 	  FFTW_REAL tre0_2_1;
18454 	  FFTW_REAL tim0_2_1;
18455 	  FFTW_REAL tre0_2_2;
18456 	  FFTW_REAL tim0_2_2;
18457 	  FFTW_REAL tre0_2_3;
18458 	  FFTW_REAL tim0_2_3;
18459 	  FFTW_REAL tre0_2_4;
18460 	  FFTW_REAL tim0_2_4;
18461 	  FFTW_REAL tre0_2_5;
18462 	  FFTW_REAL tim0_2_5;
18463 	  FFTW_REAL tre0_2_6;
18464 	  FFTW_REAL tim0_2_6;
18465 	  FFTW_REAL tre0_2_7;
18466 	  FFTW_REAL tim0_2_7;
18467 	  FFTW_REAL tre0_3_0;
18468 	  FFTW_REAL tim0_3_0;
18469 	  FFTW_REAL tre0_3_1;
18470 	  FFTW_REAL tim0_3_1;
18471 	  FFTW_REAL tre0_3_2;
18472 	  FFTW_REAL tim0_3_2;
18473 	  FFTW_REAL tre0_3_3;
18474 	  FFTW_REAL tim0_3_3;
18475 	  FFTW_REAL tre0_3_4;
18476 	  FFTW_REAL tim0_3_4;
18477 	  FFTW_REAL tre0_3_5;
18478 	  FFTW_REAL tim0_3_5;
18479 	  FFTW_REAL tre0_3_6;
18480 	  FFTW_REAL tim0_3_6;
18481 	  FFTW_REAL tre0_3_7;
18482 	  FFTW_REAL tim0_3_7;
18483 	  {
18484 	       FFTW_REAL tre1_0_0;
18485 	       FFTW_REAL tim1_0_0;
18486 	       FFTW_REAL tre1_0_1;
18487 	       FFTW_REAL tim1_0_1;
18488 	       FFTW_REAL tre1_1_0;
18489 	       FFTW_REAL tim1_1_0;
18490 	       FFTW_REAL tre1_1_1;
18491 	       FFTW_REAL tim1_1_1;
18492 	       {
18493 		    FFTW_REAL tre2_0_0;
18494 		    FFTW_REAL tim2_0_0;
18495 		    FFTW_REAL tre2_1_0;
18496 		    FFTW_REAL tim2_1_0;
18497 		    tre2_0_0 = c_re(inout[0]);
18498 		    tim2_0_0 = c_im(inout[0]);
18499 		    {
18500 			 FFTW_REAL tr;
18501 			 FFTW_REAL ti;
18502 			 FFTW_REAL twr;
18503 			 FFTW_REAL twi;
18504 			 tr = c_re(inout[16 * stride]);
18505 			 ti = c_im(inout[16 * stride]);
18506 			 twr = c_re(W[15]);
18507 			 twi = c_im(W[15]);
18508 			 tre2_1_0 = (tr * twr) + (ti * twi);
18509 			 tim2_1_0 = (ti * twr) - (tr * twi);
18510 		    }
18511 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18512 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18513 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18514 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18515 	       }
18516 	       {
18517 		    FFTW_REAL tre2_0_0;
18518 		    FFTW_REAL tim2_0_0;
18519 		    FFTW_REAL tre2_1_0;
18520 		    FFTW_REAL tim2_1_0;
18521 		    {
18522 			 FFTW_REAL tr;
18523 			 FFTW_REAL ti;
18524 			 FFTW_REAL twr;
18525 			 FFTW_REAL twi;
18526 			 tr = c_re(inout[8 * stride]);
18527 			 ti = c_im(inout[8 * stride]);
18528 			 twr = c_re(W[7]);
18529 			 twi = c_im(W[7]);
18530 			 tre2_0_0 = (tr * twr) + (ti * twi);
18531 			 tim2_0_0 = (ti * twr) - (tr * twi);
18532 		    }
18533 		    {
18534 			 FFTW_REAL tr;
18535 			 FFTW_REAL ti;
18536 			 FFTW_REAL twr;
18537 			 FFTW_REAL twi;
18538 			 tr = c_re(inout[24 * stride]);
18539 			 ti = c_im(inout[24 * stride]);
18540 			 twr = c_re(W[23]);
18541 			 twi = c_im(W[23]);
18542 			 tre2_1_0 = (tr * twr) + (ti * twi);
18543 			 tim2_1_0 = (ti * twr) - (tr * twi);
18544 		    }
18545 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18546 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18547 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18548 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18549 	       }
18550 	       tre0_0_0 = tre1_0_0 + tre1_0_1;
18551 	       tim0_0_0 = tim1_0_0 + tim1_0_1;
18552 	       tre0_2_0 = tre1_0_0 - tre1_0_1;
18553 	       tim0_2_0 = tim1_0_0 - tim1_0_1;
18554 	       tre0_1_0 = tre1_1_0 - tim1_1_1;
18555 	       tim0_1_0 = tim1_1_0 + tre1_1_1;
18556 	       tre0_3_0 = tre1_1_0 + tim1_1_1;
18557 	       tim0_3_0 = tim1_1_0 - tre1_1_1;
18558 	  }
18559 	  {
18560 	       FFTW_REAL tre1_0_0;
18561 	       FFTW_REAL tim1_0_0;
18562 	       FFTW_REAL tre1_0_1;
18563 	       FFTW_REAL tim1_0_1;
18564 	       FFTW_REAL tre1_1_0;
18565 	       FFTW_REAL tim1_1_0;
18566 	       FFTW_REAL tre1_1_1;
18567 	       FFTW_REAL tim1_1_1;
18568 	       {
18569 		    FFTW_REAL tre2_0_0;
18570 		    FFTW_REAL tim2_0_0;
18571 		    FFTW_REAL tre2_1_0;
18572 		    FFTW_REAL tim2_1_0;
18573 		    {
18574 			 FFTW_REAL tr;
18575 			 FFTW_REAL ti;
18576 			 FFTW_REAL twr;
18577 			 FFTW_REAL twi;
18578 			 tr = c_re(inout[stride]);
18579 			 ti = c_im(inout[stride]);
18580 			 twr = c_re(W[0]);
18581 			 twi = c_im(W[0]);
18582 			 tre2_0_0 = (tr * twr) + (ti * twi);
18583 			 tim2_0_0 = (ti * twr) - (tr * twi);
18584 		    }
18585 		    {
18586 			 FFTW_REAL tr;
18587 			 FFTW_REAL ti;
18588 			 FFTW_REAL twr;
18589 			 FFTW_REAL twi;
18590 			 tr = c_re(inout[17 * stride]);
18591 			 ti = c_im(inout[17 * stride]);
18592 			 twr = c_re(W[16]);
18593 			 twi = c_im(W[16]);
18594 			 tre2_1_0 = (tr * twr) + (ti * twi);
18595 			 tim2_1_0 = (ti * twr) - (tr * twi);
18596 		    }
18597 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18598 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18599 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18600 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18601 	       }
18602 	       {
18603 		    FFTW_REAL tre2_0_0;
18604 		    FFTW_REAL tim2_0_0;
18605 		    FFTW_REAL tre2_1_0;
18606 		    FFTW_REAL tim2_1_0;
18607 		    {
18608 			 FFTW_REAL tr;
18609 			 FFTW_REAL ti;
18610 			 FFTW_REAL twr;
18611 			 FFTW_REAL twi;
18612 			 tr = c_re(inout[9 * stride]);
18613 			 ti = c_im(inout[9 * stride]);
18614 			 twr = c_re(W[8]);
18615 			 twi = c_im(W[8]);
18616 			 tre2_0_0 = (tr * twr) + (ti * twi);
18617 			 tim2_0_0 = (ti * twr) - (tr * twi);
18618 		    }
18619 		    {
18620 			 FFTW_REAL tr;
18621 			 FFTW_REAL ti;
18622 			 FFTW_REAL twr;
18623 			 FFTW_REAL twi;
18624 			 tr = c_re(inout[25 * stride]);
18625 			 ti = c_im(inout[25 * stride]);
18626 			 twr = c_re(W[24]);
18627 			 twi = c_im(W[24]);
18628 			 tre2_1_0 = (tr * twr) + (ti * twi);
18629 			 tim2_1_0 = (ti * twr) - (tr * twi);
18630 		    }
18631 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18632 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18633 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18634 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18635 	       }
18636 	       tre0_0_1 = tre1_0_0 + tre1_0_1;
18637 	       tim0_0_1 = tim1_0_0 + tim1_0_1;
18638 	       tre0_2_1 = tre1_0_0 - tre1_0_1;
18639 	       tim0_2_1 = tim1_0_0 - tim1_0_1;
18640 	       tre0_1_1 = tre1_1_0 - tim1_1_1;
18641 	       tim0_1_1 = tim1_1_0 + tre1_1_1;
18642 	       tre0_3_1 = tre1_1_0 + tim1_1_1;
18643 	       tim0_3_1 = tim1_1_0 - tre1_1_1;
18644 	  }
18645 	  {
18646 	       FFTW_REAL tre1_0_0;
18647 	       FFTW_REAL tim1_0_0;
18648 	       FFTW_REAL tre1_0_1;
18649 	       FFTW_REAL tim1_0_1;
18650 	       FFTW_REAL tre1_1_0;
18651 	       FFTW_REAL tim1_1_0;
18652 	       FFTW_REAL tre1_1_1;
18653 	       FFTW_REAL tim1_1_1;
18654 	       {
18655 		    FFTW_REAL tre2_0_0;
18656 		    FFTW_REAL tim2_0_0;
18657 		    FFTW_REAL tre2_1_0;
18658 		    FFTW_REAL tim2_1_0;
18659 		    {
18660 			 FFTW_REAL tr;
18661 			 FFTW_REAL ti;
18662 			 FFTW_REAL twr;
18663 			 FFTW_REAL twi;
18664 			 tr = c_re(inout[2 * stride]);
18665 			 ti = c_im(inout[2 * stride]);
18666 			 twr = c_re(W[1]);
18667 			 twi = c_im(W[1]);
18668 			 tre2_0_0 = (tr * twr) + (ti * twi);
18669 			 tim2_0_0 = (ti * twr) - (tr * twi);
18670 		    }
18671 		    {
18672 			 FFTW_REAL tr;
18673 			 FFTW_REAL ti;
18674 			 FFTW_REAL twr;
18675 			 FFTW_REAL twi;
18676 			 tr = c_re(inout[18 * stride]);
18677 			 ti = c_im(inout[18 * stride]);
18678 			 twr = c_re(W[17]);
18679 			 twi = c_im(W[17]);
18680 			 tre2_1_0 = (tr * twr) + (ti * twi);
18681 			 tim2_1_0 = (ti * twr) - (tr * twi);
18682 		    }
18683 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18684 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18685 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18686 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18687 	       }
18688 	       {
18689 		    FFTW_REAL tre2_0_0;
18690 		    FFTW_REAL tim2_0_0;
18691 		    FFTW_REAL tre2_1_0;
18692 		    FFTW_REAL tim2_1_0;
18693 		    {
18694 			 FFTW_REAL tr;
18695 			 FFTW_REAL ti;
18696 			 FFTW_REAL twr;
18697 			 FFTW_REAL twi;
18698 			 tr = c_re(inout[10 * stride]);
18699 			 ti = c_im(inout[10 * stride]);
18700 			 twr = c_re(W[9]);
18701 			 twi = c_im(W[9]);
18702 			 tre2_0_0 = (tr * twr) + (ti * twi);
18703 			 tim2_0_0 = (ti * twr) - (tr * twi);
18704 		    }
18705 		    {
18706 			 FFTW_REAL tr;
18707 			 FFTW_REAL ti;
18708 			 FFTW_REAL twr;
18709 			 FFTW_REAL twi;
18710 			 tr = c_re(inout[26 * stride]);
18711 			 ti = c_im(inout[26 * stride]);
18712 			 twr = c_re(W[25]);
18713 			 twi = c_im(W[25]);
18714 			 tre2_1_0 = (tr * twr) + (ti * twi);
18715 			 tim2_1_0 = (ti * twr) - (tr * twi);
18716 		    }
18717 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18718 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18719 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18720 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18721 	       }
18722 	       tre0_0_2 = tre1_0_0 + tre1_0_1;
18723 	       tim0_0_2 = tim1_0_0 + tim1_0_1;
18724 	       tre0_2_2 = tre1_0_0 - tre1_0_1;
18725 	       tim0_2_2 = tim1_0_0 - tim1_0_1;
18726 	       tre0_1_2 = tre1_1_0 - tim1_1_1;
18727 	       tim0_1_2 = tim1_1_0 + tre1_1_1;
18728 	       tre0_3_2 = tre1_1_0 + tim1_1_1;
18729 	       tim0_3_2 = tim1_1_0 - tre1_1_1;
18730 	  }
18731 	  {
18732 	       FFTW_REAL tre1_0_0;
18733 	       FFTW_REAL tim1_0_0;
18734 	       FFTW_REAL tre1_0_1;
18735 	       FFTW_REAL tim1_0_1;
18736 	       FFTW_REAL tre1_1_0;
18737 	       FFTW_REAL tim1_1_0;
18738 	       FFTW_REAL tre1_1_1;
18739 	       FFTW_REAL tim1_1_1;
18740 	       {
18741 		    FFTW_REAL tre2_0_0;
18742 		    FFTW_REAL tim2_0_0;
18743 		    FFTW_REAL tre2_1_0;
18744 		    FFTW_REAL tim2_1_0;
18745 		    {
18746 			 FFTW_REAL tr;
18747 			 FFTW_REAL ti;
18748 			 FFTW_REAL twr;
18749 			 FFTW_REAL twi;
18750 			 tr = c_re(inout[3 * stride]);
18751 			 ti = c_im(inout[3 * stride]);
18752 			 twr = c_re(W[2]);
18753 			 twi = c_im(W[2]);
18754 			 tre2_0_0 = (tr * twr) + (ti * twi);
18755 			 tim2_0_0 = (ti * twr) - (tr * twi);
18756 		    }
18757 		    {
18758 			 FFTW_REAL tr;
18759 			 FFTW_REAL ti;
18760 			 FFTW_REAL twr;
18761 			 FFTW_REAL twi;
18762 			 tr = c_re(inout[19 * stride]);
18763 			 ti = c_im(inout[19 * stride]);
18764 			 twr = c_re(W[18]);
18765 			 twi = c_im(W[18]);
18766 			 tre2_1_0 = (tr * twr) + (ti * twi);
18767 			 tim2_1_0 = (ti * twr) - (tr * twi);
18768 		    }
18769 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18770 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18771 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18772 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18773 	       }
18774 	       {
18775 		    FFTW_REAL tre2_0_0;
18776 		    FFTW_REAL tim2_0_0;
18777 		    FFTW_REAL tre2_1_0;
18778 		    FFTW_REAL tim2_1_0;
18779 		    {
18780 			 FFTW_REAL tr;
18781 			 FFTW_REAL ti;
18782 			 FFTW_REAL twr;
18783 			 FFTW_REAL twi;
18784 			 tr = c_re(inout[11 * stride]);
18785 			 ti = c_im(inout[11 * stride]);
18786 			 twr = c_re(W[10]);
18787 			 twi = c_im(W[10]);
18788 			 tre2_0_0 = (tr * twr) + (ti * twi);
18789 			 tim2_0_0 = (ti * twr) - (tr * twi);
18790 		    }
18791 		    {
18792 			 FFTW_REAL tr;
18793 			 FFTW_REAL ti;
18794 			 FFTW_REAL twr;
18795 			 FFTW_REAL twi;
18796 			 tr = c_re(inout[27 * stride]);
18797 			 ti = c_im(inout[27 * stride]);
18798 			 twr = c_re(W[26]);
18799 			 twi = c_im(W[26]);
18800 			 tre2_1_0 = (tr * twr) + (ti * twi);
18801 			 tim2_1_0 = (ti * twr) - (tr * twi);
18802 		    }
18803 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18804 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18805 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18806 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18807 	       }
18808 	       tre0_0_3 = tre1_0_0 + tre1_0_1;
18809 	       tim0_0_3 = tim1_0_0 + tim1_0_1;
18810 	       tre0_2_3 = tre1_0_0 - tre1_0_1;
18811 	       tim0_2_3 = tim1_0_0 - tim1_0_1;
18812 	       tre0_1_3 = tre1_1_0 - tim1_1_1;
18813 	       tim0_1_3 = tim1_1_0 + tre1_1_1;
18814 	       tre0_3_3 = tre1_1_0 + tim1_1_1;
18815 	       tim0_3_3 = tim1_1_0 - tre1_1_1;
18816 	  }
18817 	  {
18818 	       FFTW_REAL tre1_0_0;
18819 	       FFTW_REAL tim1_0_0;
18820 	       FFTW_REAL tre1_0_1;
18821 	       FFTW_REAL tim1_0_1;
18822 	       FFTW_REAL tre1_1_0;
18823 	       FFTW_REAL tim1_1_0;
18824 	       FFTW_REAL tre1_1_1;
18825 	       FFTW_REAL tim1_1_1;
18826 	       {
18827 		    FFTW_REAL tre2_0_0;
18828 		    FFTW_REAL tim2_0_0;
18829 		    FFTW_REAL tre2_1_0;
18830 		    FFTW_REAL tim2_1_0;
18831 		    {
18832 			 FFTW_REAL tr;
18833 			 FFTW_REAL ti;
18834 			 FFTW_REAL twr;
18835 			 FFTW_REAL twi;
18836 			 tr = c_re(inout[4 * stride]);
18837 			 ti = c_im(inout[4 * stride]);
18838 			 twr = c_re(W[3]);
18839 			 twi = c_im(W[3]);
18840 			 tre2_0_0 = (tr * twr) + (ti * twi);
18841 			 tim2_0_0 = (ti * twr) - (tr * twi);
18842 		    }
18843 		    {
18844 			 FFTW_REAL tr;
18845 			 FFTW_REAL ti;
18846 			 FFTW_REAL twr;
18847 			 FFTW_REAL twi;
18848 			 tr = c_re(inout[20 * stride]);
18849 			 ti = c_im(inout[20 * stride]);
18850 			 twr = c_re(W[19]);
18851 			 twi = c_im(W[19]);
18852 			 tre2_1_0 = (tr * twr) + (ti * twi);
18853 			 tim2_1_0 = (ti * twr) - (tr * twi);
18854 		    }
18855 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18856 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18857 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18858 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18859 	       }
18860 	       {
18861 		    FFTW_REAL tre2_0_0;
18862 		    FFTW_REAL tim2_0_0;
18863 		    FFTW_REAL tre2_1_0;
18864 		    FFTW_REAL tim2_1_0;
18865 		    {
18866 			 FFTW_REAL tr;
18867 			 FFTW_REAL ti;
18868 			 FFTW_REAL twr;
18869 			 FFTW_REAL twi;
18870 			 tr = c_re(inout[12 * stride]);
18871 			 ti = c_im(inout[12 * stride]);
18872 			 twr = c_re(W[11]);
18873 			 twi = c_im(W[11]);
18874 			 tre2_0_0 = (tr * twr) + (ti * twi);
18875 			 tim2_0_0 = (ti * twr) - (tr * twi);
18876 		    }
18877 		    {
18878 			 FFTW_REAL tr;
18879 			 FFTW_REAL ti;
18880 			 FFTW_REAL twr;
18881 			 FFTW_REAL twi;
18882 			 tr = c_re(inout[28 * stride]);
18883 			 ti = c_im(inout[28 * stride]);
18884 			 twr = c_re(W[27]);
18885 			 twi = c_im(W[27]);
18886 			 tre2_1_0 = (tr * twr) + (ti * twi);
18887 			 tim2_1_0 = (ti * twr) - (tr * twi);
18888 		    }
18889 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18890 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18891 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18892 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18893 	       }
18894 	       tre0_0_4 = tre1_0_0 + tre1_0_1;
18895 	       tim0_0_4 = tim1_0_0 + tim1_0_1;
18896 	       tre0_2_4 = tre1_0_0 - tre1_0_1;
18897 	       tim0_2_4 = tim1_0_0 - tim1_0_1;
18898 	       tre0_1_4 = tre1_1_0 - tim1_1_1;
18899 	       tim0_1_4 = tim1_1_0 + tre1_1_1;
18900 	       tre0_3_4 = tre1_1_0 + tim1_1_1;
18901 	       tim0_3_4 = tim1_1_0 - tre1_1_1;
18902 	  }
18903 	  {
18904 	       FFTW_REAL tre1_0_0;
18905 	       FFTW_REAL tim1_0_0;
18906 	       FFTW_REAL tre1_0_1;
18907 	       FFTW_REAL tim1_0_1;
18908 	       FFTW_REAL tre1_1_0;
18909 	       FFTW_REAL tim1_1_0;
18910 	       FFTW_REAL tre1_1_1;
18911 	       FFTW_REAL tim1_1_1;
18912 	       {
18913 		    FFTW_REAL tre2_0_0;
18914 		    FFTW_REAL tim2_0_0;
18915 		    FFTW_REAL tre2_1_0;
18916 		    FFTW_REAL tim2_1_0;
18917 		    {
18918 			 FFTW_REAL tr;
18919 			 FFTW_REAL ti;
18920 			 FFTW_REAL twr;
18921 			 FFTW_REAL twi;
18922 			 tr = c_re(inout[5 * stride]);
18923 			 ti = c_im(inout[5 * stride]);
18924 			 twr = c_re(W[4]);
18925 			 twi = c_im(W[4]);
18926 			 tre2_0_0 = (tr * twr) + (ti * twi);
18927 			 tim2_0_0 = (ti * twr) - (tr * twi);
18928 		    }
18929 		    {
18930 			 FFTW_REAL tr;
18931 			 FFTW_REAL ti;
18932 			 FFTW_REAL twr;
18933 			 FFTW_REAL twi;
18934 			 tr = c_re(inout[21 * stride]);
18935 			 ti = c_im(inout[21 * stride]);
18936 			 twr = c_re(W[20]);
18937 			 twi = c_im(W[20]);
18938 			 tre2_1_0 = (tr * twr) + (ti * twi);
18939 			 tim2_1_0 = (ti * twr) - (tr * twi);
18940 		    }
18941 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
18942 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
18943 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
18944 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
18945 	       }
18946 	       {
18947 		    FFTW_REAL tre2_0_0;
18948 		    FFTW_REAL tim2_0_0;
18949 		    FFTW_REAL tre2_1_0;
18950 		    FFTW_REAL tim2_1_0;
18951 		    {
18952 			 FFTW_REAL tr;
18953 			 FFTW_REAL ti;
18954 			 FFTW_REAL twr;
18955 			 FFTW_REAL twi;
18956 			 tr = c_re(inout[13 * stride]);
18957 			 ti = c_im(inout[13 * stride]);
18958 			 twr = c_re(W[12]);
18959 			 twi = c_im(W[12]);
18960 			 tre2_0_0 = (tr * twr) + (ti * twi);
18961 			 tim2_0_0 = (ti * twr) - (tr * twi);
18962 		    }
18963 		    {
18964 			 FFTW_REAL tr;
18965 			 FFTW_REAL ti;
18966 			 FFTW_REAL twr;
18967 			 FFTW_REAL twi;
18968 			 tr = c_re(inout[29 * stride]);
18969 			 ti = c_im(inout[29 * stride]);
18970 			 twr = c_re(W[28]);
18971 			 twi = c_im(W[28]);
18972 			 tre2_1_0 = (tr * twr) + (ti * twi);
18973 			 tim2_1_0 = (ti * twr) - (tr * twi);
18974 		    }
18975 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
18976 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
18977 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
18978 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
18979 	       }
18980 	       tre0_0_5 = tre1_0_0 + tre1_0_1;
18981 	       tim0_0_5 = tim1_0_0 + tim1_0_1;
18982 	       tre0_2_5 = tre1_0_0 - tre1_0_1;
18983 	       tim0_2_5 = tim1_0_0 - tim1_0_1;
18984 	       tre0_1_5 = tre1_1_0 - tim1_1_1;
18985 	       tim0_1_5 = tim1_1_0 + tre1_1_1;
18986 	       tre0_3_5 = tre1_1_0 + tim1_1_1;
18987 	       tim0_3_5 = tim1_1_0 - tre1_1_1;
18988 	  }
18989 	  {
18990 	       FFTW_REAL tre1_0_0;
18991 	       FFTW_REAL tim1_0_0;
18992 	       FFTW_REAL tre1_0_1;
18993 	       FFTW_REAL tim1_0_1;
18994 	       FFTW_REAL tre1_1_0;
18995 	       FFTW_REAL tim1_1_0;
18996 	       FFTW_REAL tre1_1_1;
18997 	       FFTW_REAL tim1_1_1;
18998 	       {
18999 		    FFTW_REAL tre2_0_0;
19000 		    FFTW_REAL tim2_0_0;
19001 		    FFTW_REAL tre2_1_0;
19002 		    FFTW_REAL tim2_1_0;
19003 		    {
19004 			 FFTW_REAL tr;
19005 			 FFTW_REAL ti;
19006 			 FFTW_REAL twr;
19007 			 FFTW_REAL twi;
19008 			 tr = c_re(inout[6 * stride]);
19009 			 ti = c_im(inout[6 * stride]);
19010 			 twr = c_re(W[5]);
19011 			 twi = c_im(W[5]);
19012 			 tre2_0_0 = (tr * twr) + (ti * twi);
19013 			 tim2_0_0 = (ti * twr) - (tr * twi);
19014 		    }
19015 		    {
19016 			 FFTW_REAL tr;
19017 			 FFTW_REAL ti;
19018 			 FFTW_REAL twr;
19019 			 FFTW_REAL twi;
19020 			 tr = c_re(inout[22 * stride]);
19021 			 ti = c_im(inout[22 * stride]);
19022 			 twr = c_re(W[21]);
19023 			 twi = c_im(W[21]);
19024 			 tre2_1_0 = (tr * twr) + (ti * twi);
19025 			 tim2_1_0 = (ti * twr) - (tr * twi);
19026 		    }
19027 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
19028 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
19029 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
19030 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
19031 	       }
19032 	       {
19033 		    FFTW_REAL tre2_0_0;
19034 		    FFTW_REAL tim2_0_0;
19035 		    FFTW_REAL tre2_1_0;
19036 		    FFTW_REAL tim2_1_0;
19037 		    {
19038 			 FFTW_REAL tr;
19039 			 FFTW_REAL ti;
19040 			 FFTW_REAL twr;
19041 			 FFTW_REAL twi;
19042 			 tr = c_re(inout[14 * stride]);
19043 			 ti = c_im(inout[14 * stride]);
19044 			 twr = c_re(W[13]);
19045 			 twi = c_im(W[13]);
19046 			 tre2_0_0 = (tr * twr) + (ti * twi);
19047 			 tim2_0_0 = (ti * twr) - (tr * twi);
19048 		    }
19049 		    {
19050 			 FFTW_REAL tr;
19051 			 FFTW_REAL ti;
19052 			 FFTW_REAL twr;
19053 			 FFTW_REAL twi;
19054 			 tr = c_re(inout[30 * stride]);
19055 			 ti = c_im(inout[30 * stride]);
19056 			 twr = c_re(W[29]);
19057 			 twi = c_im(W[29]);
19058 			 tre2_1_0 = (tr * twr) + (ti * twi);
19059 			 tim2_1_0 = (ti * twr) - (tr * twi);
19060 		    }
19061 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
19062 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
19063 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
19064 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
19065 	       }
19066 	       tre0_0_6 = tre1_0_0 + tre1_0_1;
19067 	       tim0_0_6 = tim1_0_0 + tim1_0_1;
19068 	       tre0_2_6 = tre1_0_0 - tre1_0_1;
19069 	       tim0_2_6 = tim1_0_0 - tim1_0_1;
19070 	       tre0_1_6 = tre1_1_0 - tim1_1_1;
19071 	       tim0_1_6 = tim1_1_0 + tre1_1_1;
19072 	       tre0_3_6 = tre1_1_0 + tim1_1_1;
19073 	       tim0_3_6 = tim1_1_0 - tre1_1_1;
19074 	  }
19075 	  {
19076 	       FFTW_REAL tre1_0_0;
19077 	       FFTW_REAL tim1_0_0;
19078 	       FFTW_REAL tre1_0_1;
19079 	       FFTW_REAL tim1_0_1;
19080 	       FFTW_REAL tre1_1_0;
19081 	       FFTW_REAL tim1_1_0;
19082 	       FFTW_REAL tre1_1_1;
19083 	       FFTW_REAL tim1_1_1;
19084 	       {
19085 		    FFTW_REAL tre2_0_0;
19086 		    FFTW_REAL tim2_0_0;
19087 		    FFTW_REAL tre2_1_0;
19088 		    FFTW_REAL tim2_1_0;
19089 		    {
19090 			 FFTW_REAL tr;
19091 			 FFTW_REAL ti;
19092 			 FFTW_REAL twr;
19093 			 FFTW_REAL twi;
19094 			 tr = c_re(inout[7 * stride]);
19095 			 ti = c_im(inout[7 * stride]);
19096 			 twr = c_re(W[6]);
19097 			 twi = c_im(W[6]);
19098 			 tre2_0_0 = (tr * twr) + (ti * twi);
19099 			 tim2_0_0 = (ti * twr) - (tr * twi);
19100 		    }
19101 		    {
19102 			 FFTW_REAL tr;
19103 			 FFTW_REAL ti;
19104 			 FFTW_REAL twr;
19105 			 FFTW_REAL twi;
19106 			 tr = c_re(inout[23 * stride]);
19107 			 ti = c_im(inout[23 * stride]);
19108 			 twr = c_re(W[22]);
19109 			 twi = c_im(W[22]);
19110 			 tre2_1_0 = (tr * twr) + (ti * twi);
19111 			 tim2_1_0 = (ti * twr) - (tr * twi);
19112 		    }
19113 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
19114 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
19115 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
19116 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
19117 	       }
19118 	       {
19119 		    FFTW_REAL tre2_0_0;
19120 		    FFTW_REAL tim2_0_0;
19121 		    FFTW_REAL tre2_1_0;
19122 		    FFTW_REAL tim2_1_0;
19123 		    {
19124 			 FFTW_REAL tr;
19125 			 FFTW_REAL ti;
19126 			 FFTW_REAL twr;
19127 			 FFTW_REAL twi;
19128 			 tr = c_re(inout[15 * stride]);
19129 			 ti = c_im(inout[15 * stride]);
19130 			 twr = c_re(W[14]);
19131 			 twi = c_im(W[14]);
19132 			 tre2_0_0 = (tr * twr) + (ti * twi);
19133 			 tim2_0_0 = (ti * twr) - (tr * twi);
19134 		    }
19135 		    {
19136 			 FFTW_REAL tr;
19137 			 FFTW_REAL ti;
19138 			 FFTW_REAL twr;
19139 			 FFTW_REAL twi;
19140 			 tr = c_re(inout[31 * stride]);
19141 			 ti = c_im(inout[31 * stride]);
19142 			 twr = c_re(W[30]);
19143 			 twi = c_im(W[30]);
19144 			 tre2_1_0 = (tr * twr) + (ti * twi);
19145 			 tim2_1_0 = (ti * twr) - (tr * twi);
19146 		    }
19147 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
19148 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
19149 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
19150 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
19151 	       }
19152 	       tre0_0_7 = tre1_0_0 + tre1_0_1;
19153 	       tim0_0_7 = tim1_0_0 + tim1_0_1;
19154 	       tre0_2_7 = tre1_0_0 - tre1_0_1;
19155 	       tim0_2_7 = tim1_0_0 - tim1_0_1;
19156 	       tre0_1_7 = tre1_1_0 - tim1_1_1;
19157 	       tim0_1_7 = tim1_1_0 + tre1_1_1;
19158 	       tre0_3_7 = tre1_1_0 + tim1_1_1;
19159 	       tim0_3_7 = tim1_1_0 - tre1_1_1;
19160 	  }
19161 	  {
19162 	       FFTW_REAL tre1_0_0;
19163 	       FFTW_REAL tim1_0_0;
19164 	       FFTW_REAL tre1_0_1;
19165 	       FFTW_REAL tim1_0_1;
19166 	       FFTW_REAL tre1_0_2;
19167 	       FFTW_REAL tim1_0_2;
19168 	       FFTW_REAL tre1_0_3;
19169 	       FFTW_REAL tim1_0_3;
19170 	       FFTW_REAL tre1_1_0;
19171 	       FFTW_REAL tim1_1_0;
19172 	       FFTW_REAL tre1_1_1;
19173 	       FFTW_REAL tim1_1_1;
19174 	       FFTW_REAL tre1_1_2;
19175 	       FFTW_REAL tim1_1_2;
19176 	       FFTW_REAL tre1_1_3;
19177 	       FFTW_REAL tim1_1_3;
19178 	       tre1_0_0 = tre0_0_0 + tre0_0_4;
19179 	       tim1_0_0 = tim0_0_0 + tim0_0_4;
19180 	       tre1_1_0 = tre0_0_0 - tre0_0_4;
19181 	       tim1_1_0 = tim0_0_0 - tim0_0_4;
19182 	       tre1_0_1 = tre0_0_1 + tre0_0_5;
19183 	       tim1_0_1 = tim0_0_1 + tim0_0_5;
19184 	       tre1_1_1 = tre0_0_1 - tre0_0_5;
19185 	       tim1_1_1 = tim0_0_1 - tim0_0_5;
19186 	       tre1_0_2 = tre0_0_2 + tre0_0_6;
19187 	       tim1_0_2 = tim0_0_2 + tim0_0_6;
19188 	       tre1_1_2 = tre0_0_2 - tre0_0_6;
19189 	       tim1_1_2 = tim0_0_2 - tim0_0_6;
19190 	       tre1_0_3 = tre0_0_3 + tre0_0_7;
19191 	       tim1_0_3 = tim0_0_3 + tim0_0_7;
19192 	       tre1_1_3 = tre0_0_3 - tre0_0_7;
19193 	       tim1_1_3 = tim0_0_3 - tim0_0_7;
19194 	       {
19195 		    FFTW_REAL tre2_0_0;
19196 		    FFTW_REAL tim2_0_0;
19197 		    FFTW_REAL tre2_0_1;
19198 		    FFTW_REAL tim2_0_1;
19199 		    FFTW_REAL tre2_1_0;
19200 		    FFTW_REAL tim2_1_0;
19201 		    FFTW_REAL tre2_1_1;
19202 		    FFTW_REAL tim2_1_1;
19203 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
19204 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
19205 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
19206 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
19207 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
19208 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
19209 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
19210 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
19211 		    c_re(inout[0]) = tre2_0_0 + tre2_0_1;
19212 		    c_im(inout[0]) = tim2_0_0 + tim2_0_1;
19213 		    c_re(inout[16 * stride]) = tre2_0_0 - tre2_0_1;
19214 		    c_im(inout[16 * stride]) = tim2_0_0 - tim2_0_1;
19215 		    c_re(inout[8 * stride]) = tre2_1_0 - tim2_1_1;
19216 		    c_im(inout[8 * stride]) = tim2_1_0 + tre2_1_1;
19217 		    c_re(inout[24 * stride]) = tre2_1_0 + tim2_1_1;
19218 		    c_im(inout[24 * stride]) = tim2_1_0 - tre2_1_1;
19219 	       }
19220 	       {
19221 		    FFTW_REAL tre2_0_0;
19222 		    FFTW_REAL tim2_0_0;
19223 		    FFTW_REAL tre2_0_1;
19224 		    FFTW_REAL tim2_0_1;
19225 		    FFTW_REAL tre2_1_0;
19226 		    FFTW_REAL tim2_1_0;
19227 		    FFTW_REAL tre2_1_1;
19228 		    FFTW_REAL tim2_1_1;
19229 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
19230 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
19231 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
19232 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
19233 		    {
19234 			 FFTW_REAL tre3_0_0;
19235 			 FFTW_REAL tim3_0_0;
19236 			 FFTW_REAL tre3_1_0;
19237 			 FFTW_REAL tim3_1_0;
19238 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
19239 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
19240 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
19241 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
19242 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
19243 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
19244 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
19245 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
19246 		    }
19247 		    c_re(inout[4 * stride]) = tre2_0_0 + tre2_0_1;
19248 		    c_im(inout[4 * stride]) = tim2_0_0 + tim2_0_1;
19249 		    c_re(inout[20 * stride]) = tre2_0_0 - tre2_0_1;
19250 		    c_im(inout[20 * stride]) = tim2_0_0 - tim2_0_1;
19251 		    c_re(inout[12 * stride]) = tre2_1_0 - tim2_1_1;
19252 		    c_im(inout[12 * stride]) = tim2_1_0 + tre2_1_1;
19253 		    c_re(inout[28 * stride]) = tre2_1_0 + tim2_1_1;
19254 		    c_im(inout[28 * stride]) = tim2_1_0 - tre2_1_1;
19255 	       }
19256 	  }
19257 	  {
19258 	       FFTW_REAL tre1_0_0;
19259 	       FFTW_REAL tim1_0_0;
19260 	       FFTW_REAL tre1_0_1;
19261 	       FFTW_REAL tim1_0_1;
19262 	       FFTW_REAL tre1_0_2;
19263 	       FFTW_REAL tim1_0_2;
19264 	       FFTW_REAL tre1_0_3;
19265 	       FFTW_REAL tim1_0_3;
19266 	       FFTW_REAL tre1_1_0;
19267 	       FFTW_REAL tim1_1_0;
19268 	       FFTW_REAL tre1_1_1;
19269 	       FFTW_REAL tim1_1_1;
19270 	       FFTW_REAL tre1_1_2;
19271 	       FFTW_REAL tim1_1_2;
19272 	       FFTW_REAL tre1_1_3;
19273 	       FFTW_REAL tim1_1_3;
19274 	       {
19275 		    FFTW_REAL tre2_1_0;
19276 		    FFTW_REAL tim2_1_0;
19277 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_4 - tim0_1_4);
19278 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_4 + tre0_1_4);
19279 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
19280 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
19281 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
19282 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
19283 	       }
19284 	       {
19285 		    FFTW_REAL tre2_0_0;
19286 		    FFTW_REAL tim2_0_0;
19287 		    FFTW_REAL tre2_1_0;
19288 		    FFTW_REAL tim2_1_0;
19289 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_1) - (((FFTW_REAL) FFTW_K195090322) * tim0_1_1);
19290 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_1) + (((FFTW_REAL) FFTW_K195090322) * tre0_1_1);
19291 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_1_5) - (((FFTW_REAL) FFTW_K831469612) * tim0_1_5);
19292 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_1_5) + (((FFTW_REAL) FFTW_K831469612) * tre0_1_5);
19293 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
19294 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
19295 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
19296 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
19297 	       }
19298 	       {
19299 		    FFTW_REAL tre2_0_0;
19300 		    FFTW_REAL tim2_0_0;
19301 		    FFTW_REAL tre2_1_0;
19302 		    FFTW_REAL tim2_1_0;
19303 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_2) - (((FFTW_REAL) FFTW_K382683432) * tim0_1_2);
19304 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_2) + (((FFTW_REAL) FFTW_K382683432) * tre0_1_2);
19305 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_1_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_1_6);
19306 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_1_6) + (((FFTW_REAL) FFTW_K923879532) * tre0_1_6);
19307 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
19308 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
19309 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
19310 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
19311 	       }
19312 	       {
19313 		    FFTW_REAL tre2_0_0;
19314 		    FFTW_REAL tim2_0_0;
19315 		    FFTW_REAL tre2_1_0;
19316 		    FFTW_REAL tim2_1_0;
19317 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_3) - (((FFTW_REAL) FFTW_K555570233) * tim0_1_3);
19318 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_3) + (((FFTW_REAL) FFTW_K555570233) * tre0_1_3);
19319 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_1_7) - (((FFTW_REAL) FFTW_K980785280) * tim0_1_7);
19320 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_1_7) + (((FFTW_REAL) FFTW_K980785280) * tre0_1_7);
19321 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
19322 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
19323 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
19324 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
19325 	       }
19326 	       {
19327 		    FFTW_REAL tre2_0_0;
19328 		    FFTW_REAL tim2_0_0;
19329 		    FFTW_REAL tre2_0_1;
19330 		    FFTW_REAL tim2_0_1;
19331 		    FFTW_REAL tre2_1_0;
19332 		    FFTW_REAL tim2_1_0;
19333 		    FFTW_REAL tre2_1_1;
19334 		    FFTW_REAL tim2_1_1;
19335 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
19336 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
19337 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
19338 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
19339 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
19340 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
19341 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
19342 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
19343 		    c_re(inout[stride]) = tre2_0_0 + tre2_0_1;
19344 		    c_im(inout[stride]) = tim2_0_0 + tim2_0_1;
19345 		    c_re(inout[17 * stride]) = tre2_0_0 - tre2_0_1;
19346 		    c_im(inout[17 * stride]) = tim2_0_0 - tim2_0_1;
19347 		    c_re(inout[9 * stride]) = tre2_1_0 - tim2_1_1;
19348 		    c_im(inout[9 * stride]) = tim2_1_0 + tre2_1_1;
19349 		    c_re(inout[25 * stride]) = tre2_1_0 + tim2_1_1;
19350 		    c_im(inout[25 * stride]) = tim2_1_0 - tre2_1_1;
19351 	       }
19352 	       {
19353 		    FFTW_REAL tre2_0_0;
19354 		    FFTW_REAL tim2_0_0;
19355 		    FFTW_REAL tre2_0_1;
19356 		    FFTW_REAL tim2_0_1;
19357 		    FFTW_REAL tre2_1_0;
19358 		    FFTW_REAL tim2_1_0;
19359 		    FFTW_REAL tre2_1_1;
19360 		    FFTW_REAL tim2_1_1;
19361 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
19362 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
19363 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
19364 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
19365 		    {
19366 			 FFTW_REAL tre3_0_0;
19367 			 FFTW_REAL tim3_0_0;
19368 			 FFTW_REAL tre3_1_0;
19369 			 FFTW_REAL tim3_1_0;
19370 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
19371 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
19372 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
19373 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
19374 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
19375 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
19376 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
19377 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
19378 		    }
19379 		    c_re(inout[5 * stride]) = tre2_0_0 + tre2_0_1;
19380 		    c_im(inout[5 * stride]) = tim2_0_0 + tim2_0_1;
19381 		    c_re(inout[21 * stride]) = tre2_0_0 - tre2_0_1;
19382 		    c_im(inout[21 * stride]) = tim2_0_0 - tim2_0_1;
19383 		    c_re(inout[13 * stride]) = tre2_1_0 - tim2_1_1;
19384 		    c_im(inout[13 * stride]) = tim2_1_0 + tre2_1_1;
19385 		    c_re(inout[29 * stride]) = tre2_1_0 + tim2_1_1;
19386 		    c_im(inout[29 * stride]) = tim2_1_0 - tre2_1_1;
19387 	       }
19388 	  }
19389 	  {
19390 	       FFTW_REAL tre1_0_0;
19391 	       FFTW_REAL tim1_0_0;
19392 	       FFTW_REAL tre1_0_1;
19393 	       FFTW_REAL tim1_0_1;
19394 	       FFTW_REAL tre1_0_2;
19395 	       FFTW_REAL tim1_0_2;
19396 	       FFTW_REAL tre1_0_3;
19397 	       FFTW_REAL tim1_0_3;
19398 	       FFTW_REAL tre1_1_0;
19399 	       FFTW_REAL tim1_1_0;
19400 	       FFTW_REAL tre1_1_1;
19401 	       FFTW_REAL tim1_1_1;
19402 	       FFTW_REAL tre1_1_2;
19403 	       FFTW_REAL tim1_1_2;
19404 	       FFTW_REAL tre1_1_3;
19405 	       FFTW_REAL tim1_1_3;
19406 	       tre1_0_0 = tre0_2_0 - tim0_2_4;
19407 	       tim1_0_0 = tim0_2_0 + tre0_2_4;
19408 	       tre1_1_0 = tre0_2_0 + tim0_2_4;
19409 	       tim1_1_0 = tim0_2_0 - tre0_2_4;
19410 	       {
19411 		    FFTW_REAL tre2_0_0;
19412 		    FFTW_REAL tim2_0_0;
19413 		    FFTW_REAL tre2_1_0;
19414 		    FFTW_REAL tim2_1_0;
19415 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_1) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_1);
19416 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_1) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_1);
19417 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_5) + (((FFTW_REAL) FFTW_K923879532) * tim0_2_5);
19418 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_5) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_5);
19419 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
19420 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
19421 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
19422 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
19423 	       }
19424 	       {
19425 		    FFTW_REAL tre2_0_0;
19426 		    FFTW_REAL tim2_0_0;
19427 		    FFTW_REAL tre2_1_0;
19428 		    FFTW_REAL tim2_1_0;
19429 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_2 - tim0_2_2);
19430 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_2 + tre0_2_2);
19431 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_6 + tim0_2_6);
19432 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_6 - tim0_2_6);
19433 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
19434 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
19435 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
19436 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
19437 	       }
19438 	       {
19439 		    FFTW_REAL tre2_0_0;
19440 		    FFTW_REAL tim2_0_0;
19441 		    FFTW_REAL tre2_1_0;
19442 		    FFTW_REAL tim2_1_0;
19443 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_3);
19444 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_3) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_3);
19445 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_7) + (((FFTW_REAL) FFTW_K382683432) * tim0_2_7);
19446 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_7) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_7);
19447 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
19448 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
19449 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
19450 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
19451 	       }
19452 	       {
19453 		    FFTW_REAL tre2_0_0;
19454 		    FFTW_REAL tim2_0_0;
19455 		    FFTW_REAL tre2_0_1;
19456 		    FFTW_REAL tim2_0_1;
19457 		    FFTW_REAL tre2_1_0;
19458 		    FFTW_REAL tim2_1_0;
19459 		    FFTW_REAL tre2_1_1;
19460 		    FFTW_REAL tim2_1_1;
19461 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
19462 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
19463 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
19464 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
19465 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
19466 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
19467 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
19468 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
19469 		    c_re(inout[2 * stride]) = tre2_0_0 + tre2_0_1;
19470 		    c_im(inout[2 * stride]) = tim2_0_0 + tim2_0_1;
19471 		    c_re(inout[18 * stride]) = tre2_0_0 - tre2_0_1;
19472 		    c_im(inout[18 * stride]) = tim2_0_0 - tim2_0_1;
19473 		    c_re(inout[10 * stride]) = tre2_1_0 - tim2_1_1;
19474 		    c_im(inout[10 * stride]) = tim2_1_0 + tre2_1_1;
19475 		    c_re(inout[26 * stride]) = tre2_1_0 + tim2_1_1;
19476 		    c_im(inout[26 * stride]) = tim2_1_0 - tre2_1_1;
19477 	       }
19478 	       {
19479 		    FFTW_REAL tre2_0_0;
19480 		    FFTW_REAL tim2_0_0;
19481 		    FFTW_REAL tre2_0_1;
19482 		    FFTW_REAL tim2_0_1;
19483 		    FFTW_REAL tre2_1_0;
19484 		    FFTW_REAL tim2_1_0;
19485 		    FFTW_REAL tre2_1_1;
19486 		    FFTW_REAL tim2_1_1;
19487 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
19488 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
19489 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
19490 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
19491 		    {
19492 			 FFTW_REAL tre3_0_0;
19493 			 FFTW_REAL tim3_0_0;
19494 			 FFTW_REAL tre3_1_0;
19495 			 FFTW_REAL tim3_1_0;
19496 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
19497 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
19498 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
19499 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
19500 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
19501 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
19502 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
19503 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
19504 		    }
19505 		    c_re(inout[6 * stride]) = tre2_0_0 + tre2_0_1;
19506 		    c_im(inout[6 * stride]) = tim2_0_0 + tim2_0_1;
19507 		    c_re(inout[22 * stride]) = tre2_0_0 - tre2_0_1;
19508 		    c_im(inout[22 * stride]) = tim2_0_0 - tim2_0_1;
19509 		    c_re(inout[14 * stride]) = tre2_1_0 - tim2_1_1;
19510 		    c_im(inout[14 * stride]) = tim2_1_0 + tre2_1_1;
19511 		    c_re(inout[30 * stride]) = tre2_1_0 + tim2_1_1;
19512 		    c_im(inout[30 * stride]) = tim2_1_0 - tre2_1_1;
19513 	       }
19514 	  }
19515 	  {
19516 	       FFTW_REAL tre1_0_0;
19517 	       FFTW_REAL tim1_0_0;
19518 	       FFTW_REAL tre1_0_1;
19519 	       FFTW_REAL tim1_0_1;
19520 	       FFTW_REAL tre1_0_2;
19521 	       FFTW_REAL tim1_0_2;
19522 	       FFTW_REAL tre1_0_3;
19523 	       FFTW_REAL tim1_0_3;
19524 	       FFTW_REAL tre1_1_0;
19525 	       FFTW_REAL tim1_1_0;
19526 	       FFTW_REAL tre1_1_1;
19527 	       FFTW_REAL tim1_1_1;
19528 	       FFTW_REAL tre1_1_2;
19529 	       FFTW_REAL tim1_1_2;
19530 	       FFTW_REAL tre1_1_3;
19531 	       FFTW_REAL tim1_1_3;
19532 	       {
19533 		    FFTW_REAL tre2_1_0;
19534 		    FFTW_REAL tim2_1_0;
19535 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_4 + tim0_3_4);
19536 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_3_4 - tim0_3_4);
19537 		    tre1_0_0 = tre0_3_0 - tre2_1_0;
19538 		    tim1_0_0 = tim0_3_0 + tim2_1_0;
19539 		    tre1_1_0 = tre0_3_0 + tre2_1_0;
19540 		    tim1_1_0 = tim0_3_0 - tim2_1_0;
19541 	       }
19542 	       {
19543 		    FFTW_REAL tre2_0_0;
19544 		    FFTW_REAL tim2_0_0;
19545 		    FFTW_REAL tre2_1_0;
19546 		    FFTW_REAL tim2_1_0;
19547 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_1) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_1);
19548 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_1) + (((FFTW_REAL) FFTW_K555570233) * tre0_3_1);
19549 		    tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_5) + (((FFTW_REAL) FFTW_K195090322) * tim0_3_5);
19550 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_5) - (((FFTW_REAL) FFTW_K980785280) * tim0_3_5);
19551 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
19552 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
19553 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
19554 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
19555 	       }
19556 	       {
19557 		    FFTW_REAL tre2_0_0;
19558 		    FFTW_REAL tim2_0_0;
19559 		    FFTW_REAL tre2_1_0;
19560 		    FFTW_REAL tim2_1_0;
19561 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_2) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_2);
19562 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_2) + (((FFTW_REAL) FFTW_K923879532) * tre0_3_2);
19563 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_3_6);
19564 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_3_6) + (((FFTW_REAL) FFTW_K382683432) * tre0_3_6);
19565 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
19566 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
19567 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
19568 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
19569 	       }
19570 	       {
19571 		    FFTW_REAL tre2_0_0;
19572 		    FFTW_REAL tim2_0_0;
19573 		    FFTW_REAL tre2_1_0;
19574 		    FFTW_REAL tim2_1_0;
19575 		    tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_3) + (((FFTW_REAL) FFTW_K980785280) * tim0_3_3);
19576 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_3) - (((FFTW_REAL) FFTW_K195090322) * tim0_3_3);
19577 		    tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_7) - (((FFTW_REAL) FFTW_K555570233) * tre0_3_7);
19578 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_3_7) + (((FFTW_REAL) FFTW_K831469612) * tre0_3_7);
19579 		    tre1_0_3 = tre2_1_0 - tre2_0_0;
19580 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
19581 		    tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
19582 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
19583 	       }
19584 	       {
19585 		    FFTW_REAL tre2_0_0;
19586 		    FFTW_REAL tim2_0_0;
19587 		    FFTW_REAL tre2_0_1;
19588 		    FFTW_REAL tim2_0_1;
19589 		    FFTW_REAL tre2_1_0;
19590 		    FFTW_REAL tim2_1_0;
19591 		    FFTW_REAL tre2_1_1;
19592 		    FFTW_REAL tim2_1_1;
19593 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
19594 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
19595 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
19596 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
19597 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
19598 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
19599 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
19600 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
19601 		    c_re(inout[3 * stride]) = tre2_0_0 + tre2_0_1;
19602 		    c_im(inout[3 * stride]) = tim2_0_0 + tim2_0_1;
19603 		    c_re(inout[19 * stride]) = tre2_0_0 - tre2_0_1;
19604 		    c_im(inout[19 * stride]) = tim2_0_0 - tim2_0_1;
19605 		    c_re(inout[11 * stride]) = tre2_1_0 - tim2_1_1;
19606 		    c_im(inout[11 * stride]) = tim2_1_0 + tre2_1_1;
19607 		    c_re(inout[27 * stride]) = tre2_1_0 + tim2_1_1;
19608 		    c_im(inout[27 * stride]) = tim2_1_0 - tre2_1_1;
19609 	       }
19610 	       {
19611 		    FFTW_REAL tre2_0_0;
19612 		    FFTW_REAL tim2_0_0;
19613 		    FFTW_REAL tre2_0_1;
19614 		    FFTW_REAL tim2_0_1;
19615 		    FFTW_REAL tre2_1_0;
19616 		    FFTW_REAL tim2_1_0;
19617 		    FFTW_REAL tre2_1_1;
19618 		    FFTW_REAL tim2_1_1;
19619 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
19620 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
19621 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
19622 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
19623 		    {
19624 			 FFTW_REAL tre3_0_0;
19625 			 FFTW_REAL tim3_0_0;
19626 			 FFTW_REAL tre3_1_0;
19627 			 FFTW_REAL tim3_1_0;
19628 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
19629 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
19630 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
19631 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
19632 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
19633 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
19634 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
19635 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
19636 		    }
19637 		    c_re(inout[7 * stride]) = tre2_0_0 + tre2_0_1;
19638 		    c_im(inout[7 * stride]) = tim2_0_0 + tim2_0_1;
19639 		    c_re(inout[23 * stride]) = tre2_0_0 - tre2_0_1;
19640 		    c_im(inout[23 * stride]) = tim2_0_0 - tim2_0_1;
19641 		    c_re(inout[15 * stride]) = tre2_1_0 - tim2_1_1;
19642 		    c_im(inout[15 * stride]) = tim2_1_0 + tre2_1_1;
19643 		    c_re(inout[31 * stride]) = tre2_1_0 + tim2_1_1;
19644 		    c_im(inout[31 * stride]) = tim2_1_0 - tre2_1_1;
19645 	       }
19646 	  }
19647      }
19648 }
19649 
19650 /* This function contains 22 FP additions and 12 FP multiplications */
19651 
/*
 * In-place radix-4 butterfly with twiddle factors, positive-exponent
 * (unnormalized) variant.
 *
 * Each of the m iterations reads the four complex values at inout[0],
 * inout[stride], inout[2 * stride] and inout[3 * stride], multiplies the
 * last three by the complex conjugates of W[0], W[1] and W[2]
 * respectively, and writes the 4-point transform of the result back to
 * the same four locations.  After every iteration the data pointer moves
 * forward by dist elements and W moves forward by 3 entries.
 *
 * A      - first element of the data, transformed in place
 * W      - twiddle factors, 3 consumed per iteration
 * stride - element distance between the four inputs of one butterfly
 * m      - number of butterflies (nothing happens when m <= 0)
 * dist   - element distance between consecutive butterflies
 */
static void fftwi_twiddle_4(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *io = A;
     int left;

     for (left = m; left > 0; --left, io += dist, W += 3) {
          FFTW_REAL sum02_re, sum02_im;      /* x0 + x2 */
          FFTW_REAL dif02_re, dif02_im;      /* x0 - x2 */
          FFTW_REAL sum13_re, sum13_im;      /* x1 + x3 */
          FFTW_REAL dif13_re, dif13_im;      /* x1 - x3 */

          /* Pair (0, 2): element 0 is used as is, element 2 gets conj(W[1]). */
          {
               FFTW_REAL x0_re = c_re(io[0]);
               FFTW_REAL x0_im = c_im(io[0]);
               FFTW_REAL in_re = c_re(io[2 * stride]);
               FFTW_REAL in_im = c_im(io[2 * stride]);
               FFTW_REAL w_re = c_re(W[1]);
               FFTW_REAL w_im = c_im(W[1]);
               FFTW_REAL x2_re = (in_re * w_re) + (in_im * w_im);
               FFTW_REAL x2_im = (in_im * w_re) - (in_re * w_im);

               sum02_re = x0_re + x2_re;
               sum02_im = x0_im + x2_im;
               dif02_re = x0_re - x2_re;
               dif02_im = x0_im - x2_im;
          }

          /* Pair (1, 3): twiddled by conj(W[0]) and conj(W[2]). */
          {
               FFTW_REAL x1_re, x1_im;
               FFTW_REAL x3_re, x3_im;

               {
                    FFTW_REAL in_re = c_re(io[stride]);
                    FFTW_REAL in_im = c_im(io[stride]);
                    FFTW_REAL w_re = c_re(W[0]);
                    FFTW_REAL w_im = c_im(W[0]);

                    x1_re = (in_re * w_re) + (in_im * w_im);
                    x1_im = (in_im * w_re) - (in_re * w_im);
               }
               {
                    FFTW_REAL in_re = c_re(io[3 * stride]);
                    FFTW_REAL in_im = c_im(io[3 * stride]);
                    FFTW_REAL w_re = c_re(W[2]);
                    FFTW_REAL w_im = c_im(W[2]);

                    x3_re = (in_re * w_re) + (in_im * w_im);
                    x3_im = (in_im * w_re) - (in_re * w_im);
               }

               sum13_re = x1_re + x3_re;
               sum13_im = x1_im + x3_im;
               dif13_re = x1_re - x3_re;
               dif13_im = x1_im - x3_im;
          }

          /* Outputs 0 and 2: sum and difference of the two pair sums. */
          c_re(io[0]) = sum02_re + sum13_re;
          c_im(io[0]) = sum02_im + sum13_im;
          c_re(io[2 * stride]) = sum02_re - sum13_re;
          c_im(io[2 * stride]) = sum02_im - sum13_im;

          /* Outputs 1 and 3: dif02 plus/minus i * dif13. */
          c_re(io[stride]) = dif02_re - dif13_im;
          c_im(io[stride]) = dif02_im + dif13_re;
          c_re(io[3 * stride]) = dif02_re + dif13_im;
          c_im(io[3 * stride]) = dif02_im - dif13_re;
     }
}
19734 
19735 /* This function contains 52 FP additions and 32 FP multiplications */
19736 
/*
 * In-place radix-5 butterfly with twiddle factors, positive-exponent
 * (unnormalized) variant.
 *
 * Each of the m iterations reads the five complex values at
 * inout[j * stride], j = 0..4, multiplies elements 1..4 by the complex
 * conjugates of W[0]..W[3], and overwrites the same five locations with
 * the 5-point transform of the result.  After every iteration the data
 * pointer moves forward by dist elements and W moves forward by 4 entries.
 *
 * The FFTW_K... constants are defined outside this function; presumably
 * they are the cosines/sines of multiples of 2*pi/5 -- confirm against
 * their definitions.
 *
 * A      - first element of the data, transformed in place
 * W      - twiddle factors, 4 consumed per iteration
 * stride - element distance between the five inputs of one butterfly
 * m      - number of butterflies (nothing happens when m <= 0)
 * dist   - element distance between consecutive butterflies
 */
static void fftwi_twiddle_5(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *io = A;
     int k;

     for (k = 0; k < m; ++k, io += dist, W += 4) {
          /* Element 0 carries no twiddle factor. */
          FFTW_REAL x0_re = c_re(io[0]);
          FFTW_REAL x0_im = c_im(io[0]);
          FFTW_REAL x1_re, x1_im;
          FFTW_REAL x2_re, x2_im;
          FFTW_REAL x3_re, x3_im;
          FFTW_REAL x4_re, x4_im;

          /* x_j = inout[j * stride] * conj(W[j - 1]) for j = 1..4. */
          {
               FFTW_REAL in_re = c_re(io[stride]);
               FFTW_REAL in_im = c_im(io[stride]);
               FFTW_REAL w_re = c_re(W[0]);
               FFTW_REAL w_im = c_im(W[0]);

               x1_re = (in_re * w_re) + (in_im * w_im);
               x1_im = (in_im * w_re) - (in_re * w_im);
          }
          {
               FFTW_REAL in_re = c_re(io[2 * stride]);
               FFTW_REAL in_im = c_im(io[2 * stride]);
               FFTW_REAL w_re = c_re(W[1]);
               FFTW_REAL w_im = c_im(W[1]);

               x2_re = (in_re * w_re) + (in_im * w_im);
               x2_im = (in_im * w_re) - (in_re * w_im);
          }
          {
               FFTW_REAL in_re = c_re(io[3 * stride]);
               FFTW_REAL in_im = c_im(io[3 * stride]);
               FFTW_REAL w_re = c_re(W[2]);
               FFTW_REAL w_im = c_im(W[2]);

               x3_re = (in_re * w_re) + (in_im * w_im);
               x3_im = (in_im * w_re) - (in_re * w_im);
          }
          {
               FFTW_REAL in_re = c_re(io[4 * stride]);
               FFTW_REAL in_im = c_im(io[4 * stride]);
               FFTW_REAL w_re = c_re(W[3]);
               FFTW_REAL w_im = c_im(W[3]);

               x4_re = (in_re * w_re) + (in_im * w_im);
               x4_im = (in_im * w_re) - (in_re * w_im);
          }

          /* Output 0: plain sum of all five terms. */
          c_re(io[0]) = x0_re + x1_re + x2_re + x3_re + x4_re;
          c_im(io[0]) = x0_im + x1_im + x2_im + x3_im + x4_im;

          /* Outputs 1 and 4 share a common part and differ in the sign of
             the cross part. */
          {
               FFTW_REAL common_re = x0_re
                    + (((FFTW_REAL) FFTW_K309016994) * (x1_re + x4_re))
                    - (((FFTW_REAL) FFTW_K809016994) * (x2_re + x3_re));
               FFTW_REAL cross_re =
                    (((FFTW_REAL) FFTW_K951056516) * (x4_im - x1_im))
                    + (((FFTW_REAL) FFTW_K587785252) * (x3_im - x2_im));
               FFTW_REAL common_im = x0_im
                    + (((FFTW_REAL) FFTW_K309016994) * (x1_im + x4_im))
                    - (((FFTW_REAL) FFTW_K809016994) * (x2_im + x3_im));
               FFTW_REAL cross_im =
                    (((FFTW_REAL) FFTW_K951056516) * (x1_re - x4_re))
                    + (((FFTW_REAL) FFTW_K587785252) * (x2_re - x3_re));

               c_re(io[stride]) = common_re + cross_re;
               c_re(io[4 * stride]) = common_re - cross_re;
               c_im(io[stride]) = common_im + cross_im;
               c_im(io[4 * stride]) = common_im - cross_im;
          }

          /* Outputs 2 and 3, built the same way with the constants
             exchanged. */
          {
               FFTW_REAL common_re = x0_re
                    + (((FFTW_REAL) FFTW_K309016994) * (x2_re + x3_re))
                    - (((FFTW_REAL) FFTW_K809016994) * (x1_re + x4_re));
               FFTW_REAL cross_re =
                    (((FFTW_REAL) FFTW_K587785252) * (x4_im - x1_im))
                    + (((FFTW_REAL) FFTW_K951056516) * (x2_im - x3_im));
               FFTW_REAL common_im = x0_im
                    + (((FFTW_REAL) FFTW_K309016994) * (x2_im + x3_im))
                    - (((FFTW_REAL) FFTW_K809016994) * (x1_im + x4_im));
               FFTW_REAL cross_im =
                    (((FFTW_REAL) FFTW_K587785252) * (x1_re - x4_re))
                    + (((FFTW_REAL) FFTW_K951056516) * (x3_re - x2_re));

               c_re(io[2 * stride]) = common_re + cross_re;
               c_re(io[3 * stride]) = common_re - cross_re;
               c_im(io[2 * stride]) = common_im + cross_im;
               c_im(io[3 * stride]) = common_im - cross_im;
          }
     }
}
19839 
19840 /* This function contains 50 FP additions and 28 FP multiplications */
19841 
/*
 * In-place radix-6 butterfly with twiddle factors, positive-exponent
 * (unnormalized) variant.
 *
 * Each of the m iterations reads the six complex values at
 * inout[j * stride], j = 0..5, multiplies elements 1..5 by the complex
 * conjugates of W[0]..W[4], and overwrites the same six locations with
 * the transform of the result.  The work is organised as three 2-point
 * butterflies on the pairs (0, 3), (2, 5) and (4, 1), followed by two
 * 3-point combinations: one over the pair sums (outputs 0, 4, 2) and one
 * over the pair differences (outputs 3, 1, 5).  After every iteration
 * the data pointer moves forward by dist elements and W by 5 entries.
 *
 * The FFTW_K... constants are defined outside this function; presumably
 * they are 1/2 and sqrt(3)/2 -- confirm against their definitions.
 *
 * A      - first element of the data, transformed in place
 * W      - twiddle factors, 5 consumed per iteration
 * stride - element distance between the six inputs of one butterfly
 * m      - number of butterflies (nothing happens when m <= 0)
 * dist   - element distance between consecutive butterflies
 */
static void fftwi_twiddle_6(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     FFTW_COMPLEX *io = A;
     int k;

     for (k = 0; k < m; ++k, io += dist, W += 5) {
          FFTW_REAL s0_re, s0_im, d0_re, d0_im;   /* x0 +/- x3 */
          FFTW_REAL s1_re, s1_im, d1_re, d1_im;   /* x2 +/- x5 */
          FFTW_REAL s2_re, s2_im, d2_re, d2_im;   /* x4 +/- x1 */

          /* Pair (0, 3): element 0 is used as is, element 3 gets conj(W[2]). */
          {
               FFTW_REAL a_re = c_re(io[0]);
               FFTW_REAL a_im = c_im(io[0]);
               FFTW_REAL in_re = c_re(io[3 * stride]);
               FFTW_REAL in_im = c_im(io[3 * stride]);
               FFTW_REAL w_re = c_re(W[2]);
               FFTW_REAL w_im = c_im(W[2]);
               FFTW_REAL b_re = (in_re * w_re) + (in_im * w_im);
               FFTW_REAL b_im = (in_im * w_re) - (in_re * w_im);

               s0_re = a_re + b_re;
               s0_im = a_im + b_im;
               d0_re = a_re - b_re;
               d0_im = a_im - b_im;
          }

          /* Pair (2, 5): twiddled by conj(W[1]) and conj(W[4]). */
          {
               FFTW_REAL a_re, a_im;
               FFTW_REAL b_re, b_im;

               {
                    FFTW_REAL in_re = c_re(io[2 * stride]);
                    FFTW_REAL in_im = c_im(io[2 * stride]);
                    FFTW_REAL w_re = c_re(W[1]);
                    FFTW_REAL w_im = c_im(W[1]);

                    a_re = (in_re * w_re) + (in_im * w_im);
                    a_im = (in_im * w_re) - (in_re * w_im);
               }
               {
                    FFTW_REAL in_re = c_re(io[5 * stride]);
                    FFTW_REAL in_im = c_im(io[5 * stride]);
                    FFTW_REAL w_re = c_re(W[4]);
                    FFTW_REAL w_im = c_im(W[4]);

                    b_re = (in_re * w_re) + (in_im * w_im);
                    b_im = (in_im * w_re) - (in_re * w_im);
               }

               s1_re = a_re + b_re;
               s1_im = a_im + b_im;
               d1_re = a_re - b_re;
               d1_im = a_im - b_im;
          }

          /* Pair (4, 1): twiddled by conj(W[3]) and conj(W[0]). */
          {
               FFTW_REAL a_re, a_im;
               FFTW_REAL b_re, b_im;

               {
                    FFTW_REAL in_re = c_re(io[4 * stride]);
                    FFTW_REAL in_im = c_im(io[4 * stride]);
                    FFTW_REAL w_re = c_re(W[3]);
                    FFTW_REAL w_im = c_im(W[3]);

                    a_re = (in_re * w_re) + (in_im * w_im);
                    a_im = (in_im * w_re) - (in_re * w_im);
               }
               {
                    FFTW_REAL in_re = c_re(io[stride]);
                    FFTW_REAL in_im = c_im(io[stride]);
                    FFTW_REAL w_re = c_re(W[0]);
                    FFTW_REAL w_im = c_im(W[0]);

                    b_re = (in_re * w_re) + (in_im * w_im);
                    b_im = (in_im * w_re) - (in_re * w_im);
               }

               s2_re = a_re + b_re;
               s2_im = a_im + b_im;
               d2_re = a_re - b_re;
               d2_im = a_im - b_im;
          }

          /* 3-point combination of the pair sums: outputs 0, 4 and 2. */
          c_re(io[0]) = s0_re + s1_re + s2_re;
          c_im(io[0]) = s0_im + s1_im + s2_im;
          {
               FFTW_REAL common_re = s0_re - (((FFTW_REAL) FFTW_K499999999) * (s1_re + s2_re));
               FFTW_REAL cross_re = ((FFTW_REAL) FFTW_K866025403) * (s2_im - s1_im);
               FFTW_REAL common_im = s0_im - (((FFTW_REAL) FFTW_K499999999) * (s1_im + s2_im));
               FFTW_REAL cross_im = ((FFTW_REAL) FFTW_K866025403) * (s1_re - s2_re);

               c_re(io[4 * stride]) = common_re + cross_re;
               c_re(io[2 * stride]) = common_re - cross_re;
               c_im(io[4 * stride]) = common_im + cross_im;
               c_im(io[2 * stride]) = common_im - cross_im;
          }

          /* 3-point combination of the pair differences: outputs 3, 1 and 5. */
          c_re(io[3 * stride]) = d0_re + d1_re + d2_re;
          c_im(io[3 * stride]) = d0_im + d1_im + d2_im;
          {
               FFTW_REAL common_re = d0_re - (((FFTW_REAL) FFTW_K499999999) * (d1_re + d2_re));
               FFTW_REAL cross_re = ((FFTW_REAL) FFTW_K866025403) * (d2_im - d1_im);
               FFTW_REAL common_im = d0_im - (((FFTW_REAL) FFTW_K499999999) * (d1_im + d2_im));
               FFTW_REAL cross_im = ((FFTW_REAL) FFTW_K866025403) * (d1_re - d2_re);

               c_re(io[stride]) = common_re + cross_re;
               c_re(io[5 * stride]) = common_re - cross_re;
               c_im(io[stride]) = common_im + cross_im;
               c_im(io[5 * stride]) = common_im - cross_im;
          }
     }
}
19990 
19991 /* This function contains 1054 FP additions and 500 FP multiplications */
19992 
fftwi_twiddle_64(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)19993 static void fftwi_twiddle_64(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
19994 {
19995      int i;
19996      FFTW_COMPLEX *inout;
19997      inout = A;
19998      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 63) {
19999 	  FFTW_REAL tre0_0_0;
20000 	  FFTW_REAL tim0_0_0;
20001 	  FFTW_REAL tre0_0_1;
20002 	  FFTW_REAL tim0_0_1;
20003 	  FFTW_REAL tre0_0_2;
20004 	  FFTW_REAL tim0_0_2;
20005 	  FFTW_REAL tre0_0_3;
20006 	  FFTW_REAL tim0_0_3;
20007 	  FFTW_REAL tre0_0_4;
20008 	  FFTW_REAL tim0_0_4;
20009 	  FFTW_REAL tre0_0_5;
20010 	  FFTW_REAL tim0_0_5;
20011 	  FFTW_REAL tre0_0_6;
20012 	  FFTW_REAL tim0_0_6;
20013 	  FFTW_REAL tre0_0_7;
20014 	  FFTW_REAL tim0_0_7;
20015 	  FFTW_REAL tre0_1_0;
20016 	  FFTW_REAL tim0_1_0;
20017 	  FFTW_REAL tre0_1_1;
20018 	  FFTW_REAL tim0_1_1;
20019 	  FFTW_REAL tre0_1_2;
20020 	  FFTW_REAL tim0_1_2;
20021 	  FFTW_REAL tre0_1_3;
20022 	  FFTW_REAL tim0_1_3;
20023 	  FFTW_REAL tre0_1_4;
20024 	  FFTW_REAL tim0_1_4;
20025 	  FFTW_REAL tre0_1_5;
20026 	  FFTW_REAL tim0_1_5;
20027 	  FFTW_REAL tre0_1_6;
20028 	  FFTW_REAL tim0_1_6;
20029 	  FFTW_REAL tre0_1_7;
20030 	  FFTW_REAL tim0_1_7;
20031 	  FFTW_REAL tre0_2_0;
20032 	  FFTW_REAL tim0_2_0;
20033 	  FFTW_REAL tre0_2_1;
20034 	  FFTW_REAL tim0_2_1;
20035 	  FFTW_REAL tre0_2_2;
20036 	  FFTW_REAL tim0_2_2;
20037 	  FFTW_REAL tre0_2_3;
20038 	  FFTW_REAL tim0_2_3;
20039 	  FFTW_REAL tre0_2_4;
20040 	  FFTW_REAL tim0_2_4;
20041 	  FFTW_REAL tre0_2_5;
20042 	  FFTW_REAL tim0_2_5;
20043 	  FFTW_REAL tre0_2_6;
20044 	  FFTW_REAL tim0_2_6;
20045 	  FFTW_REAL tre0_2_7;
20046 	  FFTW_REAL tim0_2_7;
20047 	  FFTW_REAL tre0_3_0;
20048 	  FFTW_REAL tim0_3_0;
20049 	  FFTW_REAL tre0_3_1;
20050 	  FFTW_REAL tim0_3_1;
20051 	  FFTW_REAL tre0_3_2;
20052 	  FFTW_REAL tim0_3_2;
20053 	  FFTW_REAL tre0_3_3;
20054 	  FFTW_REAL tim0_3_3;
20055 	  FFTW_REAL tre0_3_4;
20056 	  FFTW_REAL tim0_3_4;
20057 	  FFTW_REAL tre0_3_5;
20058 	  FFTW_REAL tim0_3_5;
20059 	  FFTW_REAL tre0_3_6;
20060 	  FFTW_REAL tim0_3_6;
20061 	  FFTW_REAL tre0_3_7;
20062 	  FFTW_REAL tim0_3_7;
20063 	  FFTW_REAL tre0_4_0;
20064 	  FFTW_REAL tim0_4_0;
20065 	  FFTW_REAL tre0_4_1;
20066 	  FFTW_REAL tim0_4_1;
20067 	  FFTW_REAL tre0_4_2;
20068 	  FFTW_REAL tim0_4_2;
20069 	  FFTW_REAL tre0_4_3;
20070 	  FFTW_REAL tim0_4_3;
20071 	  FFTW_REAL tre0_4_4;
20072 	  FFTW_REAL tim0_4_4;
20073 	  FFTW_REAL tre0_4_5;
20074 	  FFTW_REAL tim0_4_5;
20075 	  FFTW_REAL tre0_4_6;
20076 	  FFTW_REAL tim0_4_6;
20077 	  FFTW_REAL tre0_4_7;
20078 	  FFTW_REAL tim0_4_7;
20079 	  FFTW_REAL tre0_5_0;
20080 	  FFTW_REAL tim0_5_0;
20081 	  FFTW_REAL tre0_5_1;
20082 	  FFTW_REAL tim0_5_1;
20083 	  FFTW_REAL tre0_5_2;
20084 	  FFTW_REAL tim0_5_2;
20085 	  FFTW_REAL tre0_5_3;
20086 	  FFTW_REAL tim0_5_3;
20087 	  FFTW_REAL tre0_5_4;
20088 	  FFTW_REAL tim0_5_4;
20089 	  FFTW_REAL tre0_5_5;
20090 	  FFTW_REAL tim0_5_5;
20091 	  FFTW_REAL tre0_5_6;
20092 	  FFTW_REAL tim0_5_6;
20093 	  FFTW_REAL tre0_5_7;
20094 	  FFTW_REAL tim0_5_7;
20095 	  FFTW_REAL tre0_6_0;
20096 	  FFTW_REAL tim0_6_0;
20097 	  FFTW_REAL tre0_6_1;
20098 	  FFTW_REAL tim0_6_1;
20099 	  FFTW_REAL tre0_6_2;
20100 	  FFTW_REAL tim0_6_2;
20101 	  FFTW_REAL tre0_6_3;
20102 	  FFTW_REAL tim0_6_3;
20103 	  FFTW_REAL tre0_6_4;
20104 	  FFTW_REAL tim0_6_4;
20105 	  FFTW_REAL tre0_6_5;
20106 	  FFTW_REAL tim0_6_5;
20107 	  FFTW_REAL tre0_6_6;
20108 	  FFTW_REAL tim0_6_6;
20109 	  FFTW_REAL tre0_6_7;
20110 	  FFTW_REAL tim0_6_7;
20111 	  FFTW_REAL tre0_7_0;
20112 	  FFTW_REAL tim0_7_0;
20113 	  FFTW_REAL tre0_7_1;
20114 	  FFTW_REAL tim0_7_1;
20115 	  FFTW_REAL tre0_7_2;
20116 	  FFTW_REAL tim0_7_2;
20117 	  FFTW_REAL tre0_7_3;
20118 	  FFTW_REAL tim0_7_3;
20119 	  FFTW_REAL tre0_7_4;
20120 	  FFTW_REAL tim0_7_4;
20121 	  FFTW_REAL tre0_7_5;
20122 	  FFTW_REAL tim0_7_5;
20123 	  FFTW_REAL tre0_7_6;
20124 	  FFTW_REAL tim0_7_6;
20125 	  FFTW_REAL tre0_7_7;
20126 	  FFTW_REAL tim0_7_7;
20127 	  {
20128 	       FFTW_REAL tre1_0_0;
20129 	       FFTW_REAL tim1_0_0;
20130 	       FFTW_REAL tre1_0_1;
20131 	       FFTW_REAL tim1_0_1;
20132 	       FFTW_REAL tre1_0_2;
20133 	       FFTW_REAL tim1_0_2;
20134 	       FFTW_REAL tre1_0_3;
20135 	       FFTW_REAL tim1_0_3;
20136 	       FFTW_REAL tre1_1_0;
20137 	       FFTW_REAL tim1_1_0;
20138 	       FFTW_REAL tre1_1_1;
20139 	       FFTW_REAL tim1_1_1;
20140 	       FFTW_REAL tre1_1_2;
20141 	       FFTW_REAL tim1_1_2;
20142 	       FFTW_REAL tre1_1_3;
20143 	       FFTW_REAL tim1_1_3;
20144 	       {
20145 		    FFTW_REAL tre2_0_0;
20146 		    FFTW_REAL tim2_0_0;
20147 		    FFTW_REAL tre2_1_0;
20148 		    FFTW_REAL tim2_1_0;
20149 		    tre2_0_0 = c_re(inout[0]);
20150 		    tim2_0_0 = c_im(inout[0]);
20151 		    {
20152 			 FFTW_REAL tr;
20153 			 FFTW_REAL ti;
20154 			 FFTW_REAL twr;
20155 			 FFTW_REAL twi;
20156 			 tr = c_re(inout[32 * stride]);
20157 			 ti = c_im(inout[32 * stride]);
20158 			 twr = c_re(W[31]);
20159 			 twi = c_im(W[31]);
20160 			 tre2_1_0 = (tr * twr) + (ti * twi);
20161 			 tim2_1_0 = (ti * twr) - (tr * twi);
20162 		    }
20163 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
20164 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
20165 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
20166 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
20167 	       }
20168 	       {
20169 		    FFTW_REAL tre2_0_0;
20170 		    FFTW_REAL tim2_0_0;
20171 		    FFTW_REAL tre2_1_0;
20172 		    FFTW_REAL tim2_1_0;
20173 		    {
20174 			 FFTW_REAL tr;
20175 			 FFTW_REAL ti;
20176 			 FFTW_REAL twr;
20177 			 FFTW_REAL twi;
20178 			 tr = c_re(inout[8 * stride]);
20179 			 ti = c_im(inout[8 * stride]);
20180 			 twr = c_re(W[7]);
20181 			 twi = c_im(W[7]);
20182 			 tre2_0_0 = (tr * twr) + (ti * twi);
20183 			 tim2_0_0 = (ti * twr) - (tr * twi);
20184 		    }
20185 		    {
20186 			 FFTW_REAL tr;
20187 			 FFTW_REAL ti;
20188 			 FFTW_REAL twr;
20189 			 FFTW_REAL twi;
20190 			 tr = c_re(inout[40 * stride]);
20191 			 ti = c_im(inout[40 * stride]);
20192 			 twr = c_re(W[39]);
20193 			 twi = c_im(W[39]);
20194 			 tre2_1_0 = (tr * twr) + (ti * twi);
20195 			 tim2_1_0 = (ti * twr) - (tr * twi);
20196 		    }
20197 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
20198 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
20199 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
20200 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
20201 	       }
20202 	       {
20203 		    FFTW_REAL tre2_0_0;
20204 		    FFTW_REAL tim2_0_0;
20205 		    FFTW_REAL tre2_1_0;
20206 		    FFTW_REAL tim2_1_0;
20207 		    {
20208 			 FFTW_REAL tr;
20209 			 FFTW_REAL ti;
20210 			 FFTW_REAL twr;
20211 			 FFTW_REAL twi;
20212 			 tr = c_re(inout[16 * stride]);
20213 			 ti = c_im(inout[16 * stride]);
20214 			 twr = c_re(W[15]);
20215 			 twi = c_im(W[15]);
20216 			 tre2_0_0 = (tr * twr) + (ti * twi);
20217 			 tim2_0_0 = (ti * twr) - (tr * twi);
20218 		    }
20219 		    {
20220 			 FFTW_REAL tr;
20221 			 FFTW_REAL ti;
20222 			 FFTW_REAL twr;
20223 			 FFTW_REAL twi;
20224 			 tr = c_re(inout[48 * stride]);
20225 			 ti = c_im(inout[48 * stride]);
20226 			 twr = c_re(W[47]);
20227 			 twi = c_im(W[47]);
20228 			 tre2_1_0 = (tr * twr) + (ti * twi);
20229 			 tim2_1_0 = (ti * twr) - (tr * twi);
20230 		    }
20231 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
20232 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
20233 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
20234 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
20235 	       }
20236 	       {
20237 		    FFTW_REAL tre2_0_0;
20238 		    FFTW_REAL tim2_0_0;
20239 		    FFTW_REAL tre2_1_0;
20240 		    FFTW_REAL tim2_1_0;
20241 		    {
20242 			 FFTW_REAL tr;
20243 			 FFTW_REAL ti;
20244 			 FFTW_REAL twr;
20245 			 FFTW_REAL twi;
20246 			 tr = c_re(inout[24 * stride]);
20247 			 ti = c_im(inout[24 * stride]);
20248 			 twr = c_re(W[23]);
20249 			 twi = c_im(W[23]);
20250 			 tre2_0_0 = (tr * twr) + (ti * twi);
20251 			 tim2_0_0 = (ti * twr) - (tr * twi);
20252 		    }
20253 		    {
20254 			 FFTW_REAL tr;
20255 			 FFTW_REAL ti;
20256 			 FFTW_REAL twr;
20257 			 FFTW_REAL twi;
20258 			 tr = c_re(inout[56 * stride]);
20259 			 ti = c_im(inout[56 * stride]);
20260 			 twr = c_re(W[55]);
20261 			 twi = c_im(W[55]);
20262 			 tre2_1_0 = (tr * twr) + (ti * twi);
20263 			 tim2_1_0 = (ti * twr) - (tr * twi);
20264 		    }
20265 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
20266 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
20267 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
20268 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
20269 	       }
20270 	       {
20271 		    FFTW_REAL tre2_0_0;
20272 		    FFTW_REAL tim2_0_0;
20273 		    FFTW_REAL tre2_0_1;
20274 		    FFTW_REAL tim2_0_1;
20275 		    FFTW_REAL tre2_1_0;
20276 		    FFTW_REAL tim2_1_0;
20277 		    FFTW_REAL tre2_1_1;
20278 		    FFTW_REAL tim2_1_1;
20279 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
20280 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
20281 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
20282 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
20283 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
20284 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
20285 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
20286 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
20287 		    tre0_0_0 = tre2_0_0 + tre2_0_1;
20288 		    tim0_0_0 = tim2_0_0 + tim2_0_1;
20289 		    tre0_4_0 = tre2_0_0 - tre2_0_1;
20290 		    tim0_4_0 = tim2_0_0 - tim2_0_1;
20291 		    tre0_2_0 = tre2_1_0 - tim2_1_1;
20292 		    tim0_2_0 = tim2_1_0 + tre2_1_1;
20293 		    tre0_6_0 = tre2_1_0 + tim2_1_1;
20294 		    tim0_6_0 = tim2_1_0 - tre2_1_1;
20295 	       }
20296 	       {
20297 		    FFTW_REAL tre2_0_0;
20298 		    FFTW_REAL tim2_0_0;
20299 		    FFTW_REAL tre2_0_1;
20300 		    FFTW_REAL tim2_0_1;
20301 		    FFTW_REAL tre2_1_0;
20302 		    FFTW_REAL tim2_1_0;
20303 		    FFTW_REAL tre2_1_1;
20304 		    FFTW_REAL tim2_1_1;
20305 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
20306 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
20307 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
20308 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
20309 		    {
20310 			 FFTW_REAL tre3_0_0;
20311 			 FFTW_REAL tim3_0_0;
20312 			 FFTW_REAL tre3_1_0;
20313 			 FFTW_REAL tim3_1_0;
20314 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
20315 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
20316 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
20317 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
20318 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
20319 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
20320 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
20321 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
20322 		    }
20323 		    tre0_1_0 = tre2_0_0 + tre2_0_1;
20324 		    tim0_1_0 = tim2_0_0 + tim2_0_1;
20325 		    tre0_5_0 = tre2_0_0 - tre2_0_1;
20326 		    tim0_5_0 = tim2_0_0 - tim2_0_1;
20327 		    tre0_3_0 = tre2_1_0 - tim2_1_1;
20328 		    tim0_3_0 = tim2_1_0 + tre2_1_1;
20329 		    tre0_7_0 = tre2_1_0 + tim2_1_1;
20330 		    tim0_7_0 = tim2_1_0 - tre2_1_1;
20331 	       }
20332 	  }
20333 	  {
20334 	       FFTW_REAL tre1_0_0;
20335 	       FFTW_REAL tim1_0_0;
20336 	       FFTW_REAL tre1_0_1;
20337 	       FFTW_REAL tim1_0_1;
20338 	       FFTW_REAL tre1_0_2;
20339 	       FFTW_REAL tim1_0_2;
20340 	       FFTW_REAL tre1_0_3;
20341 	       FFTW_REAL tim1_0_3;
20342 	       FFTW_REAL tre1_1_0;
20343 	       FFTW_REAL tim1_1_0;
20344 	       FFTW_REAL tre1_1_1;
20345 	       FFTW_REAL tim1_1_1;
20346 	       FFTW_REAL tre1_1_2;
20347 	       FFTW_REAL tim1_1_2;
20348 	       FFTW_REAL tre1_1_3;
20349 	       FFTW_REAL tim1_1_3;
20350 	       {
20351 		    FFTW_REAL tre2_0_0;
20352 		    FFTW_REAL tim2_0_0;
20353 		    FFTW_REAL tre2_1_0;
20354 		    FFTW_REAL tim2_1_0;
20355 		    {
20356 			 FFTW_REAL tr;
20357 			 FFTW_REAL ti;
20358 			 FFTW_REAL twr;
20359 			 FFTW_REAL twi;
20360 			 tr = c_re(inout[stride]);
20361 			 ti = c_im(inout[stride]);
20362 			 twr = c_re(W[0]);
20363 			 twi = c_im(W[0]);
20364 			 tre2_0_0 = (tr * twr) + (ti * twi);
20365 			 tim2_0_0 = (ti * twr) - (tr * twi);
20366 		    }
20367 		    {
20368 			 FFTW_REAL tr;
20369 			 FFTW_REAL ti;
20370 			 FFTW_REAL twr;
20371 			 FFTW_REAL twi;
20372 			 tr = c_re(inout[33 * stride]);
20373 			 ti = c_im(inout[33 * stride]);
20374 			 twr = c_re(W[32]);
20375 			 twi = c_im(W[32]);
20376 			 tre2_1_0 = (tr * twr) + (ti * twi);
20377 			 tim2_1_0 = (ti * twr) - (tr * twi);
20378 		    }
20379 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
20380 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
20381 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
20382 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
20383 	       }
20384 	       {
20385 		    FFTW_REAL tre2_0_0;
20386 		    FFTW_REAL tim2_0_0;
20387 		    FFTW_REAL tre2_1_0;
20388 		    FFTW_REAL tim2_1_0;
20389 		    {
20390 			 FFTW_REAL tr;
20391 			 FFTW_REAL ti;
20392 			 FFTW_REAL twr;
20393 			 FFTW_REAL twi;
20394 			 tr = c_re(inout[9 * stride]);
20395 			 ti = c_im(inout[9 * stride]);
20396 			 twr = c_re(W[8]);
20397 			 twi = c_im(W[8]);
20398 			 tre2_0_0 = (tr * twr) + (ti * twi);
20399 			 tim2_0_0 = (ti * twr) - (tr * twi);
20400 		    }
20401 		    {
20402 			 FFTW_REAL tr;
20403 			 FFTW_REAL ti;
20404 			 FFTW_REAL twr;
20405 			 FFTW_REAL twi;
20406 			 tr = c_re(inout[41 * stride]);
20407 			 ti = c_im(inout[41 * stride]);
20408 			 twr = c_re(W[40]);
20409 			 twi = c_im(W[40]);
20410 			 tre2_1_0 = (tr * twr) + (ti * twi);
20411 			 tim2_1_0 = (ti * twr) - (tr * twi);
20412 		    }
20413 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
20414 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
20415 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
20416 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
20417 	       }
20418 	       {
20419 		    FFTW_REAL tre2_0_0;
20420 		    FFTW_REAL tim2_0_0;
20421 		    FFTW_REAL tre2_1_0;
20422 		    FFTW_REAL tim2_1_0;
20423 		    {
20424 			 FFTW_REAL tr;
20425 			 FFTW_REAL ti;
20426 			 FFTW_REAL twr;
20427 			 FFTW_REAL twi;
20428 			 tr = c_re(inout[17 * stride]);
20429 			 ti = c_im(inout[17 * stride]);
20430 			 twr = c_re(W[16]);
20431 			 twi = c_im(W[16]);
20432 			 tre2_0_0 = (tr * twr) + (ti * twi);
20433 			 tim2_0_0 = (ti * twr) - (tr * twi);
20434 		    }
20435 		    {
20436 			 FFTW_REAL tr;
20437 			 FFTW_REAL ti;
20438 			 FFTW_REAL twr;
20439 			 FFTW_REAL twi;
20440 			 tr = c_re(inout[49 * stride]);
20441 			 ti = c_im(inout[49 * stride]);
20442 			 twr = c_re(W[48]);
20443 			 twi = c_im(W[48]);
20444 			 tre2_1_0 = (tr * twr) + (ti * twi);
20445 			 tim2_1_0 = (ti * twr) - (tr * twi);
20446 		    }
20447 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
20448 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
20449 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
20450 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
20451 	       }
20452 	       {
20453 		    FFTW_REAL tre2_0_0;
20454 		    FFTW_REAL tim2_0_0;
20455 		    FFTW_REAL tre2_1_0;
20456 		    FFTW_REAL tim2_1_0;
20457 		    {
20458 			 FFTW_REAL tr;
20459 			 FFTW_REAL ti;
20460 			 FFTW_REAL twr;
20461 			 FFTW_REAL twi;
20462 			 tr = c_re(inout[25 * stride]);
20463 			 ti = c_im(inout[25 * stride]);
20464 			 twr = c_re(W[24]);
20465 			 twi = c_im(W[24]);
20466 			 tre2_0_0 = (tr * twr) + (ti * twi);
20467 			 tim2_0_0 = (ti * twr) - (tr * twi);
20468 		    }
20469 		    {
20470 			 FFTW_REAL tr;
20471 			 FFTW_REAL ti;
20472 			 FFTW_REAL twr;
20473 			 FFTW_REAL twi;
20474 			 tr = c_re(inout[57 * stride]);
20475 			 ti = c_im(inout[57 * stride]);
20476 			 twr = c_re(W[56]);
20477 			 twi = c_im(W[56]);
20478 			 tre2_1_0 = (tr * twr) + (ti * twi);
20479 			 tim2_1_0 = (ti * twr) - (tr * twi);
20480 		    }
20481 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
20482 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
20483 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
20484 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
20485 	       }
20486 	       {
20487 		    FFTW_REAL tre2_0_0;
20488 		    FFTW_REAL tim2_0_0;
20489 		    FFTW_REAL tre2_0_1;
20490 		    FFTW_REAL tim2_0_1;
20491 		    FFTW_REAL tre2_1_0;
20492 		    FFTW_REAL tim2_1_0;
20493 		    FFTW_REAL tre2_1_1;
20494 		    FFTW_REAL tim2_1_1;
20495 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
20496 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
20497 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
20498 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
20499 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
20500 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
20501 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
20502 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
20503 		    tre0_0_1 = tre2_0_0 + tre2_0_1;
20504 		    tim0_0_1 = tim2_0_0 + tim2_0_1;
20505 		    tre0_4_1 = tre2_0_0 - tre2_0_1;
20506 		    tim0_4_1 = tim2_0_0 - tim2_0_1;
20507 		    tre0_2_1 = tre2_1_0 - tim2_1_1;
20508 		    tim0_2_1 = tim2_1_0 + tre2_1_1;
20509 		    tre0_6_1 = tre2_1_0 + tim2_1_1;
20510 		    tim0_6_1 = tim2_1_0 - tre2_1_1;
20511 	       }
20512 	       {
20513 		    FFTW_REAL tre2_0_0;
20514 		    FFTW_REAL tim2_0_0;
20515 		    FFTW_REAL tre2_0_1;
20516 		    FFTW_REAL tim2_0_1;
20517 		    FFTW_REAL tre2_1_0;
20518 		    FFTW_REAL tim2_1_0;
20519 		    FFTW_REAL tre2_1_1;
20520 		    FFTW_REAL tim2_1_1;
20521 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
20522 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
20523 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
20524 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
20525 		    {
20526 			 FFTW_REAL tre3_0_0;
20527 			 FFTW_REAL tim3_0_0;
20528 			 FFTW_REAL tre3_1_0;
20529 			 FFTW_REAL tim3_1_0;
20530 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
20531 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
20532 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
20533 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
20534 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
20535 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
20536 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
20537 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
20538 		    }
20539 		    tre0_1_1 = tre2_0_0 + tre2_0_1;
20540 		    tim0_1_1 = tim2_0_0 + tim2_0_1;
20541 		    tre0_5_1 = tre2_0_0 - tre2_0_1;
20542 		    tim0_5_1 = tim2_0_0 - tim2_0_1;
20543 		    tre0_3_1 = tre2_1_0 - tim2_1_1;
20544 		    tim0_3_1 = tim2_1_0 + tre2_1_1;
20545 		    tre0_7_1 = tre2_1_0 + tim2_1_1;
20546 		    tim0_7_1 = tim2_1_0 - tre2_1_1;
20547 	       }
20548 	  }
20549 	  {
20550 	       FFTW_REAL tre1_0_0;
20551 	       FFTW_REAL tim1_0_0;
20552 	       FFTW_REAL tre1_0_1;
20553 	       FFTW_REAL tim1_0_1;
20554 	       FFTW_REAL tre1_0_2;
20555 	       FFTW_REAL tim1_0_2;
20556 	       FFTW_REAL tre1_0_3;
20557 	       FFTW_REAL tim1_0_3;
20558 	       FFTW_REAL tre1_1_0;
20559 	       FFTW_REAL tim1_1_0;
20560 	       FFTW_REAL tre1_1_1;
20561 	       FFTW_REAL tim1_1_1;
20562 	       FFTW_REAL tre1_1_2;
20563 	       FFTW_REAL tim1_1_2;
20564 	       FFTW_REAL tre1_1_3;
20565 	       FFTW_REAL tim1_1_3;
20566 	       {
20567 		    FFTW_REAL tre2_0_0;
20568 		    FFTW_REAL tim2_0_0;
20569 		    FFTW_REAL tre2_1_0;
20570 		    FFTW_REAL tim2_1_0;
20571 		    {
20572 			 FFTW_REAL tr;
20573 			 FFTW_REAL ti;
20574 			 FFTW_REAL twr;
20575 			 FFTW_REAL twi;
20576 			 tr = c_re(inout[2 * stride]);
20577 			 ti = c_im(inout[2 * stride]);
20578 			 twr = c_re(W[1]);
20579 			 twi = c_im(W[1]);
20580 			 tre2_0_0 = (tr * twr) + (ti * twi);
20581 			 tim2_0_0 = (ti * twr) - (tr * twi);
20582 		    }
20583 		    {
20584 			 FFTW_REAL tr;
20585 			 FFTW_REAL ti;
20586 			 FFTW_REAL twr;
20587 			 FFTW_REAL twi;
20588 			 tr = c_re(inout[34 * stride]);
20589 			 ti = c_im(inout[34 * stride]);
20590 			 twr = c_re(W[33]);
20591 			 twi = c_im(W[33]);
20592 			 tre2_1_0 = (tr * twr) + (ti * twi);
20593 			 tim2_1_0 = (ti * twr) - (tr * twi);
20594 		    }
20595 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
20596 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
20597 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
20598 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
20599 	       }
20600 	       {
20601 		    FFTW_REAL tre2_0_0;
20602 		    FFTW_REAL tim2_0_0;
20603 		    FFTW_REAL tre2_1_0;
20604 		    FFTW_REAL tim2_1_0;
20605 		    {
20606 			 FFTW_REAL tr;
20607 			 FFTW_REAL ti;
20608 			 FFTW_REAL twr;
20609 			 FFTW_REAL twi;
20610 			 tr = c_re(inout[10 * stride]);
20611 			 ti = c_im(inout[10 * stride]);
20612 			 twr = c_re(W[9]);
20613 			 twi = c_im(W[9]);
20614 			 tre2_0_0 = (tr * twr) + (ti * twi);
20615 			 tim2_0_0 = (ti * twr) - (tr * twi);
20616 		    }
20617 		    {
20618 			 FFTW_REAL tr;
20619 			 FFTW_REAL ti;
20620 			 FFTW_REAL twr;
20621 			 FFTW_REAL twi;
20622 			 tr = c_re(inout[42 * stride]);
20623 			 ti = c_im(inout[42 * stride]);
20624 			 twr = c_re(W[41]);
20625 			 twi = c_im(W[41]);
20626 			 tre2_1_0 = (tr * twr) + (ti * twi);
20627 			 tim2_1_0 = (ti * twr) - (tr * twi);
20628 		    }
20629 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
20630 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
20631 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
20632 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
20633 	       }
20634 	       {
20635 		    FFTW_REAL tre2_0_0;
20636 		    FFTW_REAL tim2_0_0;
20637 		    FFTW_REAL tre2_1_0;
20638 		    FFTW_REAL tim2_1_0;
20639 		    {
20640 			 FFTW_REAL tr;
20641 			 FFTW_REAL ti;
20642 			 FFTW_REAL twr;
20643 			 FFTW_REAL twi;
20644 			 tr = c_re(inout[18 * stride]);
20645 			 ti = c_im(inout[18 * stride]);
20646 			 twr = c_re(W[17]);
20647 			 twi = c_im(W[17]);
20648 			 tre2_0_0 = (tr * twr) + (ti * twi);
20649 			 tim2_0_0 = (ti * twr) - (tr * twi);
20650 		    }
20651 		    {
20652 			 FFTW_REAL tr;
20653 			 FFTW_REAL ti;
20654 			 FFTW_REAL twr;
20655 			 FFTW_REAL twi;
20656 			 tr = c_re(inout[50 * stride]);
20657 			 ti = c_im(inout[50 * stride]);
20658 			 twr = c_re(W[49]);
20659 			 twi = c_im(W[49]);
20660 			 tre2_1_0 = (tr * twr) + (ti * twi);
20661 			 tim2_1_0 = (ti * twr) - (tr * twi);
20662 		    }
20663 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
20664 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
20665 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
20666 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
20667 	       }
20668 	       {
20669 		    FFTW_REAL tre2_0_0;
20670 		    FFTW_REAL tim2_0_0;
20671 		    FFTW_REAL tre2_1_0;
20672 		    FFTW_REAL tim2_1_0;
20673 		    {
20674 			 FFTW_REAL tr;
20675 			 FFTW_REAL ti;
20676 			 FFTW_REAL twr;
20677 			 FFTW_REAL twi;
20678 			 tr = c_re(inout[26 * stride]);
20679 			 ti = c_im(inout[26 * stride]);
20680 			 twr = c_re(W[25]);
20681 			 twi = c_im(W[25]);
20682 			 tre2_0_0 = (tr * twr) + (ti * twi);
20683 			 tim2_0_0 = (ti * twr) - (tr * twi);
20684 		    }
20685 		    {
20686 			 FFTW_REAL tr;
20687 			 FFTW_REAL ti;
20688 			 FFTW_REAL twr;
20689 			 FFTW_REAL twi;
20690 			 tr = c_re(inout[58 * stride]);
20691 			 ti = c_im(inout[58 * stride]);
20692 			 twr = c_re(W[57]);
20693 			 twi = c_im(W[57]);
20694 			 tre2_1_0 = (tr * twr) + (ti * twi);
20695 			 tim2_1_0 = (ti * twr) - (tr * twi);
20696 		    }
20697 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
20698 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
20699 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
20700 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
20701 	       }
20702 	       {
20703 		    FFTW_REAL tre2_0_0;
20704 		    FFTW_REAL tim2_0_0;
20705 		    FFTW_REAL tre2_0_1;
20706 		    FFTW_REAL tim2_0_1;
20707 		    FFTW_REAL tre2_1_0;
20708 		    FFTW_REAL tim2_1_0;
20709 		    FFTW_REAL tre2_1_1;
20710 		    FFTW_REAL tim2_1_1;
20711 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
20712 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
20713 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
20714 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
20715 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
20716 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
20717 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
20718 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
20719 		    tre0_0_2 = tre2_0_0 + tre2_0_1;
20720 		    tim0_0_2 = tim2_0_0 + tim2_0_1;
20721 		    tre0_4_2 = tre2_0_0 - tre2_0_1;
20722 		    tim0_4_2 = tim2_0_0 - tim2_0_1;
20723 		    tre0_2_2 = tre2_1_0 - tim2_1_1;
20724 		    tim0_2_2 = tim2_1_0 + tre2_1_1;
20725 		    tre0_6_2 = tre2_1_0 + tim2_1_1;
20726 		    tim0_6_2 = tim2_1_0 - tre2_1_1;
20727 	       }
20728 	       {
20729 		    FFTW_REAL tre2_0_0;
20730 		    FFTW_REAL tim2_0_0;
20731 		    FFTW_REAL tre2_0_1;
20732 		    FFTW_REAL tim2_0_1;
20733 		    FFTW_REAL tre2_1_0;
20734 		    FFTW_REAL tim2_1_0;
20735 		    FFTW_REAL tre2_1_1;
20736 		    FFTW_REAL tim2_1_1;
20737 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
20738 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
20739 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
20740 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
20741 		    {
20742 			 FFTW_REAL tre3_0_0;
20743 			 FFTW_REAL tim3_0_0;
20744 			 FFTW_REAL tre3_1_0;
20745 			 FFTW_REAL tim3_1_0;
20746 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
20747 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
20748 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
20749 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
20750 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
20751 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
20752 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
20753 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
20754 		    }
20755 		    tre0_1_2 = tre2_0_0 + tre2_0_1;
20756 		    tim0_1_2 = tim2_0_0 + tim2_0_1;
20757 		    tre0_5_2 = tre2_0_0 - tre2_0_1;
20758 		    tim0_5_2 = tim2_0_0 - tim2_0_1;
20759 		    tre0_3_2 = tre2_1_0 - tim2_1_1;
20760 		    tim0_3_2 = tim2_1_0 + tre2_1_1;
20761 		    tre0_7_2 = tre2_1_0 + tim2_1_1;
20762 		    tim0_7_2 = tim2_1_0 - tre2_1_1;
20763 	       }
20764 	  }
20765 	  {
20766 	       FFTW_REAL tre1_0_0;
20767 	       FFTW_REAL tim1_0_0;
20768 	       FFTW_REAL tre1_0_1;
20769 	       FFTW_REAL tim1_0_1;
20770 	       FFTW_REAL tre1_0_2;
20771 	       FFTW_REAL tim1_0_2;
20772 	       FFTW_REAL tre1_0_3;
20773 	       FFTW_REAL tim1_0_3;
20774 	       FFTW_REAL tre1_1_0;
20775 	       FFTW_REAL tim1_1_0;
20776 	       FFTW_REAL tre1_1_1;
20777 	       FFTW_REAL tim1_1_1;
20778 	       FFTW_REAL tre1_1_2;
20779 	       FFTW_REAL tim1_1_2;
20780 	       FFTW_REAL tre1_1_3;
20781 	       FFTW_REAL tim1_1_3;
20782 	       {
20783 		    FFTW_REAL tre2_0_0;
20784 		    FFTW_REAL tim2_0_0;
20785 		    FFTW_REAL tre2_1_0;
20786 		    FFTW_REAL tim2_1_0;
20787 		    {
20788 			 FFTW_REAL tr;
20789 			 FFTW_REAL ti;
20790 			 FFTW_REAL twr;
20791 			 FFTW_REAL twi;
20792 			 tr = c_re(inout[3 * stride]);
20793 			 ti = c_im(inout[3 * stride]);
20794 			 twr = c_re(W[2]);
20795 			 twi = c_im(W[2]);
20796 			 tre2_0_0 = (tr * twr) + (ti * twi);
20797 			 tim2_0_0 = (ti * twr) - (tr * twi);
20798 		    }
20799 		    {
20800 			 FFTW_REAL tr;
20801 			 FFTW_REAL ti;
20802 			 FFTW_REAL twr;
20803 			 FFTW_REAL twi;
20804 			 tr = c_re(inout[35 * stride]);
20805 			 ti = c_im(inout[35 * stride]);
20806 			 twr = c_re(W[34]);
20807 			 twi = c_im(W[34]);
20808 			 tre2_1_0 = (tr * twr) + (ti * twi);
20809 			 tim2_1_0 = (ti * twr) - (tr * twi);
20810 		    }
20811 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
20812 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
20813 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
20814 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
20815 	       }
20816 	       {
20817 		    FFTW_REAL tre2_0_0;
20818 		    FFTW_REAL tim2_0_0;
20819 		    FFTW_REAL tre2_1_0;
20820 		    FFTW_REAL tim2_1_0;
20821 		    {
20822 			 FFTW_REAL tr;
20823 			 FFTW_REAL ti;
20824 			 FFTW_REAL twr;
20825 			 FFTW_REAL twi;
20826 			 tr = c_re(inout[11 * stride]);
20827 			 ti = c_im(inout[11 * stride]);
20828 			 twr = c_re(W[10]);
20829 			 twi = c_im(W[10]);
20830 			 tre2_0_0 = (tr * twr) + (ti * twi);
20831 			 tim2_0_0 = (ti * twr) - (tr * twi);
20832 		    }
20833 		    {
20834 			 FFTW_REAL tr;
20835 			 FFTW_REAL ti;
20836 			 FFTW_REAL twr;
20837 			 FFTW_REAL twi;
20838 			 tr = c_re(inout[43 * stride]);
20839 			 ti = c_im(inout[43 * stride]);
20840 			 twr = c_re(W[42]);
20841 			 twi = c_im(W[42]);
20842 			 tre2_1_0 = (tr * twr) + (ti * twi);
20843 			 tim2_1_0 = (ti * twr) - (tr * twi);
20844 		    }
20845 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
20846 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
20847 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
20848 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
20849 	       }
20850 	       {
20851 		    FFTW_REAL tre2_0_0;
20852 		    FFTW_REAL tim2_0_0;
20853 		    FFTW_REAL tre2_1_0;
20854 		    FFTW_REAL tim2_1_0;
20855 		    {
20856 			 FFTW_REAL tr;
20857 			 FFTW_REAL ti;
20858 			 FFTW_REAL twr;
20859 			 FFTW_REAL twi;
20860 			 tr = c_re(inout[19 * stride]);
20861 			 ti = c_im(inout[19 * stride]);
20862 			 twr = c_re(W[18]);
20863 			 twi = c_im(W[18]);
20864 			 tre2_0_0 = (tr * twr) + (ti * twi);
20865 			 tim2_0_0 = (ti * twr) - (tr * twi);
20866 		    }
20867 		    {
20868 			 FFTW_REAL tr;
20869 			 FFTW_REAL ti;
20870 			 FFTW_REAL twr;
20871 			 FFTW_REAL twi;
20872 			 tr = c_re(inout[51 * stride]);
20873 			 ti = c_im(inout[51 * stride]);
20874 			 twr = c_re(W[50]);
20875 			 twi = c_im(W[50]);
20876 			 tre2_1_0 = (tr * twr) + (ti * twi);
20877 			 tim2_1_0 = (ti * twr) - (tr * twi);
20878 		    }
20879 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
20880 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
20881 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
20882 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
20883 	       }
20884 	       {
20885 		    FFTW_REAL tre2_0_0;
20886 		    FFTW_REAL tim2_0_0;
20887 		    FFTW_REAL tre2_1_0;
20888 		    FFTW_REAL tim2_1_0;
20889 		    {
20890 			 FFTW_REAL tr;
20891 			 FFTW_REAL ti;
20892 			 FFTW_REAL twr;
20893 			 FFTW_REAL twi;
20894 			 tr = c_re(inout[27 * stride]);
20895 			 ti = c_im(inout[27 * stride]);
20896 			 twr = c_re(W[26]);
20897 			 twi = c_im(W[26]);
20898 			 tre2_0_0 = (tr * twr) + (ti * twi);
20899 			 tim2_0_0 = (ti * twr) - (tr * twi);
20900 		    }
20901 		    {
20902 			 FFTW_REAL tr;
20903 			 FFTW_REAL ti;
20904 			 FFTW_REAL twr;
20905 			 FFTW_REAL twi;
20906 			 tr = c_re(inout[59 * stride]);
20907 			 ti = c_im(inout[59 * stride]);
20908 			 twr = c_re(W[58]);
20909 			 twi = c_im(W[58]);
20910 			 tre2_1_0 = (tr * twr) + (ti * twi);
20911 			 tim2_1_0 = (ti * twr) - (tr * twi);
20912 		    }
20913 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
20914 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
20915 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
20916 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
20917 	       }
20918 	       {
20919 		    FFTW_REAL tre2_0_0;
20920 		    FFTW_REAL tim2_0_0;
20921 		    FFTW_REAL tre2_0_1;
20922 		    FFTW_REAL tim2_0_1;
20923 		    FFTW_REAL tre2_1_0;
20924 		    FFTW_REAL tim2_1_0;
20925 		    FFTW_REAL tre2_1_1;
20926 		    FFTW_REAL tim2_1_1;
20927 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
20928 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
20929 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
20930 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
20931 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
20932 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
20933 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
20934 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
20935 		    tre0_0_3 = tre2_0_0 + tre2_0_1;
20936 		    tim0_0_3 = tim2_0_0 + tim2_0_1;
20937 		    tre0_4_3 = tre2_0_0 - tre2_0_1;
20938 		    tim0_4_3 = tim2_0_0 - tim2_0_1;
20939 		    tre0_2_3 = tre2_1_0 - tim2_1_1;
20940 		    tim0_2_3 = tim2_1_0 + tre2_1_1;
20941 		    tre0_6_3 = tre2_1_0 + tim2_1_1;
20942 		    tim0_6_3 = tim2_1_0 - tre2_1_1;
20943 	       }
20944 	       {
20945 		    FFTW_REAL tre2_0_0;
20946 		    FFTW_REAL tim2_0_0;
20947 		    FFTW_REAL tre2_0_1;
20948 		    FFTW_REAL tim2_0_1;
20949 		    FFTW_REAL tre2_1_0;
20950 		    FFTW_REAL tim2_1_0;
20951 		    FFTW_REAL tre2_1_1;
20952 		    FFTW_REAL tim2_1_1;
20953 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
20954 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
20955 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
20956 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
20957 		    {
20958 			 FFTW_REAL tre3_0_0;
20959 			 FFTW_REAL tim3_0_0;
20960 			 FFTW_REAL tre3_1_0;
20961 			 FFTW_REAL tim3_1_0;
20962 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
20963 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
20964 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
20965 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
20966 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
20967 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
20968 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
20969 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
20970 		    }
20971 		    tre0_1_3 = tre2_0_0 + tre2_0_1;
20972 		    tim0_1_3 = tim2_0_0 + tim2_0_1;
20973 		    tre0_5_3 = tre2_0_0 - tre2_0_1;
20974 		    tim0_5_3 = tim2_0_0 - tim2_0_1;
20975 		    tre0_3_3 = tre2_1_0 - tim2_1_1;
20976 		    tim0_3_3 = tim2_1_0 + tre2_1_1;
20977 		    tre0_7_3 = tre2_1_0 + tim2_1_1;
20978 		    tim0_7_3 = tim2_1_0 - tre2_1_1;
20979 	       }
20980 	  }
20981 	  {
20982 	       FFTW_REAL tre1_0_0;
20983 	       FFTW_REAL tim1_0_0;
20984 	       FFTW_REAL tre1_0_1;
20985 	       FFTW_REAL tim1_0_1;
20986 	       FFTW_REAL tre1_0_2;
20987 	       FFTW_REAL tim1_0_2;
20988 	       FFTW_REAL tre1_0_3;
20989 	       FFTW_REAL tim1_0_3;
20990 	       FFTW_REAL tre1_1_0;
20991 	       FFTW_REAL tim1_1_0;
20992 	       FFTW_REAL tre1_1_1;
20993 	       FFTW_REAL tim1_1_1;
20994 	       FFTW_REAL tre1_1_2;
20995 	       FFTW_REAL tim1_1_2;
20996 	       FFTW_REAL tre1_1_3;
20997 	       FFTW_REAL tim1_1_3;
20998 	       {
20999 		    FFTW_REAL tre2_0_0;
21000 		    FFTW_REAL tim2_0_0;
21001 		    FFTW_REAL tre2_1_0;
21002 		    FFTW_REAL tim2_1_0;
21003 		    {
21004 			 FFTW_REAL tr;
21005 			 FFTW_REAL ti;
21006 			 FFTW_REAL twr;
21007 			 FFTW_REAL twi;
21008 			 tr = c_re(inout[4 * stride]);
21009 			 ti = c_im(inout[4 * stride]);
21010 			 twr = c_re(W[3]);
21011 			 twi = c_im(W[3]);
21012 			 tre2_0_0 = (tr * twr) + (ti * twi);
21013 			 tim2_0_0 = (ti * twr) - (tr * twi);
21014 		    }
21015 		    {
21016 			 FFTW_REAL tr;
21017 			 FFTW_REAL ti;
21018 			 FFTW_REAL twr;
21019 			 FFTW_REAL twi;
21020 			 tr = c_re(inout[36 * stride]);
21021 			 ti = c_im(inout[36 * stride]);
21022 			 twr = c_re(W[35]);
21023 			 twi = c_im(W[35]);
21024 			 tre2_1_0 = (tr * twr) + (ti * twi);
21025 			 tim2_1_0 = (ti * twr) - (tr * twi);
21026 		    }
21027 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
21028 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
21029 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
21030 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
21031 	       }
21032 	       {
21033 		    FFTW_REAL tre2_0_0;
21034 		    FFTW_REAL tim2_0_0;
21035 		    FFTW_REAL tre2_1_0;
21036 		    FFTW_REAL tim2_1_0;
21037 		    {
21038 			 FFTW_REAL tr;
21039 			 FFTW_REAL ti;
21040 			 FFTW_REAL twr;
21041 			 FFTW_REAL twi;
21042 			 tr = c_re(inout[12 * stride]);
21043 			 ti = c_im(inout[12 * stride]);
21044 			 twr = c_re(W[11]);
21045 			 twi = c_im(W[11]);
21046 			 tre2_0_0 = (tr * twr) + (ti * twi);
21047 			 tim2_0_0 = (ti * twr) - (tr * twi);
21048 		    }
21049 		    {
21050 			 FFTW_REAL tr;
21051 			 FFTW_REAL ti;
21052 			 FFTW_REAL twr;
21053 			 FFTW_REAL twi;
21054 			 tr = c_re(inout[44 * stride]);
21055 			 ti = c_im(inout[44 * stride]);
21056 			 twr = c_re(W[43]);
21057 			 twi = c_im(W[43]);
21058 			 tre2_1_0 = (tr * twr) + (ti * twi);
21059 			 tim2_1_0 = (ti * twr) - (tr * twi);
21060 		    }
21061 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
21062 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
21063 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
21064 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
21065 	       }
21066 	       {
21067 		    FFTW_REAL tre2_0_0;
21068 		    FFTW_REAL tim2_0_0;
21069 		    FFTW_REAL tre2_1_0;
21070 		    FFTW_REAL tim2_1_0;
21071 		    {
21072 			 FFTW_REAL tr;
21073 			 FFTW_REAL ti;
21074 			 FFTW_REAL twr;
21075 			 FFTW_REAL twi;
21076 			 tr = c_re(inout[20 * stride]);
21077 			 ti = c_im(inout[20 * stride]);
21078 			 twr = c_re(W[19]);
21079 			 twi = c_im(W[19]);
21080 			 tre2_0_0 = (tr * twr) + (ti * twi);
21081 			 tim2_0_0 = (ti * twr) - (tr * twi);
21082 		    }
21083 		    {
21084 			 FFTW_REAL tr;
21085 			 FFTW_REAL ti;
21086 			 FFTW_REAL twr;
21087 			 FFTW_REAL twi;
21088 			 tr = c_re(inout[52 * stride]);
21089 			 ti = c_im(inout[52 * stride]);
21090 			 twr = c_re(W[51]);
21091 			 twi = c_im(W[51]);
21092 			 tre2_1_0 = (tr * twr) + (ti * twi);
21093 			 tim2_1_0 = (ti * twr) - (tr * twi);
21094 		    }
21095 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
21096 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
21097 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
21098 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
21099 	       }
21100 	       {
21101 		    FFTW_REAL tre2_0_0;
21102 		    FFTW_REAL tim2_0_0;
21103 		    FFTW_REAL tre2_1_0;
21104 		    FFTW_REAL tim2_1_0;
21105 		    {
21106 			 FFTW_REAL tr;
21107 			 FFTW_REAL ti;
21108 			 FFTW_REAL twr;
21109 			 FFTW_REAL twi;
21110 			 tr = c_re(inout[28 * stride]);
21111 			 ti = c_im(inout[28 * stride]);
21112 			 twr = c_re(W[27]);
21113 			 twi = c_im(W[27]);
21114 			 tre2_0_0 = (tr * twr) + (ti * twi);
21115 			 tim2_0_0 = (ti * twr) - (tr * twi);
21116 		    }
21117 		    {
21118 			 FFTW_REAL tr;
21119 			 FFTW_REAL ti;
21120 			 FFTW_REAL twr;
21121 			 FFTW_REAL twi;
21122 			 tr = c_re(inout[60 * stride]);
21123 			 ti = c_im(inout[60 * stride]);
21124 			 twr = c_re(W[59]);
21125 			 twi = c_im(W[59]);
21126 			 tre2_1_0 = (tr * twr) + (ti * twi);
21127 			 tim2_1_0 = (ti * twr) - (tr * twi);
21128 		    }
21129 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
21130 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
21131 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
21132 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
21133 	       }
21134 	       {
21135 		    FFTW_REAL tre2_0_0;
21136 		    FFTW_REAL tim2_0_0;
21137 		    FFTW_REAL tre2_0_1;
21138 		    FFTW_REAL tim2_0_1;
21139 		    FFTW_REAL tre2_1_0;
21140 		    FFTW_REAL tim2_1_0;
21141 		    FFTW_REAL tre2_1_1;
21142 		    FFTW_REAL tim2_1_1;
21143 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
21144 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
21145 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
21146 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
21147 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
21148 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
21149 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
21150 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
21151 		    tre0_0_4 = tre2_0_0 + tre2_0_1;
21152 		    tim0_0_4 = tim2_0_0 + tim2_0_1;
21153 		    tre0_4_4 = tre2_0_0 - tre2_0_1;
21154 		    tim0_4_4 = tim2_0_0 - tim2_0_1;
21155 		    tre0_2_4 = tre2_1_0 - tim2_1_1;
21156 		    tim0_2_4 = tim2_1_0 + tre2_1_1;
21157 		    tre0_6_4 = tre2_1_0 + tim2_1_1;
21158 		    tim0_6_4 = tim2_1_0 - tre2_1_1;
21159 	       }
21160 	       {
21161 		    FFTW_REAL tre2_0_0;
21162 		    FFTW_REAL tim2_0_0;
21163 		    FFTW_REAL tre2_0_1;
21164 		    FFTW_REAL tim2_0_1;
21165 		    FFTW_REAL tre2_1_0;
21166 		    FFTW_REAL tim2_1_0;
21167 		    FFTW_REAL tre2_1_1;
21168 		    FFTW_REAL tim2_1_1;
21169 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
21170 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
21171 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
21172 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
21173 		    {
21174 			 FFTW_REAL tre3_0_0;
21175 			 FFTW_REAL tim3_0_0;
21176 			 FFTW_REAL tre3_1_0;
21177 			 FFTW_REAL tim3_1_0;
21178 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
21179 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
21180 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
21181 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
21182 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
21183 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
21184 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
21185 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
21186 		    }
21187 		    tre0_1_4 = tre2_0_0 + tre2_0_1;
21188 		    tim0_1_4 = tim2_0_0 + tim2_0_1;
21189 		    tre0_5_4 = tre2_0_0 - tre2_0_1;
21190 		    tim0_5_4 = tim2_0_0 - tim2_0_1;
21191 		    tre0_3_4 = tre2_1_0 - tim2_1_1;
21192 		    tim0_3_4 = tim2_1_0 + tre2_1_1;
21193 		    tre0_7_4 = tre2_1_0 + tim2_1_1;
21194 		    tim0_7_4 = tim2_1_0 - tre2_1_1;
21195 	       }
21196 	  }
21197 	  {
21198 	       FFTW_REAL tre1_0_0;
21199 	       FFTW_REAL tim1_0_0;
21200 	       FFTW_REAL tre1_0_1;
21201 	       FFTW_REAL tim1_0_1;
21202 	       FFTW_REAL tre1_0_2;
21203 	       FFTW_REAL tim1_0_2;
21204 	       FFTW_REAL tre1_0_3;
21205 	       FFTW_REAL tim1_0_3;
21206 	       FFTW_REAL tre1_1_0;
21207 	       FFTW_REAL tim1_1_0;
21208 	       FFTW_REAL tre1_1_1;
21209 	       FFTW_REAL tim1_1_1;
21210 	       FFTW_REAL tre1_1_2;
21211 	       FFTW_REAL tim1_1_2;
21212 	       FFTW_REAL tre1_1_3;
21213 	       FFTW_REAL tim1_1_3;
21214 	       {
21215 		    FFTW_REAL tre2_0_0;
21216 		    FFTW_REAL tim2_0_0;
21217 		    FFTW_REAL tre2_1_0;
21218 		    FFTW_REAL tim2_1_0;
21219 		    {
21220 			 FFTW_REAL tr;
21221 			 FFTW_REAL ti;
21222 			 FFTW_REAL twr;
21223 			 FFTW_REAL twi;
21224 			 tr = c_re(inout[5 * stride]);
21225 			 ti = c_im(inout[5 * stride]);
21226 			 twr = c_re(W[4]);
21227 			 twi = c_im(W[4]);
21228 			 tre2_0_0 = (tr * twr) + (ti * twi);
21229 			 tim2_0_0 = (ti * twr) - (tr * twi);
21230 		    }
21231 		    {
21232 			 FFTW_REAL tr;
21233 			 FFTW_REAL ti;
21234 			 FFTW_REAL twr;
21235 			 FFTW_REAL twi;
21236 			 tr = c_re(inout[37 * stride]);
21237 			 ti = c_im(inout[37 * stride]);
21238 			 twr = c_re(W[36]);
21239 			 twi = c_im(W[36]);
21240 			 tre2_1_0 = (tr * twr) + (ti * twi);
21241 			 tim2_1_0 = (ti * twr) - (tr * twi);
21242 		    }
21243 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
21244 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
21245 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
21246 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
21247 	       }
21248 	       {
21249 		    FFTW_REAL tre2_0_0;
21250 		    FFTW_REAL tim2_0_0;
21251 		    FFTW_REAL tre2_1_0;
21252 		    FFTW_REAL tim2_1_0;
21253 		    {
21254 			 FFTW_REAL tr;
21255 			 FFTW_REAL ti;
21256 			 FFTW_REAL twr;
21257 			 FFTW_REAL twi;
21258 			 tr = c_re(inout[13 * stride]);
21259 			 ti = c_im(inout[13 * stride]);
21260 			 twr = c_re(W[12]);
21261 			 twi = c_im(W[12]);
21262 			 tre2_0_0 = (tr * twr) + (ti * twi);
21263 			 tim2_0_0 = (ti * twr) - (tr * twi);
21264 		    }
21265 		    {
21266 			 FFTW_REAL tr;
21267 			 FFTW_REAL ti;
21268 			 FFTW_REAL twr;
21269 			 FFTW_REAL twi;
21270 			 tr = c_re(inout[45 * stride]);
21271 			 ti = c_im(inout[45 * stride]);
21272 			 twr = c_re(W[44]);
21273 			 twi = c_im(W[44]);
21274 			 tre2_1_0 = (tr * twr) + (ti * twi);
21275 			 tim2_1_0 = (ti * twr) - (tr * twi);
21276 		    }
21277 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
21278 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
21279 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
21280 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
21281 	       }
21282 	       {
21283 		    FFTW_REAL tre2_0_0;
21284 		    FFTW_REAL tim2_0_0;
21285 		    FFTW_REAL tre2_1_0;
21286 		    FFTW_REAL tim2_1_0;
21287 		    {
21288 			 FFTW_REAL tr;
21289 			 FFTW_REAL ti;
21290 			 FFTW_REAL twr;
21291 			 FFTW_REAL twi;
21292 			 tr = c_re(inout[21 * stride]);
21293 			 ti = c_im(inout[21 * stride]);
21294 			 twr = c_re(W[20]);
21295 			 twi = c_im(W[20]);
21296 			 tre2_0_0 = (tr * twr) + (ti * twi);
21297 			 tim2_0_0 = (ti * twr) - (tr * twi);
21298 		    }
21299 		    {
21300 			 FFTW_REAL tr;
21301 			 FFTW_REAL ti;
21302 			 FFTW_REAL twr;
21303 			 FFTW_REAL twi;
21304 			 tr = c_re(inout[53 * stride]);
21305 			 ti = c_im(inout[53 * stride]);
21306 			 twr = c_re(W[52]);
21307 			 twi = c_im(W[52]);
21308 			 tre2_1_0 = (tr * twr) + (ti * twi);
21309 			 tim2_1_0 = (ti * twr) - (tr * twi);
21310 		    }
21311 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
21312 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
21313 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
21314 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
21315 	       }
21316 	       {
21317 		    FFTW_REAL tre2_0_0;
21318 		    FFTW_REAL tim2_0_0;
21319 		    FFTW_REAL tre2_1_0;
21320 		    FFTW_REAL tim2_1_0;
21321 		    {
21322 			 FFTW_REAL tr;
21323 			 FFTW_REAL ti;
21324 			 FFTW_REAL twr;
21325 			 FFTW_REAL twi;
21326 			 tr = c_re(inout[29 * stride]);
21327 			 ti = c_im(inout[29 * stride]);
21328 			 twr = c_re(W[28]);
21329 			 twi = c_im(W[28]);
21330 			 tre2_0_0 = (tr * twr) + (ti * twi);
21331 			 tim2_0_0 = (ti * twr) - (tr * twi);
21332 		    }
21333 		    {
21334 			 FFTW_REAL tr;
21335 			 FFTW_REAL ti;
21336 			 FFTW_REAL twr;
21337 			 FFTW_REAL twi;
21338 			 tr = c_re(inout[61 * stride]);
21339 			 ti = c_im(inout[61 * stride]);
21340 			 twr = c_re(W[60]);
21341 			 twi = c_im(W[60]);
21342 			 tre2_1_0 = (tr * twr) + (ti * twi);
21343 			 tim2_1_0 = (ti * twr) - (tr * twi);
21344 		    }
21345 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
21346 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
21347 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
21348 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
21349 	       }
21350 	       {
21351 		    FFTW_REAL tre2_0_0;
21352 		    FFTW_REAL tim2_0_0;
21353 		    FFTW_REAL tre2_0_1;
21354 		    FFTW_REAL tim2_0_1;
21355 		    FFTW_REAL tre2_1_0;
21356 		    FFTW_REAL tim2_1_0;
21357 		    FFTW_REAL tre2_1_1;
21358 		    FFTW_REAL tim2_1_1;
21359 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
21360 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
21361 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
21362 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
21363 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
21364 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
21365 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
21366 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
21367 		    tre0_0_5 = tre2_0_0 + tre2_0_1;
21368 		    tim0_0_5 = tim2_0_0 + tim2_0_1;
21369 		    tre0_4_5 = tre2_0_0 - tre2_0_1;
21370 		    tim0_4_5 = tim2_0_0 - tim2_0_1;
21371 		    tre0_2_5 = tre2_1_0 - tim2_1_1;
21372 		    tim0_2_5 = tim2_1_0 + tre2_1_1;
21373 		    tre0_6_5 = tre2_1_0 + tim2_1_1;
21374 		    tim0_6_5 = tim2_1_0 - tre2_1_1;
21375 	       }
21376 	       {
21377 		    FFTW_REAL tre2_0_0;
21378 		    FFTW_REAL tim2_0_0;
21379 		    FFTW_REAL tre2_0_1;
21380 		    FFTW_REAL tim2_0_1;
21381 		    FFTW_REAL tre2_1_0;
21382 		    FFTW_REAL tim2_1_0;
21383 		    FFTW_REAL tre2_1_1;
21384 		    FFTW_REAL tim2_1_1;
21385 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
21386 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
21387 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
21388 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
21389 		    {
21390 			 FFTW_REAL tre3_0_0;
21391 			 FFTW_REAL tim3_0_0;
21392 			 FFTW_REAL tre3_1_0;
21393 			 FFTW_REAL tim3_1_0;
21394 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
21395 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
21396 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
21397 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
21398 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
21399 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
21400 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
21401 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
21402 		    }
21403 		    tre0_1_5 = tre2_0_0 + tre2_0_1;
21404 		    tim0_1_5 = tim2_0_0 + tim2_0_1;
21405 		    tre0_5_5 = tre2_0_0 - tre2_0_1;
21406 		    tim0_5_5 = tim2_0_0 - tim2_0_1;
21407 		    tre0_3_5 = tre2_1_0 - tim2_1_1;
21408 		    tim0_3_5 = tim2_1_0 + tre2_1_1;
21409 		    tre0_7_5 = tre2_1_0 + tim2_1_1;
21410 		    tim0_7_5 = tim2_1_0 - tre2_1_1;
21411 	       }
21412 	  }
21413 	  {
21414 	       FFTW_REAL tre1_0_0;
21415 	       FFTW_REAL tim1_0_0;
21416 	       FFTW_REAL tre1_0_1;
21417 	       FFTW_REAL tim1_0_1;
21418 	       FFTW_REAL tre1_0_2;
21419 	       FFTW_REAL tim1_0_2;
21420 	       FFTW_REAL tre1_0_3;
21421 	       FFTW_REAL tim1_0_3;
21422 	       FFTW_REAL tre1_1_0;
21423 	       FFTW_REAL tim1_1_0;
21424 	       FFTW_REAL tre1_1_1;
21425 	       FFTW_REAL tim1_1_1;
21426 	       FFTW_REAL tre1_1_2;
21427 	       FFTW_REAL tim1_1_2;
21428 	       FFTW_REAL tre1_1_3;
21429 	       FFTW_REAL tim1_1_3;
21430 	       {
21431 		    FFTW_REAL tre2_0_0;
21432 		    FFTW_REAL tim2_0_0;
21433 		    FFTW_REAL tre2_1_0;
21434 		    FFTW_REAL tim2_1_0;
21435 		    {
21436 			 FFTW_REAL tr;
21437 			 FFTW_REAL ti;
21438 			 FFTW_REAL twr;
21439 			 FFTW_REAL twi;
21440 			 tr = c_re(inout[6 * stride]);
21441 			 ti = c_im(inout[6 * stride]);
21442 			 twr = c_re(W[5]);
21443 			 twi = c_im(W[5]);
21444 			 tre2_0_0 = (tr * twr) + (ti * twi);
21445 			 tim2_0_0 = (ti * twr) - (tr * twi);
21446 		    }
21447 		    {
21448 			 FFTW_REAL tr;
21449 			 FFTW_REAL ti;
21450 			 FFTW_REAL twr;
21451 			 FFTW_REAL twi;
21452 			 tr = c_re(inout[38 * stride]);
21453 			 ti = c_im(inout[38 * stride]);
21454 			 twr = c_re(W[37]);
21455 			 twi = c_im(W[37]);
21456 			 tre2_1_0 = (tr * twr) + (ti * twi);
21457 			 tim2_1_0 = (ti * twr) - (tr * twi);
21458 		    }
21459 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
21460 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
21461 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
21462 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
21463 	       }
21464 	       {
21465 		    FFTW_REAL tre2_0_0;
21466 		    FFTW_REAL tim2_0_0;
21467 		    FFTW_REAL tre2_1_0;
21468 		    FFTW_REAL tim2_1_0;
21469 		    {
21470 			 FFTW_REAL tr;
21471 			 FFTW_REAL ti;
21472 			 FFTW_REAL twr;
21473 			 FFTW_REAL twi;
21474 			 tr = c_re(inout[14 * stride]);
21475 			 ti = c_im(inout[14 * stride]);
21476 			 twr = c_re(W[13]);
21477 			 twi = c_im(W[13]);
21478 			 tre2_0_0 = (tr * twr) + (ti * twi);
21479 			 tim2_0_0 = (ti * twr) - (tr * twi);
21480 		    }
21481 		    {
21482 			 FFTW_REAL tr;
21483 			 FFTW_REAL ti;
21484 			 FFTW_REAL twr;
21485 			 FFTW_REAL twi;
21486 			 tr = c_re(inout[46 * stride]);
21487 			 ti = c_im(inout[46 * stride]);
21488 			 twr = c_re(W[45]);
21489 			 twi = c_im(W[45]);
21490 			 tre2_1_0 = (tr * twr) + (ti * twi);
21491 			 tim2_1_0 = (ti * twr) - (tr * twi);
21492 		    }
21493 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
21494 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
21495 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
21496 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
21497 	       }
21498 	       {
21499 		    FFTW_REAL tre2_0_0;
21500 		    FFTW_REAL tim2_0_0;
21501 		    FFTW_REAL tre2_1_0;
21502 		    FFTW_REAL tim2_1_0;
21503 		    {
21504 			 FFTW_REAL tr;
21505 			 FFTW_REAL ti;
21506 			 FFTW_REAL twr;
21507 			 FFTW_REAL twi;
21508 			 tr = c_re(inout[22 * stride]);
21509 			 ti = c_im(inout[22 * stride]);
21510 			 twr = c_re(W[21]);
21511 			 twi = c_im(W[21]);
21512 			 tre2_0_0 = (tr * twr) + (ti * twi);
21513 			 tim2_0_0 = (ti * twr) - (tr * twi);
21514 		    }
21515 		    {
21516 			 FFTW_REAL tr;
21517 			 FFTW_REAL ti;
21518 			 FFTW_REAL twr;
21519 			 FFTW_REAL twi;
21520 			 tr = c_re(inout[54 * stride]);
21521 			 ti = c_im(inout[54 * stride]);
21522 			 twr = c_re(W[53]);
21523 			 twi = c_im(W[53]);
21524 			 tre2_1_0 = (tr * twr) + (ti * twi);
21525 			 tim2_1_0 = (ti * twr) - (tr * twi);
21526 		    }
21527 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
21528 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
21529 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
21530 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
21531 	       }
21532 	       {
21533 		    FFTW_REAL tre2_0_0;
21534 		    FFTW_REAL tim2_0_0;
21535 		    FFTW_REAL tre2_1_0;
21536 		    FFTW_REAL tim2_1_0;
21537 		    {
21538 			 FFTW_REAL tr;
21539 			 FFTW_REAL ti;
21540 			 FFTW_REAL twr;
21541 			 FFTW_REAL twi;
21542 			 tr = c_re(inout[30 * stride]);
21543 			 ti = c_im(inout[30 * stride]);
21544 			 twr = c_re(W[29]);
21545 			 twi = c_im(W[29]);
21546 			 tre2_0_0 = (tr * twr) + (ti * twi);
21547 			 tim2_0_0 = (ti * twr) - (tr * twi);
21548 		    }
21549 		    {
21550 			 FFTW_REAL tr;
21551 			 FFTW_REAL ti;
21552 			 FFTW_REAL twr;
21553 			 FFTW_REAL twi;
21554 			 tr = c_re(inout[62 * stride]);
21555 			 ti = c_im(inout[62 * stride]);
21556 			 twr = c_re(W[61]);
21557 			 twi = c_im(W[61]);
21558 			 tre2_1_0 = (tr * twr) + (ti * twi);
21559 			 tim2_1_0 = (ti * twr) - (tr * twi);
21560 		    }
21561 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
21562 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
21563 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
21564 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
21565 	       }
21566 	       {
21567 		    FFTW_REAL tre2_0_0;
21568 		    FFTW_REAL tim2_0_0;
21569 		    FFTW_REAL tre2_0_1;
21570 		    FFTW_REAL tim2_0_1;
21571 		    FFTW_REAL tre2_1_0;
21572 		    FFTW_REAL tim2_1_0;
21573 		    FFTW_REAL tre2_1_1;
21574 		    FFTW_REAL tim2_1_1;
21575 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
21576 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
21577 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
21578 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
21579 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
21580 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
21581 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
21582 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
21583 		    tre0_0_6 = tre2_0_0 + tre2_0_1;
21584 		    tim0_0_6 = tim2_0_0 + tim2_0_1;
21585 		    tre0_4_6 = tre2_0_0 - tre2_0_1;
21586 		    tim0_4_6 = tim2_0_0 - tim2_0_1;
21587 		    tre0_2_6 = tre2_1_0 - tim2_1_1;
21588 		    tim0_2_6 = tim2_1_0 + tre2_1_1;
21589 		    tre0_6_6 = tre2_1_0 + tim2_1_1;
21590 		    tim0_6_6 = tim2_1_0 - tre2_1_1;
21591 	       }
21592 	       {
21593 		    FFTW_REAL tre2_0_0;
21594 		    FFTW_REAL tim2_0_0;
21595 		    FFTW_REAL tre2_0_1;
21596 		    FFTW_REAL tim2_0_1;
21597 		    FFTW_REAL tre2_1_0;
21598 		    FFTW_REAL tim2_1_0;
21599 		    FFTW_REAL tre2_1_1;
21600 		    FFTW_REAL tim2_1_1;
21601 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
21602 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
21603 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
21604 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
21605 		    {
21606 			 FFTW_REAL tre3_0_0;
21607 			 FFTW_REAL tim3_0_0;
21608 			 FFTW_REAL tre3_1_0;
21609 			 FFTW_REAL tim3_1_0;
21610 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
21611 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
21612 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
21613 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
21614 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
21615 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
21616 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
21617 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
21618 		    }
21619 		    tre0_1_6 = tre2_0_0 + tre2_0_1;
21620 		    tim0_1_6 = tim2_0_0 + tim2_0_1;
21621 		    tre0_5_6 = tre2_0_0 - tre2_0_1;
21622 		    tim0_5_6 = tim2_0_0 - tim2_0_1;
21623 		    tre0_3_6 = tre2_1_0 - tim2_1_1;
21624 		    tim0_3_6 = tim2_1_0 + tre2_1_1;
21625 		    tre0_7_6 = tre2_1_0 + tim2_1_1;
21626 		    tim0_7_6 = tim2_1_0 - tre2_1_1;
21627 	       }
21628 	  }
21629 	  {
21630 	       FFTW_REAL tre1_0_0;
21631 	       FFTW_REAL tim1_0_0;
21632 	       FFTW_REAL tre1_0_1;
21633 	       FFTW_REAL tim1_0_1;
21634 	       FFTW_REAL tre1_0_2;
21635 	       FFTW_REAL tim1_0_2;
21636 	       FFTW_REAL tre1_0_3;
21637 	       FFTW_REAL tim1_0_3;
21638 	       FFTW_REAL tre1_1_0;
21639 	       FFTW_REAL tim1_1_0;
21640 	       FFTW_REAL tre1_1_1;
21641 	       FFTW_REAL tim1_1_1;
21642 	       FFTW_REAL tre1_1_2;
21643 	       FFTW_REAL tim1_1_2;
21644 	       FFTW_REAL tre1_1_3;
21645 	       FFTW_REAL tim1_1_3;
21646 	       {
21647 		    FFTW_REAL tre2_0_0;
21648 		    FFTW_REAL tim2_0_0;
21649 		    FFTW_REAL tre2_1_0;
21650 		    FFTW_REAL tim2_1_0;
21651 		    {
21652 			 FFTW_REAL tr;
21653 			 FFTW_REAL ti;
21654 			 FFTW_REAL twr;
21655 			 FFTW_REAL twi;
21656 			 tr = c_re(inout[7 * stride]);
21657 			 ti = c_im(inout[7 * stride]);
21658 			 twr = c_re(W[6]);
21659 			 twi = c_im(W[6]);
21660 			 tre2_0_0 = (tr * twr) + (ti * twi);
21661 			 tim2_0_0 = (ti * twr) - (tr * twi);
21662 		    }
21663 		    {
21664 			 FFTW_REAL tr;
21665 			 FFTW_REAL ti;
21666 			 FFTW_REAL twr;
21667 			 FFTW_REAL twi;
21668 			 tr = c_re(inout[39 * stride]);
21669 			 ti = c_im(inout[39 * stride]);
21670 			 twr = c_re(W[38]);
21671 			 twi = c_im(W[38]);
21672 			 tre2_1_0 = (tr * twr) + (ti * twi);
21673 			 tim2_1_0 = (ti * twr) - (tr * twi);
21674 		    }
21675 		    tre1_0_0 = tre2_0_0 + tre2_1_0;
21676 		    tim1_0_0 = tim2_0_0 + tim2_1_0;
21677 		    tre1_1_0 = tre2_0_0 - tre2_1_0;
21678 		    tim1_1_0 = tim2_0_0 - tim2_1_0;
21679 	       }
21680 	       {
21681 		    FFTW_REAL tre2_0_0;
21682 		    FFTW_REAL tim2_0_0;
21683 		    FFTW_REAL tre2_1_0;
21684 		    FFTW_REAL tim2_1_0;
21685 		    {
21686 			 FFTW_REAL tr;
21687 			 FFTW_REAL ti;
21688 			 FFTW_REAL twr;
21689 			 FFTW_REAL twi;
21690 			 tr = c_re(inout[15 * stride]);
21691 			 ti = c_im(inout[15 * stride]);
21692 			 twr = c_re(W[14]);
21693 			 twi = c_im(W[14]);
21694 			 tre2_0_0 = (tr * twr) + (ti * twi);
21695 			 tim2_0_0 = (ti * twr) - (tr * twi);
21696 		    }
21697 		    {
21698 			 FFTW_REAL tr;
21699 			 FFTW_REAL ti;
21700 			 FFTW_REAL twr;
21701 			 FFTW_REAL twi;
21702 			 tr = c_re(inout[47 * stride]);
21703 			 ti = c_im(inout[47 * stride]);
21704 			 twr = c_re(W[46]);
21705 			 twi = c_im(W[46]);
21706 			 tre2_1_0 = (tr * twr) + (ti * twi);
21707 			 tim2_1_0 = (ti * twr) - (tr * twi);
21708 		    }
21709 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
21710 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
21711 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
21712 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
21713 	       }
21714 	       {
21715 		    FFTW_REAL tre2_0_0;
21716 		    FFTW_REAL tim2_0_0;
21717 		    FFTW_REAL tre2_1_0;
21718 		    FFTW_REAL tim2_1_0;
21719 		    {
21720 			 FFTW_REAL tr;
21721 			 FFTW_REAL ti;
21722 			 FFTW_REAL twr;
21723 			 FFTW_REAL twi;
21724 			 tr = c_re(inout[23 * stride]);
21725 			 ti = c_im(inout[23 * stride]);
21726 			 twr = c_re(W[22]);
21727 			 twi = c_im(W[22]);
21728 			 tre2_0_0 = (tr * twr) + (ti * twi);
21729 			 tim2_0_0 = (ti * twr) - (tr * twi);
21730 		    }
21731 		    {
21732 			 FFTW_REAL tr;
21733 			 FFTW_REAL ti;
21734 			 FFTW_REAL twr;
21735 			 FFTW_REAL twi;
21736 			 tr = c_re(inout[55 * stride]);
21737 			 ti = c_im(inout[55 * stride]);
21738 			 twr = c_re(W[54]);
21739 			 twi = c_im(W[54]);
21740 			 tre2_1_0 = (tr * twr) + (ti * twi);
21741 			 tim2_1_0 = (ti * twr) - (tr * twi);
21742 		    }
21743 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
21744 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
21745 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
21746 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
21747 	       }
21748 	       {
21749 		    FFTW_REAL tre2_0_0;
21750 		    FFTW_REAL tim2_0_0;
21751 		    FFTW_REAL tre2_1_0;
21752 		    FFTW_REAL tim2_1_0;
21753 		    {
21754 			 FFTW_REAL tr;
21755 			 FFTW_REAL ti;
21756 			 FFTW_REAL twr;
21757 			 FFTW_REAL twi;
21758 			 tr = c_re(inout[31 * stride]);
21759 			 ti = c_im(inout[31 * stride]);
21760 			 twr = c_re(W[30]);
21761 			 twi = c_im(W[30]);
21762 			 tre2_0_0 = (tr * twr) + (ti * twi);
21763 			 tim2_0_0 = (ti * twr) - (tr * twi);
21764 		    }
21765 		    {
21766 			 FFTW_REAL tr;
21767 			 FFTW_REAL ti;
21768 			 FFTW_REAL twr;
21769 			 FFTW_REAL twi;
21770 			 tr = c_re(inout[63 * stride]);
21771 			 ti = c_im(inout[63 * stride]);
21772 			 twr = c_re(W[62]);
21773 			 twi = c_im(W[62]);
21774 			 tre2_1_0 = (tr * twr) + (ti * twi);
21775 			 tim2_1_0 = (ti * twr) - (tr * twi);
21776 		    }
21777 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
21778 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
21779 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
21780 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
21781 	       }
21782 	       {
21783 		    FFTW_REAL tre2_0_0;
21784 		    FFTW_REAL tim2_0_0;
21785 		    FFTW_REAL tre2_0_1;
21786 		    FFTW_REAL tim2_0_1;
21787 		    FFTW_REAL tre2_1_0;
21788 		    FFTW_REAL tim2_1_0;
21789 		    FFTW_REAL tre2_1_1;
21790 		    FFTW_REAL tim2_1_1;
21791 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
21792 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
21793 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
21794 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
21795 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
21796 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
21797 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
21798 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
21799 		    tre0_0_7 = tre2_0_0 + tre2_0_1;
21800 		    tim0_0_7 = tim2_0_0 + tim2_0_1;
21801 		    tre0_4_7 = tre2_0_0 - tre2_0_1;
21802 		    tim0_4_7 = tim2_0_0 - tim2_0_1;
21803 		    tre0_2_7 = tre2_1_0 - tim2_1_1;
21804 		    tim0_2_7 = tim2_1_0 + tre2_1_1;
21805 		    tre0_6_7 = tre2_1_0 + tim2_1_1;
21806 		    tim0_6_7 = tim2_1_0 - tre2_1_1;
21807 	       }
21808 	       {
21809 		    FFTW_REAL tre2_0_0;
21810 		    FFTW_REAL tim2_0_0;
21811 		    FFTW_REAL tre2_0_1;
21812 		    FFTW_REAL tim2_0_1;
21813 		    FFTW_REAL tre2_1_0;
21814 		    FFTW_REAL tim2_1_0;
21815 		    FFTW_REAL tre2_1_1;
21816 		    FFTW_REAL tim2_1_1;
21817 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
21818 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
21819 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
21820 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
21821 		    {
21822 			 FFTW_REAL tre3_0_0;
21823 			 FFTW_REAL tim3_0_0;
21824 			 FFTW_REAL tre3_1_0;
21825 			 FFTW_REAL tim3_1_0;
21826 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
21827 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
21828 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
21829 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
21830 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
21831 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
21832 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
21833 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
21834 		    }
21835 		    tre0_1_7 = tre2_0_0 + tre2_0_1;
21836 		    tim0_1_7 = tim2_0_0 + tim2_0_1;
21837 		    tre0_5_7 = tre2_0_0 - tre2_0_1;
21838 		    tim0_5_7 = tim2_0_0 - tim2_0_1;
21839 		    tre0_3_7 = tre2_1_0 - tim2_1_1;
21840 		    tim0_3_7 = tim2_1_0 + tre2_1_1;
21841 		    tre0_7_7 = tre2_1_0 + tim2_1_1;
21842 		    tim0_7_7 = tim2_1_0 - tre2_1_1;
21843 	       }
21844 	  }
21845 	  {
21846 	       FFTW_REAL tre1_0_0;
21847 	       FFTW_REAL tim1_0_0;
21848 	       FFTW_REAL tre1_0_1;
21849 	       FFTW_REAL tim1_0_1;
21850 	       FFTW_REAL tre1_0_2;
21851 	       FFTW_REAL tim1_0_2;
21852 	       FFTW_REAL tre1_0_3;
21853 	       FFTW_REAL tim1_0_3;
21854 	       FFTW_REAL tre1_1_0;
21855 	       FFTW_REAL tim1_1_0;
21856 	       FFTW_REAL tre1_1_1;
21857 	       FFTW_REAL tim1_1_1;
21858 	       FFTW_REAL tre1_1_2;
21859 	       FFTW_REAL tim1_1_2;
21860 	       FFTW_REAL tre1_1_3;
21861 	       FFTW_REAL tim1_1_3;
21862 	       tre1_0_0 = tre0_0_0 + tre0_0_4;
21863 	       tim1_0_0 = tim0_0_0 + tim0_0_4;
21864 	       tre1_1_0 = tre0_0_0 - tre0_0_4;
21865 	       tim1_1_0 = tim0_0_0 - tim0_0_4;
21866 	       tre1_0_1 = tre0_0_1 + tre0_0_5;
21867 	       tim1_0_1 = tim0_0_1 + tim0_0_5;
21868 	       tre1_1_1 = tre0_0_1 - tre0_0_5;
21869 	       tim1_1_1 = tim0_0_1 - tim0_0_5;
21870 	       tre1_0_2 = tre0_0_2 + tre0_0_6;
21871 	       tim1_0_2 = tim0_0_2 + tim0_0_6;
21872 	       tre1_1_2 = tre0_0_2 - tre0_0_6;
21873 	       tim1_1_2 = tim0_0_2 - tim0_0_6;
21874 	       tre1_0_3 = tre0_0_3 + tre0_0_7;
21875 	       tim1_0_3 = tim0_0_3 + tim0_0_7;
21876 	       tre1_1_3 = tre0_0_3 - tre0_0_7;
21877 	       tim1_1_3 = tim0_0_3 - tim0_0_7;
21878 	       {
21879 		    FFTW_REAL tre2_0_0;
21880 		    FFTW_REAL tim2_0_0;
21881 		    FFTW_REAL tre2_0_1;
21882 		    FFTW_REAL tim2_0_1;
21883 		    FFTW_REAL tre2_1_0;
21884 		    FFTW_REAL tim2_1_0;
21885 		    FFTW_REAL tre2_1_1;
21886 		    FFTW_REAL tim2_1_1;
21887 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
21888 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
21889 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
21890 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
21891 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
21892 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
21893 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
21894 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
21895 		    c_re(inout[0]) = tre2_0_0 + tre2_0_1;
21896 		    c_im(inout[0]) = tim2_0_0 + tim2_0_1;
21897 		    c_re(inout[32 * stride]) = tre2_0_0 - tre2_0_1;
21898 		    c_im(inout[32 * stride]) = tim2_0_0 - tim2_0_1;
21899 		    c_re(inout[16 * stride]) = tre2_1_0 - tim2_1_1;
21900 		    c_im(inout[16 * stride]) = tim2_1_0 + tre2_1_1;
21901 		    c_re(inout[48 * stride]) = tre2_1_0 + tim2_1_1;
21902 		    c_im(inout[48 * stride]) = tim2_1_0 - tre2_1_1;
21903 	       }
21904 	       {
21905 		    FFTW_REAL tre2_0_0;
21906 		    FFTW_REAL tim2_0_0;
21907 		    FFTW_REAL tre2_0_1;
21908 		    FFTW_REAL tim2_0_1;
21909 		    FFTW_REAL tre2_1_0;
21910 		    FFTW_REAL tim2_1_0;
21911 		    FFTW_REAL tre2_1_1;
21912 		    FFTW_REAL tim2_1_1;
21913 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
21914 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
21915 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
21916 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
21917 		    {
21918 			 FFTW_REAL tre3_0_0;
21919 			 FFTW_REAL tim3_0_0;
21920 			 FFTW_REAL tre3_1_0;
21921 			 FFTW_REAL tim3_1_0;
21922 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
21923 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
21924 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
21925 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
21926 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
21927 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
21928 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
21929 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
21930 		    }
21931 		    c_re(inout[8 * stride]) = tre2_0_0 + tre2_0_1;
21932 		    c_im(inout[8 * stride]) = tim2_0_0 + tim2_0_1;
21933 		    c_re(inout[40 * stride]) = tre2_0_0 - tre2_0_1;
21934 		    c_im(inout[40 * stride]) = tim2_0_0 - tim2_0_1;
21935 		    c_re(inout[24 * stride]) = tre2_1_0 - tim2_1_1;
21936 		    c_im(inout[24 * stride]) = tim2_1_0 + tre2_1_1;
21937 		    c_re(inout[56 * stride]) = tre2_1_0 + tim2_1_1;
21938 		    c_im(inout[56 * stride]) = tim2_1_0 - tre2_1_1;
21939 	       }
21940 	  }
21941 	  {
21942 	       FFTW_REAL tre1_0_0;
21943 	       FFTW_REAL tim1_0_0;
21944 	       FFTW_REAL tre1_0_1;
21945 	       FFTW_REAL tim1_0_1;
21946 	       FFTW_REAL tre1_0_2;
21947 	       FFTW_REAL tim1_0_2;
21948 	       FFTW_REAL tre1_0_3;
21949 	       FFTW_REAL tim1_0_3;
21950 	       FFTW_REAL tre1_1_0;
21951 	       FFTW_REAL tim1_1_0;
21952 	       FFTW_REAL tre1_1_1;
21953 	       FFTW_REAL tim1_1_1;
21954 	       FFTW_REAL tre1_1_2;
21955 	       FFTW_REAL tim1_1_2;
21956 	       FFTW_REAL tre1_1_3;
21957 	       FFTW_REAL tim1_1_3;
21958 	       {
21959 		    FFTW_REAL tre2_1_0;
21960 		    FFTW_REAL tim2_1_0;
21961 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_1_4) - (((FFTW_REAL) FFTW_K382683432) * tim0_1_4);
21962 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_1_4) + (((FFTW_REAL) FFTW_K382683432) * tre0_1_4);
21963 		    tre1_0_0 = tre0_1_0 + tre2_1_0;
21964 		    tim1_0_0 = tim0_1_0 + tim2_1_0;
21965 		    tre1_1_0 = tre0_1_0 - tre2_1_0;
21966 		    tim1_1_0 = tim0_1_0 - tim2_1_0;
21967 	       }
21968 	       {
21969 		    FFTW_REAL tre2_0_0;
21970 		    FFTW_REAL tim2_0_0;
21971 		    FFTW_REAL tre2_1_0;
21972 		    FFTW_REAL tim2_1_0;
21973 		    tre2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tre0_1_1) - (((FFTW_REAL) FFTW_K098017140) * tim0_1_1);
21974 		    tim2_0_0 = (((FFTW_REAL) FFTW_K995184726) * tim0_1_1) + (((FFTW_REAL) FFTW_K098017140) * tre0_1_1);
21975 		    tre2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_1_5) - (((FFTW_REAL) FFTW_K471396736) * tim0_1_5);
21976 		    tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_1_5) + (((FFTW_REAL) FFTW_K471396736) * tre0_1_5);
21977 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
21978 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
21979 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
21980 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
21981 	       }
21982 	       {
21983 		    FFTW_REAL tre2_0_0;
21984 		    FFTW_REAL tim2_0_0;
21985 		    FFTW_REAL tre2_1_0;
21986 		    FFTW_REAL tim2_1_0;
21987 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_1_2) - (((FFTW_REAL) FFTW_K195090322) * tim0_1_2);
21988 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_1_2) + (((FFTW_REAL) FFTW_K195090322) * tre0_1_2);
21989 		    tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_1_6) - (((FFTW_REAL) FFTW_K555570233) * tim0_1_6);
21990 		    tim2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_1_6) + (((FFTW_REAL) FFTW_K555570233) * tre0_1_6);
21991 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
21992 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
21993 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
21994 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
21995 	       }
21996 	       {
21997 		    FFTW_REAL tre2_0_0;
21998 		    FFTW_REAL tim2_0_0;
21999 		    FFTW_REAL tre2_1_0;
22000 		    FFTW_REAL tim2_1_0;
22001 		    tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_1_3) - (((FFTW_REAL) FFTW_K290284677) * tim0_1_3);
22002 		    tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_1_3) + (((FFTW_REAL) FFTW_K290284677) * tre0_1_3);
22003 		    tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_1_7) - (((FFTW_REAL) FFTW_K634393284) * tim0_1_7);
22004 		    tim2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_1_7) + (((FFTW_REAL) FFTW_K634393284) * tre0_1_7);
22005 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
22006 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
22007 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
22008 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
22009 	       }
22010 	       {
22011 		    FFTW_REAL tre2_0_0;
22012 		    FFTW_REAL tim2_0_0;
22013 		    FFTW_REAL tre2_0_1;
22014 		    FFTW_REAL tim2_0_1;
22015 		    FFTW_REAL tre2_1_0;
22016 		    FFTW_REAL tim2_1_0;
22017 		    FFTW_REAL tre2_1_1;
22018 		    FFTW_REAL tim2_1_1;
22019 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22020 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22021 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22022 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22023 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22024 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22025 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22026 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22027 		    c_re(inout[stride]) = tre2_0_0 + tre2_0_1;
22028 		    c_im(inout[stride]) = tim2_0_0 + tim2_0_1;
22029 		    c_re(inout[33 * stride]) = tre2_0_0 - tre2_0_1;
22030 		    c_im(inout[33 * stride]) = tim2_0_0 - tim2_0_1;
22031 		    c_re(inout[17 * stride]) = tre2_1_0 - tim2_1_1;
22032 		    c_im(inout[17 * stride]) = tim2_1_0 + tre2_1_1;
22033 		    c_re(inout[49 * stride]) = tre2_1_0 + tim2_1_1;
22034 		    c_im(inout[49 * stride]) = tim2_1_0 - tre2_1_1;
22035 	       }
22036 	       {
22037 		    FFTW_REAL tre2_0_0;
22038 		    FFTW_REAL tim2_0_0;
22039 		    FFTW_REAL tre2_0_1;
22040 		    FFTW_REAL tim2_0_1;
22041 		    FFTW_REAL tre2_1_0;
22042 		    FFTW_REAL tim2_1_0;
22043 		    FFTW_REAL tre2_1_1;
22044 		    FFTW_REAL tim2_1_1;
22045 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22046 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22047 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22048 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22049 		    {
22050 			 FFTW_REAL tre3_0_0;
22051 			 FFTW_REAL tim3_0_0;
22052 			 FFTW_REAL tre3_1_0;
22053 			 FFTW_REAL tim3_1_0;
22054 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22055 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22056 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22057 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22058 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22059 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22060 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22061 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22062 		    }
22063 		    c_re(inout[9 * stride]) = tre2_0_0 + tre2_0_1;
22064 		    c_im(inout[9 * stride]) = tim2_0_0 + tim2_0_1;
22065 		    c_re(inout[41 * stride]) = tre2_0_0 - tre2_0_1;
22066 		    c_im(inout[41 * stride]) = tim2_0_0 - tim2_0_1;
22067 		    c_re(inout[25 * stride]) = tre2_1_0 - tim2_1_1;
22068 		    c_im(inout[25 * stride]) = tim2_1_0 + tre2_1_1;
22069 		    c_re(inout[57 * stride]) = tre2_1_0 + tim2_1_1;
22070 		    c_im(inout[57 * stride]) = tim2_1_0 - tre2_1_1;
22071 	       }
22072 	  }
22073 	  {
22074 	       FFTW_REAL tre1_0_0;
22075 	       FFTW_REAL tim1_0_0;
22076 	       FFTW_REAL tre1_0_1;
22077 	       FFTW_REAL tim1_0_1;
22078 	       FFTW_REAL tre1_0_2;
22079 	       FFTW_REAL tim1_0_2;
22080 	       FFTW_REAL tre1_0_3;
22081 	       FFTW_REAL tim1_0_3;
22082 	       FFTW_REAL tre1_1_0;
22083 	       FFTW_REAL tim1_1_0;
22084 	       FFTW_REAL tre1_1_1;
22085 	       FFTW_REAL tim1_1_1;
22086 	       FFTW_REAL tre1_1_2;
22087 	       FFTW_REAL tim1_1_2;
22088 	       FFTW_REAL tre1_1_3;
22089 	       FFTW_REAL tim1_1_3;
22090 	       {
22091 		    FFTW_REAL tre2_1_0;
22092 		    FFTW_REAL tim2_1_0;
22093 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_2_4 - tim0_2_4);
22094 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_2_4 + tre0_2_4);
22095 		    tre1_0_0 = tre0_2_0 + tre2_1_0;
22096 		    tim1_0_0 = tim0_2_0 + tim2_1_0;
22097 		    tre1_1_0 = tre0_2_0 - tre2_1_0;
22098 		    tim1_1_0 = tim0_2_0 - tim2_1_0;
22099 	       }
22100 	       {
22101 		    FFTW_REAL tre2_0_0;
22102 		    FFTW_REAL tim2_0_0;
22103 		    FFTW_REAL tre2_1_0;
22104 		    FFTW_REAL tim2_1_0;
22105 		    tre2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_2_1) - (((FFTW_REAL) FFTW_K195090322) * tim0_2_1);
22106 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tim0_2_1) + (((FFTW_REAL) FFTW_K195090322) * tre0_2_1);
22107 		    tre2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_2_5) - (((FFTW_REAL) FFTW_K831469612) * tim0_2_5);
22108 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_2_5) + (((FFTW_REAL) FFTW_K831469612) * tre0_2_5);
22109 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
22110 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
22111 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
22112 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
22113 	       }
22114 	       {
22115 		    FFTW_REAL tre2_0_0;
22116 		    FFTW_REAL tim2_0_0;
22117 		    FFTW_REAL tre2_1_0;
22118 		    FFTW_REAL tim2_1_0;
22119 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_2_2) - (((FFTW_REAL) FFTW_K382683432) * tim0_2_2);
22120 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_2_2) + (((FFTW_REAL) FFTW_K382683432) * tre0_2_2);
22121 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_2_6) - (((FFTW_REAL) FFTW_K923879532) * tim0_2_6);
22122 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_2_6) + (((FFTW_REAL) FFTW_K923879532) * tre0_2_6);
22123 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
22124 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
22125 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
22126 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
22127 	       }
22128 	       {
22129 		    FFTW_REAL tre2_0_0;
22130 		    FFTW_REAL tim2_0_0;
22131 		    FFTW_REAL tre2_1_0;
22132 		    FFTW_REAL tim2_1_0;
22133 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_2_3) - (((FFTW_REAL) FFTW_K555570233) * tim0_2_3);
22134 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_2_3) + (((FFTW_REAL) FFTW_K555570233) * tre0_2_3);
22135 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_2_7) - (((FFTW_REAL) FFTW_K980785280) * tim0_2_7);
22136 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_2_7) + (((FFTW_REAL) FFTW_K980785280) * tre0_2_7);
22137 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
22138 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
22139 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
22140 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
22141 	       }
22142 	       {
22143 		    FFTW_REAL tre2_0_0;
22144 		    FFTW_REAL tim2_0_0;
22145 		    FFTW_REAL tre2_0_1;
22146 		    FFTW_REAL tim2_0_1;
22147 		    FFTW_REAL tre2_1_0;
22148 		    FFTW_REAL tim2_1_0;
22149 		    FFTW_REAL tre2_1_1;
22150 		    FFTW_REAL tim2_1_1;
22151 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22152 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22153 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22154 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22155 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22156 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22157 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22158 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22159 		    c_re(inout[2 * stride]) = tre2_0_0 + tre2_0_1;
22160 		    c_im(inout[2 * stride]) = tim2_0_0 + tim2_0_1;
22161 		    c_re(inout[34 * stride]) = tre2_0_0 - tre2_0_1;
22162 		    c_im(inout[34 * stride]) = tim2_0_0 - tim2_0_1;
22163 		    c_re(inout[18 * stride]) = tre2_1_0 - tim2_1_1;
22164 		    c_im(inout[18 * stride]) = tim2_1_0 + tre2_1_1;
22165 		    c_re(inout[50 * stride]) = tre2_1_0 + tim2_1_1;
22166 		    c_im(inout[50 * stride]) = tim2_1_0 - tre2_1_1;
22167 	       }
22168 	       {
22169 		    FFTW_REAL tre2_0_0;
22170 		    FFTW_REAL tim2_0_0;
22171 		    FFTW_REAL tre2_0_1;
22172 		    FFTW_REAL tim2_0_1;
22173 		    FFTW_REAL tre2_1_0;
22174 		    FFTW_REAL tim2_1_0;
22175 		    FFTW_REAL tre2_1_1;
22176 		    FFTW_REAL tim2_1_1;
22177 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22178 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22179 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22180 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22181 		    {
22182 			 FFTW_REAL tre3_0_0;
22183 			 FFTW_REAL tim3_0_0;
22184 			 FFTW_REAL tre3_1_0;
22185 			 FFTW_REAL tim3_1_0;
22186 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22187 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22188 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22189 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22190 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22191 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22192 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22193 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22194 		    }
22195 		    c_re(inout[10 * stride]) = tre2_0_0 + tre2_0_1;
22196 		    c_im(inout[10 * stride]) = tim2_0_0 + tim2_0_1;
22197 		    c_re(inout[42 * stride]) = tre2_0_0 - tre2_0_1;
22198 		    c_im(inout[42 * stride]) = tim2_0_0 - tim2_0_1;
22199 		    c_re(inout[26 * stride]) = tre2_1_0 - tim2_1_1;
22200 		    c_im(inout[26 * stride]) = tim2_1_0 + tre2_1_1;
22201 		    c_re(inout[58 * stride]) = tre2_1_0 + tim2_1_1;
22202 		    c_im(inout[58 * stride]) = tim2_1_0 - tre2_1_1;
22203 	       }
22204 	  }
22205 	  {
22206 	       FFTW_REAL tre1_0_0;
22207 	       FFTW_REAL tim1_0_0;
22208 	       FFTW_REAL tre1_0_1;
22209 	       FFTW_REAL tim1_0_1;
22210 	       FFTW_REAL tre1_0_2;
22211 	       FFTW_REAL tim1_0_2;
22212 	       FFTW_REAL tre1_0_3;
22213 	       FFTW_REAL tim1_0_3;
22214 	       FFTW_REAL tre1_1_0;
22215 	       FFTW_REAL tim1_1_0;
22216 	       FFTW_REAL tre1_1_1;
22217 	       FFTW_REAL tim1_1_1;
22218 	       FFTW_REAL tre1_1_2;
22219 	       FFTW_REAL tim1_1_2;
22220 	       FFTW_REAL tre1_1_3;
22221 	       FFTW_REAL tim1_1_3;
22222 	       {
22223 		    FFTW_REAL tre2_1_0;
22224 		    FFTW_REAL tim2_1_0;
22225 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_3_4) - (((FFTW_REAL) FFTW_K923879532) * tim0_3_4);
22226 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_3_4) + (((FFTW_REAL) FFTW_K923879532) * tre0_3_4);
22227 		    tre1_0_0 = tre0_3_0 + tre2_1_0;
22228 		    tim1_0_0 = tim0_3_0 + tim2_1_0;
22229 		    tre1_1_0 = tre0_3_0 - tre2_1_0;
22230 		    tim1_1_0 = tim0_3_0 - tim2_1_0;
22231 	       }
22232 	       {
22233 		    FFTW_REAL tre2_0_0;
22234 		    FFTW_REAL tim2_0_0;
22235 		    FFTW_REAL tre2_1_0;
22236 		    FFTW_REAL tim2_1_0;
22237 		    tre2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tre0_3_1) - (((FFTW_REAL) FFTW_K290284677) * tim0_3_1);
22238 		    tim2_0_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_3_1) + (((FFTW_REAL) FFTW_K290284677) * tre0_3_1);
22239 		    tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_3_5) - (((FFTW_REAL) FFTW_K995184726) * tim0_3_5);
22240 		    tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_3_5) + (((FFTW_REAL) FFTW_K995184726) * tre0_3_5);
22241 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
22242 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
22243 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
22244 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
22245 	       }
22246 	       {
22247 		    FFTW_REAL tre2_0_0;
22248 		    FFTW_REAL tim2_0_0;
22249 		    FFTW_REAL tre2_1_0;
22250 		    FFTW_REAL tim2_1_0;
22251 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_3_2) - (((FFTW_REAL) FFTW_K555570233) * tim0_3_2);
22252 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_3_2) + (((FFTW_REAL) FFTW_K555570233) * tre0_3_2);
22253 		    tre2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_3_6) + (((FFTW_REAL) FFTW_K980785280) * tim0_3_6);
22254 		    tim2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_3_6) - (((FFTW_REAL) FFTW_K195090322) * tim0_3_6);
22255 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
22256 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
22257 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
22258 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
22259 	       }
22260 	       {
22261 		    FFTW_REAL tre2_0_0;
22262 		    FFTW_REAL tim2_0_0;
22263 		    FFTW_REAL tre2_1_0;
22264 		    FFTW_REAL tim2_1_0;
22265 		    tre2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_3_3) - (((FFTW_REAL) FFTW_K773010453) * tim0_3_3);
22266 		    tim2_0_0 = (((FFTW_REAL) FFTW_K634393284) * tim0_3_3) + (((FFTW_REAL) FFTW_K773010453) * tre0_3_3);
22267 		    tre2_1_0 = (((FFTW_REAL) FFTW_K471396736) * tre0_3_7) + (((FFTW_REAL) FFTW_K881921264) * tim0_3_7);
22268 		    tim2_1_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_3_7) - (((FFTW_REAL) FFTW_K471396736) * tim0_3_7);
22269 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
22270 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
22271 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
22272 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
22273 	       }
22274 	       {
22275 		    FFTW_REAL tre2_0_0;
22276 		    FFTW_REAL tim2_0_0;
22277 		    FFTW_REAL tre2_0_1;
22278 		    FFTW_REAL tim2_0_1;
22279 		    FFTW_REAL tre2_1_0;
22280 		    FFTW_REAL tim2_1_0;
22281 		    FFTW_REAL tre2_1_1;
22282 		    FFTW_REAL tim2_1_1;
22283 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22284 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22285 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22286 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22287 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22288 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22289 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22290 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22291 		    c_re(inout[3 * stride]) = tre2_0_0 + tre2_0_1;
22292 		    c_im(inout[3 * stride]) = tim2_0_0 + tim2_0_1;
22293 		    c_re(inout[35 * stride]) = tre2_0_0 - tre2_0_1;
22294 		    c_im(inout[35 * stride]) = tim2_0_0 - tim2_0_1;
22295 		    c_re(inout[19 * stride]) = tre2_1_0 - tim2_1_1;
22296 		    c_im(inout[19 * stride]) = tim2_1_0 + tre2_1_1;
22297 		    c_re(inout[51 * stride]) = tre2_1_0 + tim2_1_1;
22298 		    c_im(inout[51 * stride]) = tim2_1_0 - tre2_1_1;
22299 	       }
22300 	       {
22301 		    FFTW_REAL tre2_0_0;
22302 		    FFTW_REAL tim2_0_0;
22303 		    FFTW_REAL tre2_0_1;
22304 		    FFTW_REAL tim2_0_1;
22305 		    FFTW_REAL tre2_1_0;
22306 		    FFTW_REAL tim2_1_0;
22307 		    FFTW_REAL tre2_1_1;
22308 		    FFTW_REAL tim2_1_1;
22309 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22310 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22311 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22312 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22313 		    {
22314 			 FFTW_REAL tre3_0_0;
22315 			 FFTW_REAL tim3_0_0;
22316 			 FFTW_REAL tre3_1_0;
22317 			 FFTW_REAL tim3_1_0;
22318 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22319 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22320 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22321 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22322 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22323 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22324 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22325 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22326 		    }
22327 		    c_re(inout[11 * stride]) = tre2_0_0 + tre2_0_1;
22328 		    c_im(inout[11 * stride]) = tim2_0_0 + tim2_0_1;
22329 		    c_re(inout[43 * stride]) = tre2_0_0 - tre2_0_1;
22330 		    c_im(inout[43 * stride]) = tim2_0_0 - tim2_0_1;
22331 		    c_re(inout[27 * stride]) = tre2_1_0 - tim2_1_1;
22332 		    c_im(inout[27 * stride]) = tim2_1_0 + tre2_1_1;
22333 		    c_re(inout[59 * stride]) = tre2_1_0 + tim2_1_1;
22334 		    c_im(inout[59 * stride]) = tim2_1_0 - tre2_1_1;
22335 	       }
22336 	  }
22337 	  {
22338 	       FFTW_REAL tre1_0_0;
22339 	       FFTW_REAL tim1_0_0;
22340 	       FFTW_REAL tre1_0_1;
22341 	       FFTW_REAL tim1_0_1;
22342 	       FFTW_REAL tre1_0_2;
22343 	       FFTW_REAL tim1_0_2;
22344 	       FFTW_REAL tre1_0_3;
22345 	       FFTW_REAL tim1_0_3;
22346 	       FFTW_REAL tre1_1_0;
22347 	       FFTW_REAL tim1_1_0;
22348 	       FFTW_REAL tre1_1_1;
22349 	       FFTW_REAL tim1_1_1;
22350 	       FFTW_REAL tre1_1_2;
22351 	       FFTW_REAL tim1_1_2;
22352 	       FFTW_REAL tre1_1_3;
22353 	       FFTW_REAL tim1_1_3;
22354 	       tre1_0_0 = tre0_4_0 - tim0_4_4;
22355 	       tim1_0_0 = tim0_4_0 + tre0_4_4;
22356 	       tre1_1_0 = tre0_4_0 + tim0_4_4;
22357 	       tim1_1_0 = tim0_4_0 - tre0_4_4;
22358 	       {
22359 		    FFTW_REAL tre2_0_0;
22360 		    FFTW_REAL tim2_0_0;
22361 		    FFTW_REAL tre2_1_0;
22362 		    FFTW_REAL tim2_1_0;
22363 		    tre2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_1) - (((FFTW_REAL) FFTW_K382683432) * tim0_4_1);
22364 		    tim2_0_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_4_1) + (((FFTW_REAL) FFTW_K382683432) * tre0_4_1);
22365 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_5) + (((FFTW_REAL) FFTW_K923879532) * tim0_4_5);
22366 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_5) - (((FFTW_REAL) FFTW_K382683432) * tim0_4_5);
22367 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
22368 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
22369 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
22370 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
22371 	       }
22372 	       {
22373 		    FFTW_REAL tre2_0_0;
22374 		    FFTW_REAL tim2_0_0;
22375 		    FFTW_REAL tre2_1_0;
22376 		    FFTW_REAL tim2_1_0;
22377 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_2 - tim0_4_2);
22378 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_4_2 + tre0_4_2);
22379 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_6 + tim0_4_6);
22380 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_4_6 - tim0_4_6);
22381 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
22382 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
22383 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
22384 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
22385 	       }
22386 	       {
22387 		    FFTW_REAL tre2_0_0;
22388 		    FFTW_REAL tim2_0_0;
22389 		    FFTW_REAL tre2_1_0;
22390 		    FFTW_REAL tim2_1_0;
22391 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_3) - (((FFTW_REAL) FFTW_K923879532) * tim0_4_3);
22392 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_4_3) + (((FFTW_REAL) FFTW_K923879532) * tre0_4_3);
22393 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_4_7) + (((FFTW_REAL) FFTW_K382683432) * tim0_4_7);
22394 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_4_7) - (((FFTW_REAL) FFTW_K923879532) * tim0_4_7);
22395 		    tre1_0_3 = tre2_0_0 - tre2_1_0;
22396 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
22397 		    tre1_1_3 = tre2_0_0 + tre2_1_0;
22398 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
22399 	       }
22400 	       {
22401 		    FFTW_REAL tre2_0_0;
22402 		    FFTW_REAL tim2_0_0;
22403 		    FFTW_REAL tre2_0_1;
22404 		    FFTW_REAL tim2_0_1;
22405 		    FFTW_REAL tre2_1_0;
22406 		    FFTW_REAL tim2_1_0;
22407 		    FFTW_REAL tre2_1_1;
22408 		    FFTW_REAL tim2_1_1;
22409 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22410 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22411 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22412 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22413 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22414 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22415 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22416 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22417 		    c_re(inout[4 * stride]) = tre2_0_0 + tre2_0_1;
22418 		    c_im(inout[4 * stride]) = tim2_0_0 + tim2_0_1;
22419 		    c_re(inout[36 * stride]) = tre2_0_0 - tre2_0_1;
22420 		    c_im(inout[36 * stride]) = tim2_0_0 - tim2_0_1;
22421 		    c_re(inout[20 * stride]) = tre2_1_0 - tim2_1_1;
22422 		    c_im(inout[20 * stride]) = tim2_1_0 + tre2_1_1;
22423 		    c_re(inout[52 * stride]) = tre2_1_0 + tim2_1_1;
22424 		    c_im(inout[52 * stride]) = tim2_1_0 - tre2_1_1;
22425 	       }
22426 	       {
22427 		    FFTW_REAL tre2_0_0;
22428 		    FFTW_REAL tim2_0_0;
22429 		    FFTW_REAL tre2_0_1;
22430 		    FFTW_REAL tim2_0_1;
22431 		    FFTW_REAL tre2_1_0;
22432 		    FFTW_REAL tim2_1_0;
22433 		    FFTW_REAL tre2_1_1;
22434 		    FFTW_REAL tim2_1_1;
22435 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22436 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22437 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22438 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22439 		    {
22440 			 FFTW_REAL tre3_0_0;
22441 			 FFTW_REAL tim3_0_0;
22442 			 FFTW_REAL tre3_1_0;
22443 			 FFTW_REAL tim3_1_0;
22444 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22445 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22446 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22447 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22448 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22449 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22450 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22451 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22452 		    }
22453 		    c_re(inout[12 * stride]) = tre2_0_0 + tre2_0_1;
22454 		    c_im(inout[12 * stride]) = tim2_0_0 + tim2_0_1;
22455 		    c_re(inout[44 * stride]) = tre2_0_0 - tre2_0_1;
22456 		    c_im(inout[44 * stride]) = tim2_0_0 - tim2_0_1;
22457 		    c_re(inout[28 * stride]) = tre2_1_0 - tim2_1_1;
22458 		    c_im(inout[28 * stride]) = tim2_1_0 + tre2_1_1;
22459 		    c_re(inout[60 * stride]) = tre2_1_0 + tim2_1_1;
22460 		    c_im(inout[60 * stride]) = tim2_1_0 - tre2_1_1;
22461 	       }
22462 	  }
22463 	  {
22464 	       FFTW_REAL tre1_0_0;
22465 	       FFTW_REAL tim1_0_0;
22466 	       FFTW_REAL tre1_0_1;
22467 	       FFTW_REAL tim1_0_1;
22468 	       FFTW_REAL tre1_0_2;
22469 	       FFTW_REAL tim1_0_2;
22470 	       FFTW_REAL tre1_0_3;
22471 	       FFTW_REAL tim1_0_3;
22472 	       FFTW_REAL tre1_1_0;
22473 	       FFTW_REAL tim1_1_0;
22474 	       FFTW_REAL tre1_1_1;
22475 	       FFTW_REAL tim1_1_1;
22476 	       FFTW_REAL tre1_1_2;
22477 	       FFTW_REAL tim1_1_2;
22478 	       FFTW_REAL tre1_1_3;
22479 	       FFTW_REAL tim1_1_3;
22480 	       {
22481 		    FFTW_REAL tre2_1_0;
22482 		    FFTW_REAL tim2_1_0;
22483 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_5_4) + (((FFTW_REAL) FFTW_K923879532) * tim0_5_4);
22484 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_5_4) - (((FFTW_REAL) FFTW_K382683432) * tim0_5_4);
22485 		    tre1_0_0 = tre0_5_0 - tre2_1_0;
22486 		    tim1_0_0 = tim0_5_0 + tim2_1_0;
22487 		    tre1_1_0 = tre0_5_0 + tre2_1_0;
22488 		    tim1_1_0 = tim0_5_0 - tim2_1_0;
22489 	       }
22490 	       {
22491 		    FFTW_REAL tre2_0_0;
22492 		    FFTW_REAL tim2_0_0;
22493 		    FFTW_REAL tre2_1_0;
22494 		    FFTW_REAL tim2_1_0;
22495 		    tre2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_5_1) - (((FFTW_REAL) FFTW_K471396736) * tim0_5_1);
22496 		    tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tim0_5_1) + (((FFTW_REAL) FFTW_K471396736) * tre0_5_1);
22497 		    tre2_1_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_5_5) + (((FFTW_REAL) FFTW_K634393284) * tim0_5_5);
22498 		    tim2_1_0 = (((FFTW_REAL) FFTW_K634393284) * tre0_5_5) - (((FFTW_REAL) FFTW_K773010453) * tim0_5_5);
22499 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
22500 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
22501 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
22502 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
22503 	       }
22504 	       {
22505 		    FFTW_REAL tre2_0_0;
22506 		    FFTW_REAL tim2_0_0;
22507 		    FFTW_REAL tre2_1_0;
22508 		    FFTW_REAL tim2_1_0;
22509 		    tre2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tre0_5_2) - (((FFTW_REAL) FFTW_K831469612) * tim0_5_2);
22510 		    tim2_0_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_5_2) + (((FFTW_REAL) FFTW_K831469612) * tre0_5_2);
22511 		    tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_5_6) + (((FFTW_REAL) FFTW_K195090322) * tim0_5_6);
22512 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_5_6) - (((FFTW_REAL) FFTW_K980785280) * tim0_5_6);
22513 		    tre1_0_2 = tre2_0_0 - tre2_1_0;
22514 		    tim1_0_2 = tim2_0_0 + tim2_1_0;
22515 		    tre1_1_2 = tre2_0_0 + tre2_1_0;
22516 		    tim1_1_2 = tim2_0_0 - tim2_1_0;
22517 	       }
22518 	       {
22519 		    FFTW_REAL tre2_0_0;
22520 		    FFTW_REAL tim2_0_0;
22521 		    FFTW_REAL tre2_1_0;
22522 		    FFTW_REAL tim2_1_0;
22523 		    tre2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_5_3) - (((FFTW_REAL) FFTW_K995184726) * tim0_5_3);
22524 		    tim2_0_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_5_3) + (((FFTW_REAL) FFTW_K995184726) * tre0_5_3);
22525 		    tre2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tim0_5_7) - (((FFTW_REAL) FFTW_K956940335) * tre0_5_7);
22526 		    tim2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_5_7) + (((FFTW_REAL) FFTW_K290284677) * tre0_5_7);
22527 		    tre1_0_3 = tre2_0_0 + tre2_1_0;
22528 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
22529 		    tre1_1_3 = tre2_0_0 - tre2_1_0;
22530 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
22531 	       }
22532 	       {
22533 		    FFTW_REAL tre2_0_0;
22534 		    FFTW_REAL tim2_0_0;
22535 		    FFTW_REAL tre2_0_1;
22536 		    FFTW_REAL tim2_0_1;
22537 		    FFTW_REAL tre2_1_0;
22538 		    FFTW_REAL tim2_1_0;
22539 		    FFTW_REAL tre2_1_1;
22540 		    FFTW_REAL tim2_1_1;
22541 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22542 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22543 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22544 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22545 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22546 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22547 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22548 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22549 		    c_re(inout[5 * stride]) = tre2_0_0 + tre2_0_1;
22550 		    c_im(inout[5 * stride]) = tim2_0_0 + tim2_0_1;
22551 		    c_re(inout[37 * stride]) = tre2_0_0 - tre2_0_1;
22552 		    c_im(inout[37 * stride]) = tim2_0_0 - tim2_0_1;
22553 		    c_re(inout[21 * stride]) = tre2_1_0 - tim2_1_1;
22554 		    c_im(inout[21 * stride]) = tim2_1_0 + tre2_1_1;
22555 		    c_re(inout[53 * stride]) = tre2_1_0 + tim2_1_1;
22556 		    c_im(inout[53 * stride]) = tim2_1_0 - tre2_1_1;
22557 	       }
22558 	       {
22559 		    FFTW_REAL tre2_0_0;
22560 		    FFTW_REAL tim2_0_0;
22561 		    FFTW_REAL tre2_0_1;
22562 		    FFTW_REAL tim2_0_1;
22563 		    FFTW_REAL tre2_1_0;
22564 		    FFTW_REAL tim2_1_0;
22565 		    FFTW_REAL tre2_1_1;
22566 		    FFTW_REAL tim2_1_1;
22567 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22568 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22569 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22570 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22571 		    {
22572 			 FFTW_REAL tre3_0_0;
22573 			 FFTW_REAL tim3_0_0;
22574 			 FFTW_REAL tre3_1_0;
22575 			 FFTW_REAL tim3_1_0;
22576 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22577 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22578 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22579 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22580 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22581 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22582 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22583 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22584 		    }
22585 		    c_re(inout[13 * stride]) = tre2_0_0 + tre2_0_1;
22586 		    c_im(inout[13 * stride]) = tim2_0_0 + tim2_0_1;
22587 		    c_re(inout[45 * stride]) = tre2_0_0 - tre2_0_1;
22588 		    c_im(inout[45 * stride]) = tim2_0_0 - tim2_0_1;
22589 		    c_re(inout[29 * stride]) = tre2_1_0 - tim2_1_1;
22590 		    c_im(inout[29 * stride]) = tim2_1_0 + tre2_1_1;
22591 		    c_re(inout[61 * stride]) = tre2_1_0 + tim2_1_1;
22592 		    c_im(inout[61 * stride]) = tim2_1_0 - tre2_1_1;
22593 	       }
22594 	  }
22595 	  {
22596 	       FFTW_REAL tre1_0_0;
22597 	       FFTW_REAL tim1_0_0;
22598 	       FFTW_REAL tre1_0_1;
22599 	       FFTW_REAL tim1_0_1;
22600 	       FFTW_REAL tre1_0_2;
22601 	       FFTW_REAL tim1_0_2;
22602 	       FFTW_REAL tre1_0_3;
22603 	       FFTW_REAL tim1_0_3;
22604 	       FFTW_REAL tre1_1_0;
22605 	       FFTW_REAL tim1_1_0;
22606 	       FFTW_REAL tre1_1_1;
22607 	       FFTW_REAL tim1_1_1;
22608 	       FFTW_REAL tre1_1_2;
22609 	       FFTW_REAL tim1_1_2;
22610 	       FFTW_REAL tre1_1_3;
22611 	       FFTW_REAL tim1_1_3;
22612 	       {
22613 		    FFTW_REAL tre2_1_0;
22614 		    FFTW_REAL tim2_1_0;
22615 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_6_4 + tim0_6_4);
22616 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_6_4 - tim0_6_4);
22617 		    tre1_0_0 = tre0_6_0 - tre2_1_0;
22618 		    tim1_0_0 = tim0_6_0 + tim2_1_0;
22619 		    tre1_1_0 = tre0_6_0 + tre2_1_0;
22620 		    tim1_1_0 = tim0_6_0 - tim2_1_0;
22621 	       }
22622 	       {
22623 		    FFTW_REAL tre2_0_0;
22624 		    FFTW_REAL tim2_0_0;
22625 		    FFTW_REAL tre2_1_0;
22626 		    FFTW_REAL tim2_1_0;
22627 		    tre2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tre0_6_1) - (((FFTW_REAL) FFTW_K555570233) * tim0_6_1);
22628 		    tim2_0_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_1) + (((FFTW_REAL) FFTW_K555570233) * tre0_6_1);
22629 		    tre2_1_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_6_5) + (((FFTW_REAL) FFTW_K195090322) * tim0_6_5);
22630 		    tim2_1_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_6_5) - (((FFTW_REAL) FFTW_K980785280) * tim0_6_5);
22631 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
22632 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
22633 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
22634 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
22635 	       }
22636 	       {
22637 		    FFTW_REAL tre2_0_0;
22638 		    FFTW_REAL tim2_0_0;
22639 		    FFTW_REAL tre2_1_0;
22640 		    FFTW_REAL tim2_1_0;
22641 		    tre2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_6_2) - (((FFTW_REAL) FFTW_K923879532) * tim0_6_2);
22642 		    tim2_0_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_2) + (((FFTW_REAL) FFTW_K923879532) * tre0_6_2);
22643 		    tre2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tim0_6_6) - (((FFTW_REAL) FFTW_K923879532) * tre0_6_6);
22644 		    tim2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tim0_6_6) + (((FFTW_REAL) FFTW_K382683432) * tre0_6_6);
22645 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
22646 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
22647 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
22648 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
22649 	       }
22650 	       {
22651 		    FFTW_REAL tre2_0_0;
22652 		    FFTW_REAL tim2_0_0;
22653 		    FFTW_REAL tre2_1_0;
22654 		    FFTW_REAL tim2_1_0;
22655 		    tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_6_3) + (((FFTW_REAL) FFTW_K980785280) * tim0_6_3);
22656 		    tim2_0_0 = (((FFTW_REAL) FFTW_K980785280) * tre0_6_3) - (((FFTW_REAL) FFTW_K195090322) * tim0_6_3);
22657 		    tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_6_7) - (((FFTW_REAL) FFTW_K555570233) * tre0_6_7);
22658 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_6_7) + (((FFTW_REAL) FFTW_K831469612) * tre0_6_7);
22659 		    tre1_0_3 = tre2_1_0 - tre2_0_0;
22660 		    tim1_0_3 = tim2_0_0 - tim2_1_0;
22661 		    tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
22662 		    tim1_1_3 = tim2_0_0 + tim2_1_0;
22663 	       }
22664 	       {
22665 		    FFTW_REAL tre2_0_0;
22666 		    FFTW_REAL tim2_0_0;
22667 		    FFTW_REAL tre2_0_1;
22668 		    FFTW_REAL tim2_0_1;
22669 		    FFTW_REAL tre2_1_0;
22670 		    FFTW_REAL tim2_1_0;
22671 		    FFTW_REAL tre2_1_1;
22672 		    FFTW_REAL tim2_1_1;
22673 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22674 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22675 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22676 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22677 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22678 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22679 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22680 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22681 		    c_re(inout[6 * stride]) = tre2_0_0 + tre2_0_1;
22682 		    c_im(inout[6 * stride]) = tim2_0_0 + tim2_0_1;
22683 		    c_re(inout[38 * stride]) = tre2_0_0 - tre2_0_1;
22684 		    c_im(inout[38 * stride]) = tim2_0_0 - tim2_0_1;
22685 		    c_re(inout[22 * stride]) = tre2_1_0 - tim2_1_1;
22686 		    c_im(inout[22 * stride]) = tim2_1_0 + tre2_1_1;
22687 		    c_re(inout[54 * stride]) = tre2_1_0 + tim2_1_1;
22688 		    c_im(inout[54 * stride]) = tim2_1_0 - tre2_1_1;
22689 	       }
22690 	       {
22691 		    FFTW_REAL tre2_0_0;
22692 		    FFTW_REAL tim2_0_0;
22693 		    FFTW_REAL tre2_0_1;
22694 		    FFTW_REAL tim2_0_1;
22695 		    FFTW_REAL tre2_1_0;
22696 		    FFTW_REAL tim2_1_0;
22697 		    FFTW_REAL tre2_1_1;
22698 		    FFTW_REAL tim2_1_1;
22699 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22700 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22701 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22702 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22703 		    {
22704 			 FFTW_REAL tre3_0_0;
22705 			 FFTW_REAL tim3_0_0;
22706 			 FFTW_REAL tre3_1_0;
22707 			 FFTW_REAL tim3_1_0;
22708 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22709 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22710 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22711 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22712 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22713 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22714 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22715 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22716 		    }
22717 		    c_re(inout[14 * stride]) = tre2_0_0 + tre2_0_1;
22718 		    c_im(inout[14 * stride]) = tim2_0_0 + tim2_0_1;
22719 		    c_re(inout[46 * stride]) = tre2_0_0 - tre2_0_1;
22720 		    c_im(inout[46 * stride]) = tim2_0_0 - tim2_0_1;
22721 		    c_re(inout[30 * stride]) = tre2_1_0 - tim2_1_1;
22722 		    c_im(inout[30 * stride]) = tim2_1_0 + tre2_1_1;
22723 		    c_re(inout[62 * stride]) = tre2_1_0 + tim2_1_1;
22724 		    c_im(inout[62 * stride]) = tim2_1_0 - tre2_1_1;
22725 	       }
22726 	  }
22727 	  {
22728 	       FFTW_REAL tre1_0_0;
22729 	       FFTW_REAL tim1_0_0;
22730 	       FFTW_REAL tre1_0_1;
22731 	       FFTW_REAL tim1_0_1;
22732 	       FFTW_REAL tre1_0_2;
22733 	       FFTW_REAL tim1_0_2;
22734 	       FFTW_REAL tre1_0_3;
22735 	       FFTW_REAL tim1_0_3;
22736 	       FFTW_REAL tre1_1_0;
22737 	       FFTW_REAL tim1_1_0;
22738 	       FFTW_REAL tre1_1_1;
22739 	       FFTW_REAL tim1_1_1;
22740 	       FFTW_REAL tre1_1_2;
22741 	       FFTW_REAL tim1_1_2;
22742 	       FFTW_REAL tre1_1_3;
22743 	       FFTW_REAL tim1_1_3;
22744 	       {
22745 		    FFTW_REAL tre2_1_0;
22746 		    FFTW_REAL tim2_1_0;
22747 		    tre2_1_0 = (((FFTW_REAL) FFTW_K923879532) * tre0_7_4) + (((FFTW_REAL) FFTW_K382683432) * tim0_7_4);
22748 		    tim2_1_0 = (((FFTW_REAL) FFTW_K382683432) * tre0_7_4) - (((FFTW_REAL) FFTW_K923879532) * tim0_7_4);
22749 		    tre1_0_0 = tre0_7_0 - tre2_1_0;
22750 		    tim1_0_0 = tim0_7_0 + tim2_1_0;
22751 		    tre1_1_0 = tre0_7_0 + tre2_1_0;
22752 		    tim1_1_0 = tim0_7_0 - tim2_1_0;
22753 	       }
22754 	       {
22755 		    FFTW_REAL tre2_0_0;
22756 		    FFTW_REAL tim2_0_0;
22757 		    FFTW_REAL tre2_1_0;
22758 		    FFTW_REAL tim2_1_0;
22759 		    tre2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tre0_7_1) - (((FFTW_REAL) FFTW_K634393284) * tim0_7_1);
22760 		    tim2_0_0 = (((FFTW_REAL) FFTW_K773010453) * tim0_7_1) + (((FFTW_REAL) FFTW_K634393284) * tre0_7_1);
22761 		    tre2_1_0 = (((FFTW_REAL) FFTW_K290284677) * tim0_7_5) - (((FFTW_REAL) FFTW_K956940335) * tre0_7_5);
22762 		    tim2_1_0 = (((FFTW_REAL) FFTW_K956940335) * tim0_7_5) + (((FFTW_REAL) FFTW_K290284677) * tre0_7_5);
22763 		    tre1_0_1 = tre2_0_0 + tre2_1_0;
22764 		    tim1_0_1 = tim2_0_0 - tim2_1_0;
22765 		    tre1_1_1 = tre2_0_0 - tre2_1_0;
22766 		    tim1_1_1 = tim2_0_0 + tim2_1_0;
22767 	       }
22768 	       {
22769 		    FFTW_REAL tre2_0_0;
22770 		    FFTW_REAL tim2_0_0;
22771 		    FFTW_REAL tre2_1_0;
22772 		    FFTW_REAL tim2_1_0;
22773 		    tre2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tre0_7_2) - (((FFTW_REAL) FFTW_K980785280) * tim0_7_2);
22774 		    tim2_0_0 = (((FFTW_REAL) FFTW_K195090322) * tim0_7_2) + (((FFTW_REAL) FFTW_K980785280) * tre0_7_2);
22775 		    tre2_1_0 = (((FFTW_REAL) FFTW_K831469612) * tim0_7_6) - (((FFTW_REAL) FFTW_K555570233) * tre0_7_6);
22776 		    tim2_1_0 = (((FFTW_REAL) FFTW_K555570233) * tim0_7_6) + (((FFTW_REAL) FFTW_K831469612) * tre0_7_6);
22777 		    tre1_0_2 = tre2_0_0 + tre2_1_0;
22778 		    tim1_0_2 = tim2_0_0 - tim2_1_0;
22779 		    tre1_1_2 = tre2_0_0 - tre2_1_0;
22780 		    tim1_1_2 = tim2_0_0 + tim2_1_0;
22781 	       }
22782 	       {
22783 		    FFTW_REAL tre2_0_0;
22784 		    FFTW_REAL tim2_0_0;
22785 		    FFTW_REAL tre2_1_0;
22786 		    FFTW_REAL tim2_1_0;
22787 		    tre2_0_0 = (((FFTW_REAL) FFTW_K471396736) * tre0_7_3) + (((FFTW_REAL) FFTW_K881921264) * tim0_7_3);
22788 		    tim2_0_0 = (((FFTW_REAL) FFTW_K881921264) * tre0_7_3) - (((FFTW_REAL) FFTW_K471396736) * tim0_7_3);
22789 		    tre2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tre0_7_7) + (((FFTW_REAL) FFTW_K995184726) * tim0_7_7);
22790 		    tim2_1_0 = (((FFTW_REAL) FFTW_K098017140) * tim0_7_7) - (((FFTW_REAL) FFTW_K995184726) * tre0_7_7);
22791 		    tre1_0_3 = tre2_1_0 - tre2_0_0;
22792 		    tim1_0_3 = tim2_0_0 + tim2_1_0;
22793 		    tre1_1_3 = (-(tre2_0_0 + tre2_1_0));
22794 		    tim1_1_3 = tim2_0_0 - tim2_1_0;
22795 	       }
22796 	       {
22797 		    FFTW_REAL tre2_0_0;
22798 		    FFTW_REAL tim2_0_0;
22799 		    FFTW_REAL tre2_0_1;
22800 		    FFTW_REAL tim2_0_1;
22801 		    FFTW_REAL tre2_1_0;
22802 		    FFTW_REAL tim2_1_0;
22803 		    FFTW_REAL tre2_1_1;
22804 		    FFTW_REAL tim2_1_1;
22805 		    tre2_0_0 = tre1_0_0 + tre1_0_2;
22806 		    tim2_0_0 = tim1_0_0 + tim1_0_2;
22807 		    tre2_1_0 = tre1_0_0 - tre1_0_2;
22808 		    tim2_1_0 = tim1_0_0 - tim1_0_2;
22809 		    tre2_0_1 = tre1_0_1 + tre1_0_3;
22810 		    tim2_0_1 = tim1_0_1 + tim1_0_3;
22811 		    tre2_1_1 = tre1_0_1 - tre1_0_3;
22812 		    tim2_1_1 = tim1_0_1 - tim1_0_3;
22813 		    c_re(inout[7 * stride]) = tre2_0_0 + tre2_0_1;
22814 		    c_im(inout[7 * stride]) = tim2_0_0 + tim2_0_1;
22815 		    c_re(inout[39 * stride]) = tre2_0_0 - tre2_0_1;
22816 		    c_im(inout[39 * stride]) = tim2_0_0 - tim2_0_1;
22817 		    c_re(inout[23 * stride]) = tre2_1_0 - tim2_1_1;
22818 		    c_im(inout[23 * stride]) = tim2_1_0 + tre2_1_1;
22819 		    c_re(inout[55 * stride]) = tre2_1_0 + tim2_1_1;
22820 		    c_im(inout[55 * stride]) = tim2_1_0 - tre2_1_1;
22821 	       }
22822 	       {
22823 		    FFTW_REAL tre2_0_0;
22824 		    FFTW_REAL tim2_0_0;
22825 		    FFTW_REAL tre2_0_1;
22826 		    FFTW_REAL tim2_0_1;
22827 		    FFTW_REAL tre2_1_0;
22828 		    FFTW_REAL tim2_1_0;
22829 		    FFTW_REAL tre2_1_1;
22830 		    FFTW_REAL tim2_1_1;
22831 		    tre2_0_0 = tre1_1_0 - tim1_1_2;
22832 		    tim2_0_0 = tim1_1_0 + tre1_1_2;
22833 		    tre2_1_0 = tre1_1_0 + tim1_1_2;
22834 		    tim2_1_0 = tim1_1_0 - tre1_1_2;
22835 		    {
22836 			 FFTW_REAL tre3_0_0;
22837 			 FFTW_REAL tim3_0_0;
22838 			 FFTW_REAL tre3_1_0;
22839 			 FFTW_REAL tim3_1_0;
22840 			 tre3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_1 - tim1_1_1);
22841 			 tim3_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim1_1_1 + tre1_1_1);
22842 			 tre3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 + tim1_1_3);
22843 			 tim3_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre1_1_3 - tim1_1_3);
22844 			 tre2_0_1 = tre3_0_0 - tre3_1_0;
22845 			 tim2_0_1 = tim3_0_0 + tim3_1_0;
22846 			 tre2_1_1 = tre3_0_0 + tre3_1_0;
22847 			 tim2_1_1 = tim3_0_0 - tim3_1_0;
22848 		    }
22849 		    c_re(inout[15 * stride]) = tre2_0_0 + tre2_0_1;
22850 		    c_im(inout[15 * stride]) = tim2_0_0 + tim2_0_1;
22851 		    c_re(inout[47 * stride]) = tre2_0_0 - tre2_0_1;
22852 		    c_im(inout[47 * stride]) = tim2_0_0 - tim2_0_1;
22853 		    c_re(inout[31 * stride]) = tre2_1_0 - tim2_1_1;
22854 		    c_im(inout[31 * stride]) = tim2_1_0 + tre2_1_1;
22855 		    c_re(inout[63 * stride]) = tre2_1_0 + tim2_1_1;
22856 		    c_im(inout[63 * stride]) = tim2_1_0 - tre2_1_1;
22857 	       }
22858 	  }
22859      }
22860 }
22861 
22862 /* This function contains 102 FP additions and 60 FP multiplications */
22863 
/*
 * fftwi_twiddle_7: in-place 7-point twiddle butterfly.
 *
 * For each of the m passes the routine
 *   1. loads the seven complex samples inout[0], inout[stride], ...,
 *      inout[6 * stride];
 *   2. multiplies samples 1..6 by the complex conjugates of W[0..5]
 *      (sample 0 is used exactly as loaded);
 *   3. overwrites the same seven slots with fixed linear combinations of
 *      those products.
 * After each pass the data pointer advances by dist elements and W by six
 * entries.
 *
 * NOTE(review): judging by the function name and the digits embedded in the
 * FFTW_K* constant names, this looks like a length-7 inverse DFT (the
 * constants would be |cos| and sin of 2*pi*k/7) -- confirm against the
 * constant definitions, which live outside this block.
 *
 * A      - first element of the data; modified in place.
 * W      - twiddle factors, six consumed per pass; only read.
 * stride - element distance between the seven samples of one pass.
 * m      - number of passes; nothing is touched when m <= 0.
 * dist   - element distance between the first samples of successive passes.
 */
static void fftwi_twiddle_7(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
{
     /* Loop-invariant coefficients, converted to the working precision once. */
     const FFTW_REAL k623 = (FFTW_REAL) FFTW_K623489801;
     const FFTW_REAL k900 = (FFTW_REAL) FFTW_K900968867;
     const FFTW_REAL k222 = (FFTW_REAL) FFTW_K222520933;
     const FFTW_REAL k781 = (FFTW_REAL) FFTW_K781831482;
     const FFTW_REAL k974 = (FFTW_REAL) FFTW_K974927912;
     const FFTW_REAL k433 = (FFTW_REAL) FFTW_K433883739;
     FFTW_COMPLEX *io = A;
     int pass;

     for (pass = 0; pass < m; ++pass, io += dist, W += 6) {
	  FFTW_REAL re[7];	/* real parts of the (twiddled) inputs */
	  FFTW_REAL im[7];	/* imaginary parts of the (twiddled) inputs */
	  int k;

	  /* Sample 0 carries no twiddle factor. */
	  re[0] = c_re(io[0]);
	  im[0] = c_im(io[0]);

	  /* Samples 1..6: (xr + i*xi) * (wr - i*wi), i.e. times conj(W[k - 1]). */
	  for (k = 1; k < 7; ++k) {
	       const FFTW_REAL xr = c_re(io[k * stride]);
	       const FFTW_REAL xi = c_im(io[k * stride]);
	       const FFTW_REAL wr = c_re(W[k - 1]);
	       const FFTW_REAL wi = c_im(W[k - 1]);

	       re[k] = (xr * wr) + (xi * wi);
	       im[k] = (xi * wr) - (xr * wi);
	  }

	  /*
	   * Every input has been read at this point, so the stores below may
	   * safely overwrite the slots the inputs came from.
	   */

	  /* Output 0: plain sum of all seven terms. */
	  c_re(io[0]) = re[0] + re[1] + re[2] + re[3] + re[4] + re[5] + re[6];
	  c_im(io[0]) = im[0] + im[1] + im[2] + im[3] + im[4] + im[5] + im[6];

	  /*
	   * Outputs come in pairs (1,6), (2,5), (3,4): both members share a
	   * common "base" term and differ only in the sign of "delta".
	   */

	  /* Pair (1, 6), real parts. */
	  {
	       const FFTW_REAL base = re[0] + (k623 * (re[1] + re[6])) - (k900 * (re[3] + re[4])) - (k222 * (re[2] + re[5]));
	       const FFTW_REAL delta = (k781 * (im[6] - im[1])) + (k974 * (im[5] - im[2])) + (k433 * (im[4] - im[3]));

	       c_re(io[stride]) = base + delta;
	       c_re(io[6 * stride]) = base - delta;
	  }
	  /* Pair (1, 6), imaginary parts. */
	  {
	       const FFTW_REAL base = im[0] + (k623 * (im[1] + im[6])) - (k900 * (im[3] + im[4])) - (k222 * (im[2] + im[5]));
	       const FFTW_REAL delta = (k781 * (re[1] - re[6])) + (k974 * (re[2] - re[5])) + (k433 * (re[3] - re[4]));

	       c_im(io[stride]) = base + delta;
	       c_im(io[6 * stride]) = base - delta;
	  }

	  /* Pair (2, 5), real parts. */
	  {
	       const FFTW_REAL base = re[0] + (k623 * (re[3] + re[4])) - (k900 * (re[2] + re[5])) - (k222 * (re[1] + re[6]));
	       const FFTW_REAL delta = (k974 * (im[6] - im[1])) + (k433 * (im[2] - im[5])) + (k781 * (im[3] - im[4]));

	       c_re(io[2 * stride]) = base + delta;
	       c_re(io[5 * stride]) = base - delta;
	  }
	  /* Pair (2, 5), imaginary parts. */
	  {
	       const FFTW_REAL base = im[0] + (k623 * (im[3] + im[4])) - (k900 * (im[2] + im[5])) - (k222 * (im[1] + im[6]));
	       const FFTW_REAL delta = (k974 * (re[1] - re[6])) + (k433 * (re[5] - re[2])) + (k781 * (re[4] - re[3]));

	       c_im(io[2 * stride]) = base + delta;
	       c_im(io[5 * stride]) = base - delta;
	  }

	  /* Pair (3, 4), real parts. */
	  {
	       const FFTW_REAL base = re[0] + (k623 * (re[2] + re[5])) - (k222 * (re[3] + re[4])) - (k900 * (re[1] + re[6]));
	       const FFTW_REAL delta = (k433 * (im[6] - im[1])) + (k781 * (im[2] - im[5])) + (k974 * (im[4] - im[3]));

	       c_re(io[3 * stride]) = base + delta;
	       c_re(io[4 * stride]) = base - delta;
	  }
	  /* Pair (3, 4), imaginary parts. */
	  {
	       const FFTW_REAL base = im[0] + (k623 * (im[2] + im[5])) - (k222 * (im[3] + im[4])) - (k900 * (im[1] + im[6]));
	       const FFTW_REAL delta = (k433 * (re[1] - re[6])) + (k781 * (re[5] - re[2])) + (k974 * (re[3] - re[4]));

	       c_im(io[3 * stride]) = base + delta;
	       c_im(io[4 * stride]) = base - delta;
	  }
     }
}
23010 
23011 /* This function contains 66 FP additions and 32 FP multiplications */
23012 
/*
 * fftwi_twiddle_8 -- in-place radix-8 twiddle butterfly, inverse direction.
 *
 *   A       first element of the data to transform (updated in place)
 *   W       twiddle factors; 7 entries are consumed per butterfly
 *   stride  distance, in elements, between the 8 points of one butterfly
 *   m       number of butterflies to perform
 *   dist    distance, in elements, between consecutive butterflies
 *
 * Each iteration reads inout[k * stride] for k = 0..7.  Point 0 is used
 * as is; point k >= 1 is multiplied by the complex conjugate of W[k - 1]
 * (re = tr*twr + ti*twi, im = ti*twr - tr*twi).  The eight values are
 * then combined with additions/subtractions and a single constant,
 * FFTW_K707106781 (presumably 1/sqrt(2) -- confirm in konst.h), and the
 * results are stored back to the same eight locations.
 */
fftwi_twiddle_8(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)23013 static void fftwi_twiddle_8(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
23014 {
23015      int i;
23016      FFTW_COMPLEX *inout;
23017      inout = A;
     /* one butterfly per iteration; W advances by the 7 factors just used */
23018      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 7) {
	  /* treX_0_j / timX_0_j: sum of a point pair, treX_1_j / timX_1_j: its difference */
23019 	  FFTW_REAL tre0_0_0;
23020 	  FFTW_REAL tim0_0_0;
23021 	  FFTW_REAL tre0_0_1;
23022 	  FFTW_REAL tim0_0_1;
23023 	  FFTW_REAL tre0_0_2;
23024 	  FFTW_REAL tim0_0_2;
23025 	  FFTW_REAL tre0_0_3;
23026 	  FFTW_REAL tim0_0_3;
23027 	  FFTW_REAL tre0_1_0;
23028 	  FFTW_REAL tim0_1_0;
23029 	  FFTW_REAL tre0_1_1;
23030 	  FFTW_REAL tim0_1_1;
23031 	  FFTW_REAL tre0_1_2;
23032 	  FFTW_REAL tim0_1_2;
23033 	  FFTW_REAL tre0_1_3;
23034 	  FFTW_REAL tim0_1_3;
	  /* pair (0, 4): point 0 untouched, point 4 times conj(W[3]) */
23035 	  {
23036 	       FFTW_REAL tre1_0_0;
23037 	       FFTW_REAL tim1_0_0;
23038 	       FFTW_REAL tre1_1_0;
23039 	       FFTW_REAL tim1_1_0;
23040 	       tre1_0_0 = c_re(inout[0]);
23041 	       tim1_0_0 = c_im(inout[0]);
23042 	       {
23043 		    FFTW_REAL tr;
23044 		    FFTW_REAL ti;
23045 		    FFTW_REAL twr;
23046 		    FFTW_REAL twi;
23047 		    tr = c_re(inout[4 * stride]);
23048 		    ti = c_im(inout[4 * stride]);
23049 		    twr = c_re(W[3]);
23050 		    twi = c_im(W[3]);
23051 		    tre1_1_0 = (tr * twr) + (ti * twi);
23052 		    tim1_1_0 = (ti * twr) - (tr * twi);
23053 	       }
23054 	       tre0_0_0 = tre1_0_0 + tre1_1_0;
23055 	       tim0_0_0 = tim1_0_0 + tim1_1_0;
23056 	       tre0_1_0 = tre1_0_0 - tre1_1_0;
23057 	       tim0_1_0 = tim1_0_0 - tim1_1_0;
23058 	  }
	  /* pair (1, 5): twiddled by conj(W[0]) and conj(W[4]) */
23059 	  {
23060 	       FFTW_REAL tre1_0_0;
23061 	       FFTW_REAL tim1_0_0;
23062 	       FFTW_REAL tre1_1_0;
23063 	       FFTW_REAL tim1_1_0;
23064 	       {
23065 		    FFTW_REAL tr;
23066 		    FFTW_REAL ti;
23067 		    FFTW_REAL twr;
23068 		    FFTW_REAL twi;
23069 		    tr = c_re(inout[stride]);
23070 		    ti = c_im(inout[stride]);
23071 		    twr = c_re(W[0]);
23072 		    twi = c_im(W[0]);
23073 		    tre1_0_0 = (tr * twr) + (ti * twi);
23074 		    tim1_0_0 = (ti * twr) - (tr * twi);
23075 	       }
23076 	       {
23077 		    FFTW_REAL tr;
23078 		    FFTW_REAL ti;
23079 		    FFTW_REAL twr;
23080 		    FFTW_REAL twi;
23081 		    tr = c_re(inout[5 * stride]);
23082 		    ti = c_im(inout[5 * stride]);
23083 		    twr = c_re(W[4]);
23084 		    twi = c_im(W[4]);
23085 		    tre1_1_0 = (tr * twr) + (ti * twi);
23086 		    tim1_1_0 = (ti * twr) - (tr * twi);
23087 	       }
23088 	       tre0_0_1 = tre1_0_0 + tre1_1_0;
23089 	       tim0_0_1 = tim1_0_0 + tim1_1_0;
23090 	       tre0_1_1 = tre1_0_0 - tre1_1_0;
23091 	       tim0_1_1 = tim1_0_0 - tim1_1_0;
23092 	  }
	  /* pair (2, 6): twiddled by conj(W[1]) and conj(W[5]) */
23093 	  {
23094 	       FFTW_REAL tre1_0_0;
23095 	       FFTW_REAL tim1_0_0;
23096 	       FFTW_REAL tre1_1_0;
23097 	       FFTW_REAL tim1_1_0;
23098 	       {
23099 		    FFTW_REAL tr;
23100 		    FFTW_REAL ti;
23101 		    FFTW_REAL twr;
23102 		    FFTW_REAL twi;
23103 		    tr = c_re(inout[2 * stride]);
23104 		    ti = c_im(inout[2 * stride]);
23105 		    twr = c_re(W[1]);
23106 		    twi = c_im(W[1]);
23107 		    tre1_0_0 = (tr * twr) + (ti * twi);
23108 		    tim1_0_0 = (ti * twr) - (tr * twi);
23109 	       }
23110 	       {
23111 		    FFTW_REAL tr;
23112 		    FFTW_REAL ti;
23113 		    FFTW_REAL twr;
23114 		    FFTW_REAL twi;
23115 		    tr = c_re(inout[6 * stride]);
23116 		    ti = c_im(inout[6 * stride]);
23117 		    twr = c_re(W[5]);
23118 		    twi = c_im(W[5]);
23119 		    tre1_1_0 = (tr * twr) + (ti * twi);
23120 		    tim1_1_0 = (ti * twr) - (tr * twi);
23121 	       }
23122 	       tre0_0_2 = tre1_0_0 + tre1_1_0;
23123 	       tim0_0_2 = tim1_0_0 + tim1_1_0;
23124 	       tre0_1_2 = tre1_0_0 - tre1_1_0;
23125 	       tim0_1_2 = tim1_0_0 - tim1_1_0;
23126 	  }
	  /* pair (3, 7): twiddled by conj(W[2]) and conj(W[6]) */
23127 	  {
23128 	       FFTW_REAL tre1_0_0;
23129 	       FFTW_REAL tim1_0_0;
23130 	       FFTW_REAL tre1_1_0;
23131 	       FFTW_REAL tim1_1_0;
23132 	       {
23133 		    FFTW_REAL tr;
23134 		    FFTW_REAL ti;
23135 		    FFTW_REAL twr;
23136 		    FFTW_REAL twi;
23137 		    tr = c_re(inout[3 * stride]);
23138 		    ti = c_im(inout[3 * stride]);
23139 		    twr = c_re(W[2]);
23140 		    twi = c_im(W[2]);
23141 		    tre1_0_0 = (tr * twr) + (ti * twi);
23142 		    tim1_0_0 = (ti * twr) - (tr * twi);
23143 	       }
23144 	       {
23145 		    FFTW_REAL tr;
23146 		    FFTW_REAL ti;
23147 		    FFTW_REAL twr;
23148 		    FFTW_REAL twi;
23149 		    tr = c_re(inout[7 * stride]);
23150 		    ti = c_im(inout[7 * stride]);
23151 		    twr = c_re(W[6]);
23152 		    twi = c_im(W[6]);
23153 		    tre1_1_0 = (tr * twr) + (ti * twi);
23154 		    tim1_1_0 = (ti * twr) - (tr * twi);
23155 	       }
23156 	       tre0_0_3 = tre1_0_0 + tre1_1_0;
23157 	       tim0_0_3 = tim1_0_0 + tim1_1_0;
23158 	       tre0_1_3 = tre1_0_0 - tre1_1_0;
23159 	       tim0_1_3 = tim1_0_0 - tim1_1_0;
23160 	  }
	  /*
	   * The four pair sums produce outputs 0, 4, 2 and 6.  Outputs 2 and 6
	   * add/subtract the second term rotated by a quarter turn
	   * (re -= im', im += re' is a multiplication by +i).
	   */
23161 	  {
23162 	       FFTW_REAL tre1_0_0;
23163 	       FFTW_REAL tim1_0_0;
23164 	       FFTW_REAL tre1_0_1;
23165 	       FFTW_REAL tim1_0_1;
23166 	       FFTW_REAL tre1_1_0;
23167 	       FFTW_REAL tim1_1_0;
23168 	       FFTW_REAL tre1_1_1;
23169 	       FFTW_REAL tim1_1_1;
23170 	       tre1_0_0 = tre0_0_0 + tre0_0_2;
23171 	       tim1_0_0 = tim0_0_0 + tim0_0_2;
23172 	       tre1_1_0 = tre0_0_0 - tre0_0_2;
23173 	       tim1_1_0 = tim0_0_0 - tim0_0_2;
23174 	       tre1_0_1 = tre0_0_1 + tre0_0_3;
23175 	       tim1_0_1 = tim0_0_1 + tim0_0_3;
23176 	       tre1_1_1 = tre0_0_1 - tre0_0_3;
23177 	       tim1_1_1 = tim0_0_1 - tim0_0_3;
23178 	       c_re(inout[0]) = tre1_0_0 + tre1_0_1;
23179 	       c_im(inout[0]) = tim1_0_0 + tim1_0_1;
23180 	       c_re(inout[4 * stride]) = tre1_0_0 - tre1_0_1;
23181 	       c_im(inout[4 * stride]) = tim1_0_0 - tim1_0_1;
23182 	       c_re(inout[2 * stride]) = tre1_1_0 - tim1_1_1;
23183 	       c_im(inout[2 * stride]) = tim1_1_0 + tre1_1_1;
23184 	       c_re(inout[6 * stride]) = tre1_1_0 + tim1_1_1;
23185 	       c_im(inout[6 * stride]) = tim1_1_0 - tre1_1_1;
23186 	  }
	  /*
	   * The four pair differences produce outputs 1, 5, 3 and 7.  The
	   * differences of pairs (1, 5) and (3, 7) are first scaled and mixed
	   * with FFTW_K707106781 before the final additions/subtractions.
	   */
23187 	  {
23188 	       FFTW_REAL tre1_0_0;
23189 	       FFTW_REAL tim1_0_0;
23190 	       FFTW_REAL tre1_0_1;
23191 	       FFTW_REAL tim1_0_1;
23192 	       FFTW_REAL tre1_1_0;
23193 	       FFTW_REAL tim1_1_0;
23194 	       FFTW_REAL tre1_1_1;
23195 	       FFTW_REAL tim1_1_1;
23196 	       tre1_0_0 = tre0_1_0 - tim0_1_2;
23197 	       tim1_0_0 = tim0_1_0 + tre0_1_2;
23198 	       tre1_1_0 = tre0_1_0 + tim0_1_2;
23199 	       tim1_1_0 = tim0_1_0 - tre0_1_2;
23200 	       {
23201 		    FFTW_REAL tre2_0_0;
23202 		    FFTW_REAL tim2_0_0;
23203 		    FFTW_REAL tre2_1_0;
23204 		    FFTW_REAL tim2_1_0;
23205 		    tre2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_1 - tim0_1_1);
23206 		    tim2_0_0 = ((FFTW_REAL) FFTW_K707106781) * (tim0_1_1 + tre0_1_1);
23207 		    tre2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_3 + tim0_1_3);
23208 		    tim2_1_0 = ((FFTW_REAL) FFTW_K707106781) * (tre0_1_3 - tim0_1_3);
23209 		    tre1_0_1 = tre2_0_0 - tre2_1_0;
23210 		    tim1_0_1 = tim2_0_0 + tim2_1_0;
23211 		    tre1_1_1 = tre2_0_0 + tre2_1_0;
23212 		    tim1_1_1 = tim2_0_0 - tim2_1_0;
23213 	       }
23214 	       c_re(inout[stride]) = tre1_0_0 + tre1_0_1;
23215 	       c_im(inout[stride]) = tim1_0_0 + tim1_0_1;
23216 	       c_re(inout[5 * stride]) = tre1_0_0 - tre1_0_1;
23217 	       c_im(inout[5 * stride]) = tim1_0_0 - tim1_0_1;
23218 	       c_re(inout[3 * stride]) = tre1_1_0 - tim1_1_1;
23219 	       c_im(inout[3 * stride]) = tim1_1_0 + tre1_1_1;
23220 	       c_re(inout[7 * stride]) = tre1_1_0 + tim1_1_1;
23221 	       c_im(inout[7 * stride]) = tim1_1_0 - tre1_1_1;
23222 	  }
23223      }
23224 }
23225 
23226 /* This function contains 108 FP additions and 72 FP multiplications */
23227 
/*
 * fftwi_twiddle_9 -- in-place radix-9 twiddle butterfly, inverse direction.
 *
 *   A       first element of the data to transform (updated in place)
 *   W       twiddle factors; 8 entries are consumed per butterfly
 *   stride  distance, in elements, between the 9 points of one butterfly
 *   m       number of butterflies to perform
 *   dist    distance, in elements, between consecutive butterflies
 *
 * Each iteration reads inout[k * stride] for k = 0..8.  Point 0 is used
 * as is; point k >= 1 is multiplied by the complex conjugate of W[k - 1]
 * (re = tr*twr + ti*twi, im = ti*twr - tr*twi).  The points are handled
 * as three groups of three -- (0, 3, 6), (1, 4, 7) and (2, 5, 8) -- each
 * reduced by the same 3-point combination, and the three group results
 * are then merged by a second round of 3-point combinations, with fixed
 * constant factors applied to the second and third group.  Results are
 * stored back to the same nine locations.
 *
 * The FFTW_K... constants are defined elsewhere (presumably konst.h);
 * FFTW_K499999999 and FFTW_K866025403 look like 1/2 and sqrt(3)/2 --
 * confirm there.
 */
fftwi_twiddle_9(FFTW_COMPLEX * A,const FFTW_COMPLEX * W,int stride,int m,int dist)23228 static void fftwi_twiddle_9(FFTW_COMPLEX *A, const FFTW_COMPLEX *W, int stride, int m, int dist)
23229 {
23230      int i;
23231      FFTW_COMPLEX *inout;
23232      inout = A;
     /* one butterfly per iteration; W advances by the 8 factors just used */
23233      for (i = 0; i < m; i = i + 1, inout = inout + dist, W = W + 8) {
	  /* tre0_a_g / tim0_a_g: output a (0..2) of the 3-point stage for group g (0..2) */
23234 	  FFTW_REAL tre0_0_0;
23235 	  FFTW_REAL tim0_0_0;
23236 	  FFTW_REAL tre0_0_1;
23237 	  FFTW_REAL tim0_0_1;
23238 	  FFTW_REAL tre0_0_2;
23239 	  FFTW_REAL tim0_0_2;
23240 	  FFTW_REAL tre0_1_0;
23241 	  FFTW_REAL tim0_1_0;
23242 	  FFTW_REAL tre0_1_1;
23243 	  FFTW_REAL tim0_1_1;
23244 	  FFTW_REAL tre0_1_2;
23245 	  FFTW_REAL tim0_1_2;
23246 	  FFTW_REAL tre0_2_0;
23247 	  FFTW_REAL tim0_2_0;
23248 	  FFTW_REAL tre0_2_1;
23249 	  FFTW_REAL tim0_2_1;
23250 	  FFTW_REAL tre0_2_2;
23251 	  FFTW_REAL tim0_2_2;
	  /* group 0: points 0, 3, 6 (point 0 untouched; conj(W[2]), conj(W[5])) */
23252 	  {
23253 	       FFTW_REAL tre1_0_0;
23254 	       FFTW_REAL tim1_0_0;
23255 	       FFTW_REAL tre1_1_0;
23256 	       FFTW_REAL tim1_1_0;
23257 	       FFTW_REAL tre1_2_0;
23258 	       FFTW_REAL tim1_2_0;
23259 	       tre1_0_0 = c_re(inout[0]);
23260 	       tim1_0_0 = c_im(inout[0]);
23261 	       {
23262 		    FFTW_REAL tr;
23263 		    FFTW_REAL ti;
23264 		    FFTW_REAL twr;
23265 		    FFTW_REAL twi;
23266 		    tr = c_re(inout[3 * stride]);
23267 		    ti = c_im(inout[3 * stride]);
23268 		    twr = c_re(W[2]);
23269 		    twi = c_im(W[2]);
23270 		    tre1_1_0 = (tr * twr) + (ti * twi);
23271 		    tim1_1_0 = (ti * twr) - (tr * twi);
23272 	       }
23273 	       {
23274 		    FFTW_REAL tr;
23275 		    FFTW_REAL ti;
23276 		    FFTW_REAL twr;
23277 		    FFTW_REAL twi;
23278 		    tr = c_re(inout[6 * stride]);
23279 		    ti = c_im(inout[6 * stride]);
23280 		    twr = c_re(W[5]);
23281 		    twi = c_im(W[5]);
23282 		    tre1_2_0 = (tr * twr) + (ti * twi);
23283 		    tim1_2_0 = (ti * twr) - (tr * twi);
23284 	       }
	       /* 3-point combination: plain sum, then the two mixed outputs */
23285 	       tre0_0_0 = tre1_0_0 + tre1_1_0 + tre1_2_0;
23286 	       tim0_0_0 = tim1_0_0 + tim1_1_0 + tim1_2_0;
23287 	       {
23288 		    FFTW_REAL tre2_0_0;
23289 		    FFTW_REAL tre2_1_0;
23290 		    tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
23291 		    tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
23292 		    tre0_1_0 = tre2_0_0 + tre2_1_0;
23293 		    tre0_2_0 = tre2_0_0 - tre2_1_0;
23294 	       }
23295 	       {
23296 		    FFTW_REAL tim2_0_0;
23297 		    FFTW_REAL tim2_1_0;
23298 		    tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
23299 		    tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
23300 		    tim0_1_0 = tim2_0_0 + tim2_1_0;
23301 		    tim0_2_0 = tim2_0_0 - tim2_1_0;
23302 	       }
23303 	  }
	  /* group 1: points 1, 4, 7 (conj(W[0]), conj(W[3]), conj(W[6])) */
23304 	  {
23305 	       FFTW_REAL tre1_0_0;
23306 	       FFTW_REAL tim1_0_0;
23307 	       FFTW_REAL tre1_1_0;
23308 	       FFTW_REAL tim1_1_0;
23309 	       FFTW_REAL tre1_2_0;
23310 	       FFTW_REAL tim1_2_0;
23311 	       {
23312 		    FFTW_REAL tr;
23313 		    FFTW_REAL ti;
23314 		    FFTW_REAL twr;
23315 		    FFTW_REAL twi;
23316 		    tr = c_re(inout[stride]);
23317 		    ti = c_im(inout[stride]);
23318 		    twr = c_re(W[0]);
23319 		    twi = c_im(W[0]);
23320 		    tre1_0_0 = (tr * twr) + (ti * twi);
23321 		    tim1_0_0 = (ti * twr) - (tr * twi);
23322 	       }
23323 	       {
23324 		    FFTW_REAL tr;
23325 		    FFTW_REAL ti;
23326 		    FFTW_REAL twr;
23327 		    FFTW_REAL twi;
23328 		    tr = c_re(inout[4 * stride]);
23329 		    ti = c_im(inout[4 * stride]);
23330 		    twr = c_re(W[3]);
23331 		    twi = c_im(W[3]);
23332 		    tre1_1_0 = (tr * twr) + (ti * twi);
23333 		    tim1_1_0 = (ti * twr) - (tr * twi);
23334 	       }
23335 	       {
23336 		    FFTW_REAL tr;
23337 		    FFTW_REAL ti;
23338 		    FFTW_REAL twr;
23339 		    FFTW_REAL twi;
23340 		    tr = c_re(inout[7 * stride]);
23341 		    ti = c_im(inout[7 * stride]);
23342 		    twr = c_re(W[6]);
23343 		    twi = c_im(W[6]);
23344 		    tre1_2_0 = (tr * twr) + (ti * twi);
23345 		    tim1_2_0 = (ti * twr) - (tr * twi);
23346 	       }
23347 	       tre0_0_1 = tre1_0_0 + tre1_1_0 + tre1_2_0;
23348 	       tim0_0_1 = tim1_0_0 + tim1_1_0 + tim1_2_0;
23349 	       {
23350 		    FFTW_REAL tre2_0_0;
23351 		    FFTW_REAL tre2_1_0;
23352 		    tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
23353 		    tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
23354 		    tre0_1_1 = tre2_0_0 + tre2_1_0;
23355 		    tre0_2_1 = tre2_0_0 - tre2_1_0;
23356 	       }
23357 	       {
23358 		    FFTW_REAL tim2_0_0;
23359 		    FFTW_REAL tim2_1_0;
23360 		    tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
23361 		    tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
23362 		    tim0_1_1 = tim2_0_0 + tim2_1_0;
23363 		    tim0_2_1 = tim2_0_0 - tim2_1_0;
23364 	       }
23365 	  }
	  /* group 2: points 2, 5, 8 (conj(W[1]), conj(W[4]), conj(W[7])) */
23366 	  {
23367 	       FFTW_REAL tre1_0_0;
23368 	       FFTW_REAL tim1_0_0;
23369 	       FFTW_REAL tre1_1_0;
23370 	       FFTW_REAL tim1_1_0;
23371 	       FFTW_REAL tre1_2_0;
23372 	       FFTW_REAL tim1_2_0;
23373 	       {
23374 		    FFTW_REAL tr;
23375 		    FFTW_REAL ti;
23376 		    FFTW_REAL twr;
23377 		    FFTW_REAL twi;
23378 		    tr = c_re(inout[2 * stride]);
23379 		    ti = c_im(inout[2 * stride]);
23380 		    twr = c_re(W[1]);
23381 		    twi = c_im(W[1]);
23382 		    tre1_0_0 = (tr * twr) + (ti * twi);
23383 		    tim1_0_0 = (ti * twr) - (tr * twi);
23384 	       }
23385 	       {
23386 		    FFTW_REAL tr;
23387 		    FFTW_REAL ti;
23388 		    FFTW_REAL twr;
23389 		    FFTW_REAL twi;
23390 		    tr = c_re(inout[5 * stride]);
23391 		    ti = c_im(inout[5 * stride]);
23392 		    twr = c_re(W[4]);
23393 		    twi = c_im(W[4]);
23394 		    tre1_1_0 = (tr * twr) + (ti * twi);
23395 		    tim1_1_0 = (ti * twr) - (tr * twi);
23396 	       }
23397 	       {
23398 		    FFTW_REAL tr;
23399 		    FFTW_REAL ti;
23400 		    FFTW_REAL twr;
23401 		    FFTW_REAL twi;
23402 		    tr = c_re(inout[8 * stride]);
23403 		    ti = c_im(inout[8 * stride]);
23404 		    twr = c_re(W[7]);
23405 		    twi = c_im(W[7]);
23406 		    tre1_2_0 = (tr * twr) + (ti * twi);
23407 		    tim1_2_0 = (ti * twr) - (tr * twi);
23408 	       }
23409 	       tre0_0_2 = tre1_0_0 + tre1_1_0 + tre1_2_0;
23410 	       tim0_0_2 = tim1_0_0 + tim1_1_0 + tim1_2_0;
23411 	       {
23412 		    FFTW_REAL tre2_0_0;
23413 		    FFTW_REAL tre2_1_0;
23414 		    tre2_0_0 = tre1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
23415 		    tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
23416 		    tre0_1_2 = tre2_0_0 + tre2_1_0;
23417 		    tre0_2_2 = tre2_0_0 - tre2_1_0;
23418 	       }
23419 	       {
23420 		    FFTW_REAL tim2_0_0;
23421 		    FFTW_REAL tim2_1_0;
23422 		    tim2_0_0 = tim1_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
23423 		    tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
23424 		    tim0_1_2 = tim2_0_0 + tim2_1_0;
23425 		    tim0_2_2 = tim2_0_0 - tim2_1_0;
23426 	       }
23427 	  }
	  /* outputs 0, 3, 6: 3-point combination of the three group sums, no extra factors */
23428 	  c_re(inout[0]) = tre0_0_0 + tre0_0_1 + tre0_0_2;
23429 	  c_im(inout[0]) = tim0_0_0 + tim0_0_1 + tim0_0_2;
23430 	  {
23431 	       FFTW_REAL tre2_0_0;
23432 	       FFTW_REAL tre2_1_0;
23433 	       tre2_0_0 = tre0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tre0_0_1 + tre0_0_2));
23434 	       tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim0_0_2 - tim0_0_1);
23435 	       c_re(inout[3 * stride]) = tre2_0_0 + tre2_1_0;
23436 	       c_re(inout[6 * stride]) = tre2_0_0 - tre2_1_0;
23437 	  }
23438 	  {
23439 	       FFTW_REAL tim2_0_0;
23440 	       FFTW_REAL tim2_1_0;
23441 	       tim2_0_0 = tim0_0_0 - (((FFTW_REAL) FFTW_K499999999) * (tim0_0_1 + tim0_0_2));
23442 	       tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre0_0_1 - tre0_0_2);
23443 	       c_im(inout[3 * stride]) = tim2_0_0 + tim2_1_0;
23444 	       c_im(inout[6 * stride]) = tim2_0_0 - tim2_1_0;
23445 	  }
	  /*
	   * outputs 1, 4, 7: the tre0_1_* values; groups 1 and 2 are first
	   * multiplied by fixed complex constants, then 3-point combined.
	   */
23446 	  {
23447 	       FFTW_REAL tre1_1_0;
23448 	       FFTW_REAL tim1_1_0;
23449 	       FFTW_REAL tre1_2_0;
23450 	       FFTW_REAL tim1_2_0;
23451 	       tre1_1_0 = (((FFTW_REAL) FFTW_K766044443) * tre0_1_1) - (((FFTW_REAL) FFTW_K642787609) * tim0_1_1);
23452 	       tim1_1_0 = (((FFTW_REAL) FFTW_K766044443) * tim0_1_1) + (((FFTW_REAL) FFTW_K642787609) * tre0_1_1);
23453 	       tre1_2_0 = (((FFTW_REAL) FFTW_K173648177) * tre0_1_2) - (((FFTW_REAL) FFTW_K984807753) * tim0_1_2);
23454 	       tim1_2_0 = (((FFTW_REAL) FFTW_K173648177) * tim0_1_2) + (((FFTW_REAL) FFTW_K984807753) * tre0_1_2);
23455 	       c_re(inout[stride]) = tre0_1_0 + tre1_1_0 + tre1_2_0;
23456 	       c_im(inout[stride]) = tim0_1_0 + tim1_1_0 + tim1_2_0;
23457 	       {
23458 		    FFTW_REAL tre2_0_0;
23459 		    FFTW_REAL tre2_1_0;
23460 		    tre2_0_0 = tre0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tre1_1_0 + tre1_2_0));
23461 		    tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
23462 		    c_re(inout[4 * stride]) = tre2_0_0 + tre2_1_0;
23463 		    c_re(inout[7 * stride]) = tre2_0_0 - tre2_1_0;
23464 	       }
23465 	       {
23466 		    FFTW_REAL tim2_0_0;
23467 		    FFTW_REAL tim2_1_0;
23468 		    tim2_0_0 = tim0_1_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
23469 		    tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 - tre1_2_0);
23470 		    c_im(inout[4 * stride]) = tim2_0_0 + tim2_1_0;
23471 		    c_im(inout[7 * stride]) = tim2_0_0 - tim2_1_0;
23472 	       }
23473 	  }
	  /*
	   * outputs 2, 5, 8: the tre0_2_* values.  Same scheme as above, but
	   * tre1_2_0 is formed with the opposite sign convention here, which is
	   * why the real-part expressions below subtract it (and the tim2_1_0
	   * term adds it) where the previous block did the reverse.
	   */
23474 	  {
23475 	       FFTW_REAL tre1_1_0;
23476 	       FFTW_REAL tim1_1_0;
23477 	       FFTW_REAL tre1_2_0;
23478 	       FFTW_REAL tim1_2_0;
23479 	       tre1_1_0 = (((FFTW_REAL) FFTW_K173648177) * tre0_2_1) - (((FFTW_REAL) FFTW_K984807753) * tim0_2_1);
23480 	       tim1_1_0 = (((FFTW_REAL) FFTW_K173648177) * tim0_2_1) + (((FFTW_REAL) FFTW_K984807753) * tre0_2_1);
23481 	       tre1_2_0 = (((FFTW_REAL) FFTW_K939692620) * tre0_2_2) + (((FFTW_REAL) FFTW_K342020143) * tim0_2_2);
23482 	       tim1_2_0 = (((FFTW_REAL) FFTW_K342020143) * tre0_2_2) - (((FFTW_REAL) FFTW_K939692620) * tim0_2_2);
23483 	       c_re(inout[2 * stride]) = tre0_2_0 + tre1_1_0 - tre1_2_0;
23484 	       c_im(inout[2 * stride]) = tim0_2_0 + tim1_1_0 + tim1_2_0;
23485 	       {
23486 		    FFTW_REAL tre2_0_0;
23487 		    FFTW_REAL tre2_1_0;
23488 		    tre2_0_0 = tre0_2_0 + (((FFTW_REAL) FFTW_K499999999) * (tre1_2_0 - tre1_1_0));
23489 		    tre2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tim1_2_0 - tim1_1_0);
23490 		    c_re(inout[5 * stride]) = tre2_0_0 + tre2_1_0;
23491 		    c_re(inout[8 * stride]) = tre2_0_0 - tre2_1_0;
23492 	       }
23493 	       {
23494 		    FFTW_REAL tim2_0_0;
23495 		    FFTW_REAL tim2_1_0;
23496 		    tim2_0_0 = tim0_2_0 - (((FFTW_REAL) FFTW_K499999999) * (tim1_1_0 + tim1_2_0));
23497 		    tim2_1_0 = ((FFTW_REAL) FFTW_K866025403) * (tre1_1_0 + tre1_2_0);
23498 		    c_im(inout[5 * stride]) = tim2_0_0 + tim2_1_0;
23499 		    c_im(inout[8 * stride]) = tim2_0_0 - tim2_1_0;
23500 	       }
23501 	  }
23502      }
23503 }
23504 
23505 
/*
 * Generic twiddle pass for an arbitrary radix r (forward direction).
 *
 *   A       data, transformed in place
 *   W       table of twiddle factors, indexed by an exponent in [0, n)
 *   m       number of butterflies; also the spacing (in units of stride)
 *           between the r points of one butterfly
 *   r       radix, i.e. number of points per butterfly
 *   n       period of the W table (presumably n == m * r -- confirm
 *           against the caller)
 *   stride  distance, in elements, between consecutive data points
 *
 * For each i in [0, m) the r inputs in[j] = A[(i + j * m) * stride] are
 * replaced by
 *
 *      out[k] = sum over j of in[j] * W[j * (i + m * k) reduced to [0, n)]
 *
 * The outputs are accumulated in a scratch buffer of r elements and
 * copied back only after all of them are known, because every output
 * depends on every input.
 */
static void fftw_twiddle_generic(FFTW_COMPLEX *A, const FFTW_COMPLEX *W,
				 int m, int r, int n, int stride)
{
     int i, j, k;
     const FFTW_COMPLEX *jp;
     FFTW_COMPLEX *kp;
     FFTW_COMPLEX *tmp = (FFTW_COMPLEX *)
     fftw_malloc(r * sizeof(FFTW_COMPLEX));

     for (i = 0; i < m; ++i) {
	  for (k = 0, kp = tmp; k < r; ++k, kp++) {
	       FFTW_REAL r0, i0, rt, it, rw, iw;
	       int l1 = i + m * k;	/* exponent step for this output */
	       int l0;			/* running exponent, kept in [0, n) */

	       r0 = i0 = 0.0;
	       for (j = 0, jp = A + i * stride, l0 = 0; j < r; ++j,
		    jp += m * stride) {
		    rw = c_re(W[l0]);
		    iw = c_im(W[l0]);
		    rt = c_re(*jp);
		    it = c_im(*jp);
		    /* accumulate in[j] * W[l0] */
		    r0 += rt * rw - it * iw;
		    i0 += rt * iw + it * rw;
		    l0 += l1;
		    /*
		     * Wrap with >= rather than >: an exponent equal to n
		     * must map back to 0, otherwise W[n] -- one past the
		     * n-entry table -- would be read on the next pass.
		     * A single subtraction suffices as long as l1 < n.
		     */
		    if (l0 >= n)
			 l0 -= n;
	       }
	       c_re(*kp) = r0;
	       c_im(*kp) = i0;
	  }
	  /* all r outputs are ready: overwrite the inputs */
	  for (k = 0, kp = A + i * stride; k < r; ++k, kp += m * stride)
	       *kp = tmp[k];
     }

     fftw_free(tmp);
}
23543 
/*
 * Generic twiddle pass for an arbitrary radix r (inverse direction).
 *
 * Identical in structure to the forward generic pass, except that each
 * input is multiplied by the complex conjugate of the table entry
 * (re = rt*rw + it*iw, im = it*rw - rt*iw).
 *
 *   A       data, transformed in place
 *   W       table of twiddle factors, indexed by an exponent in [0, n)
 *   m       number of butterflies; also the spacing (in units of stride)
 *           between the r points of one butterfly
 *   r       radix, i.e. number of points per butterfly
 *   n       period of the W table (presumably n == m * r -- confirm
 *           against the caller)
 *   stride  distance, in elements, between consecutive data points
 *
 * For each i in [0, m) the r inputs in[j] = A[(i + j * m) * stride] are
 * replaced by
 *
 *      out[k] = sum over j of in[j] * conj(W[j * (i + m * k) reduced to [0, n)])
 *
 * using a scratch buffer of r elements so that no input is overwritten
 * before every output has been computed.
 */
static void fftwi_twiddle_generic(FFTW_COMPLEX *A, const FFTW_COMPLEX *W,
				  int m, int r, int n, int stride)
{
     int i, j, k;
     const FFTW_COMPLEX *jp;
     FFTW_COMPLEX *kp;
     FFTW_COMPLEX *tmp = (FFTW_COMPLEX *)
     fftw_malloc(r * sizeof(FFTW_COMPLEX));

     for (i = 0; i < m; ++i) {
	  for (k = 0, kp = tmp; k < r; ++k, kp++) {
	       FFTW_REAL r0, i0, rt, it, rw, iw;
	       int l1 = i + m * k;	/* exponent step for this output */
	       int l0;			/* running exponent, kept in [0, n) */

	       r0 = i0 = 0.0;
	       for (j = 0, jp = A + i * stride, l0 = 0; j < r; ++j,
		    jp += m * stride) {
		    rw = c_re(W[l0]);
		    iw = c_im(W[l0]);
		    rt = c_re(*jp);
		    it = c_im(*jp);
		    /* accumulate in[j] * conj(W[l0]) */
		    r0 += rt * rw + it * iw;
		    i0 += it * rw - rt * iw;
		    l0 += l1;
		    /*
		     * Wrap with >= rather than >: an exponent equal to n
		     * must map back to 0, otherwise W[n] -- one past the
		     * n-entry table -- would be read on the next pass.
		     * A single subtraction suffices as long as l1 < n.
		     */
		    if (l0 >= n)
			 l0 -= n;
	       }
	       c_re(*kp) = r0;
	       c_im(*kp) = i0;
	  }
	  /* all r outputs are ready: overwrite the inputs */
	  for (k = 0, kp = A + i * stride; k < r; ++k, kp += m * stride)
	       *kp = tmp[k];
     }

     fftw_free(tmp);
}
23581 
23582 
23583 /*
23584  * malloc.c -- memory allocation related functions
23585  */
23586 
/*
 * Allocate n bytes with malloc().  A request for 0 bytes is turned
 * into a request for 1 byte.  When malloc() fails, fftw_die() is
 * called with an out-of-memory message; whatever malloc() returned is
 * handed back to the caller afterwards.
 */
static void *fftw_malloc(size_t n)
{
     void *block = malloc(n == 0 ? 1 : n);

     if (block == 0)
          fftw_die("fftw_malloc: out of memory\n");

     return block;
}
23601 
/*
 * Release memory obtained from fftw_malloc().  Passing a null pointer
 * is allowed and does nothing, since free() itself ignores null.
 */
static void fftw_free(void *p)
{
     free(p);
}
23608 
23609 /*
23610  * planner.c -- find the optimal plan
23611  */
23612 
23613 
23614 
23615 /* constructors --- I wish I had ML */
make_node(void)23616 static fftw_plan_node *make_node(void)
23617 {
23618      fftw_plan_node *p = (fftw_plan_node *)
23619      fftw_malloc(sizeof(fftw_plan_node));
23620      p->refcnt = 0;
23621      fftw_node_cnt++;
23622      return p;
23623 }
23624 
/* Take one more reference on plan node p. */
static void use_node(fftw_plan_node *p)
{
     p->refcnt += 1;
}
23629 
make_node_notw(int size,notw_codelet * codelet)23630 static fftw_plan_node *make_node_notw(int size, notw_codelet *codelet)
23631 {
23632      fftw_plan_node *p = make_node();
23633 
23634      p->type = FFTW_NOTW;
23635      p->nodeu.notw.size = size;
23636      p->nodeu.notw.codelet = codelet;
23637      return p;
23638 }
23639 
/*
 * Build a node of type FFTW_TWIDDLE: a twiddle codelet of the given
 * size applied on top of the sub-plan `recurse`.  The new node takes a
 * reference on `recurse`.  No twiddle factors are attached yet (tw is
 * set to 0).  The parameters n and flags are accepted but not used
 * here.
 */
static fftw_plan_node *make_node_twiddle(int n, int size, twiddle_codelet *codelet,
                                         fftw_plan_node *recurse,
                                         int flags)
{
     fftw_plan_node *node = make_node();

     node->type = FFTW_TWIDDLE;
     node->nodeu.twiddle.tw = 0;
     node->nodeu.twiddle.size = size;
     node->nodeu.twiddle.codelet = codelet;
     node->nodeu.twiddle.recurse = recurse;
     use_node(recurse);

     return node;
}
23654 
/*
 * Build a node of type FFTW_GENERIC: a generic-radix codelet of the
 * given size applied on top of the sub-plan `recurse`.  The new node
 * takes a reference on `recurse`.  No twiddle factors are attached yet
 * (tw is set to 0).  The parameters n and flags are accepted but not
 * used here.
 */
static fftw_plan_node *make_node_generic(int n, int size,
                                         generic_codelet *codelet,
                                         fftw_plan_node *recurse,
                                         int flags)
{
     fftw_plan_node *node = make_node();

     node->type = FFTW_GENERIC;
     node->nodeu.generic.tw = 0;
     node->nodeu.generic.size = size;
     node->nodeu.generic.codelet = codelet;
     node->nodeu.generic.recurse = recurse;
     use_node(recurse);

     return node;
}
23670 
/*
 * Drop one reference on node p (a null p is ignored).  When the count
 * reaches zero the node's twiddle factors, if any, are destroyed, the
 * reference it held on its sub-plan is dropped in turn, the node is
 * freed and the global node counter is decremented.
 */
static void destroy_tree(fftw_plan_node *p)
{
     if (!p)
          return;

     p->refcnt -= 1;
     if (p->refcnt != 0)
          return;			/* still shared with someone else */

     switch (p->type) {
         case FFTW_TWIDDLE:
              if (p->nodeu.twiddle.tw)
                   fftw_destroy_twiddle(p->nodeu.twiddle.tw);
              destroy_tree(p->nodeu.twiddle.recurse);
              break;

         case FFTW_GENERIC:
              if (p->nodeu.generic.tw)
                   fftw_destroy_twiddle(p->nodeu.generic.tw);
              destroy_tree(p->nodeu.generic.recurse);
              break;

         case FFTW_NOTW:
              /* leaf: nothing owned besides the node itself */
              break;
     }

     fftw_free(p);
     fftw_node_cnt -= 1;
}
23698 
23699 /* create a plan with twiddle factors, and other bells and whistles */
make_plan(int n,fftw_direction dir,fftw_plan_node * root,int flags,enum fftw_node_type wisdom_type,int wisdom_signature)23700 static fftw_plan make_plan(int n, fftw_direction dir,
23701 			   fftw_plan_node *root, int flags,
23702 			   enum fftw_node_type wisdom_type,
23703 			   int wisdom_signature)
23704 {
23705      fftw_plan p = (fftw_plan) fftw_malloc(sizeof(struct fftw_plan_struct));
23706 
23707      p->n = n;
23708      p->dir = dir;
23709      p->flags = flags;
23710      use_node(root);
23711      p->root = root;
23712      p->cost = 0.0;
23713      p->wisdom_type = wisdom_type;
23714      p->wisdom_signature = wisdom_signature;
23715      p->next = (fftw_plan) 0;
23716      p->refcnt = 0;
23717      fftw_plan_cnt++;
23718      return p;
23719 }
23720 
23721 /*
23722  * complete with twiddle factors (because nodes don't have
23723  * them when FFTW_ESTIMATE is set)
23724  */
/*
 * Walk down the chain of nodes starting at p, for a transform of size
 * n, and create the twiddle factors of every FFTW_TWIDDLE or
 * FFTW_GENERIC node that does not have them yet.  At each level the
 * size passed to the next node is n divided by the node's radix.  The
 * walk stops at the first node of any other type (e.g. FFTW_NOTW).
 */
static void complete_twiddle(fftw_plan_node *p, int n)
{
     for (;;) {
          int radix;

          if (p->type == FFTW_TWIDDLE) {
               radix = p->nodeu.twiddle.size;
               if (!p->nodeu.twiddle.tw)
                    p->nodeu.twiddle.tw =
                         fftw_create_twiddle(n, radix, n / radix);
               p = p->nodeu.twiddle.recurse;
          } else if (p->type == FFTW_GENERIC) {
               radix = p->nodeu.generic.size;
               /* generic nodes use a table built with r = 2, m = n */
               if (!p->nodeu.generic.tw)
                    p->nodeu.generic.tw = fftw_create_twiddle(n, 2, n);
               p = p->nodeu.generic.recurse;
          } else {
               return;
          }

          n = n / radix;
     }
}
23747 
/* Take one more reference on plan p. */
static void use_plan(fftw_plan p)
{
     p->refcnt += 1;
}
23752 
/*
 * Drop one reference on plan p.  When the count reaches zero, the
 * plan's reference on its node tree is released, the global plan
 * counter is decremented and the plan itself is freed.
 */
static void destroy_plan(fftw_plan p)
{
     p->refcnt -= 1;
     if (p->refcnt != 0)
          return;

     destroy_tree(p->root);
     fftw_plan_cnt -= 1;
     fftw_free(p);
}
23763 
23764 /* end of constructors */
23765 
23766 /* management of plan tables */
/* Initialise a plan table (a linked list of plans) to the empty list. */
static void make_empty_table(fftw_plan *table)
{
     table[0] = (fftw_plan) 0;
}
23771 
/*
 * Push this_plan onto the front of the table.  The table takes its
 * own reference on the plan, and the plan's size field is set to n.
 */
static void insert(fftw_plan *table, fftw_plan this_plan, int n)
{
     fftw_plan old_head = *table;

     use_plan(this_plan);
     this_plan->n = n;
     this_plan->next = old_head;
     *table = this_plan;
}
23779 
/*
 * Return the first plan in the table whose size is n and whose flags
 * equal `flags`, or a null plan if there is none.  No reference is
 * taken on the result.
 */
static fftw_plan lookup(fftw_plan *table, int n, int flags)
{
     fftw_plan cur = *table;

     while (cur) {
          if (cur->n == n && cur->flags == flags)
               break;
          cur = cur->next;
     }

     return cur;
}
23789 
/*
 * Drop the table's reference on every plan in the list.  The head
 * pointer itself is left untouched.
 */
static void destroy_table(fftw_plan *table)
{
     fftw_plan cur = *table;

     while (cur) {
          /* fetch the link first: destroy_plan may free cur */
          fftw_plan following = cur->next;

          destroy_plan(cur);
          cur = following;
     }
}
23799 
/*
 * Heuristic cost of the node chain rooted at p.
 *
 *   FFTW_NOTW     1 + 0.1 * (size - NOTW_OPTIMAL_SIZE)^2
 *   FFTW_TWIDDLE  1 + 0.1 * (size - TWIDDLE_OPTIMAL_SIZE)^2 + cost of sub-plan
 *   FFTW_GENERIC  10 + size^2 + cost of sub-plan
 *
 * Any other node type yields 1.0E20.
 */
static double estimate_node(fftw_plan_node *p)
{
     int radix;

     if (p->type == FFTW_NOTW) {
          radix = p->nodeu.notw.size;
          return 1.0 + 0.1 * (radix - NOTW_OPTIMAL_SIZE) * (radix - NOTW_OPTIMAL_SIZE);
     }

     if (p->type == FFTW_TWIDDLE) {
          radix = p->nodeu.twiddle.size;
          return 1.0 + 0.1 * (radix - TWIDDLE_OPTIMAL_SIZE) * (radix - TWIDDLE_OPTIMAL_SIZE) + estimate_node(p->nodeu.twiddle.recurse);
     }

     if (p->type == FFTW_GENERIC) {
          radix = p->nodeu.generic.size;
          return 10.0 + radix * radix + estimate_node(p->nodeu.generic.recurse);
     }

     return 1.0E20;
}
23819 
23820 /* auxiliary functions */
/*
 * Store the estimated cost of a plan: its size multiplied by the
 * heuristic cost of its node tree.
 */
static void compute_cost(fftw_plan plan)
{
     plan->cost = plan->n * estimate_node(plan->root);
}
23827 
23828 /* pick the better of two plans and destroy the other one. */
/*
 * Return whichever of the two plans has the lower cost and drop one
 * reference on the other.  On a tie p1 wins.  If either plan is null
 * the other one is returned as is and nothing is destroyed.
 */
static fftw_plan pick_better(fftw_plan p1, fftw_plan p2)
{
     fftw_plan keep;
     fftw_plan discard;

     if (!p1)
          return p2;
     if (!p2)
          return p1;

     if (p1->cost > p2->cost) {
          keep = p2;
          discard = p1;
     } else {
          keep = p1;
          discard = p2;
     }

     destroy_plan(discard);
     return keep;
}
23845 
23846 /* find the smallest prime factor of n */
/*
 * Return the smallest prime factor of n, or n itself when n is prime.
 *
 * Edge cases follow directly from the code: every even n (including 0)
 * yields 2, and an odd n for which no divisor is found (1 and negative
 * odd values included) is returned unchanged.
 */
static int factor(int n)
{
     int r;

     /* try 2 */
     if ((n & 1) == 0)
	  return 2;

     /*
      * Try odd numbers up to sqrt(n).  The bound is written as
      * r <= n / r instead of r * r <= n so that the test cannot
      * overflow int when n is a large prime close to INT_MAX.
      */
     for (r = 3; r <= n / r; r += 2)
	  if (n % r == 0)
	       return r;

     /* n is prime */
     return n;
}
23863 
23864 /*
23865  * Some macrology for the planner.  If you have to write
23866  * the same line of code twice, there must be some bug.
23867  */
/*
 * NOTW_ITERATOR(p, dir) / TWIDDLE_ITERATOR(p, dir) declare a cursor
 * `p` pointing at the first entry of the no-twiddle / twiddle codelet
 * table for direction `dir` (the forward table when dir equals
 * FFTW_FORWARD, the inverse table otherwise).  They expand to a
 * declaration, so they must appear where a declaration is allowed.
 *
 * TWIDDLE_ITERATOR keeps the trailing semicolon it has always carried
 * so that existing uses expand exactly as before; following it with
 * another semicolon merely adds an empty statement.
 *
 * FORALL_NOTW(p) / FORALL_TWIDDLE(p) advance the cursor until an entry
 * with a zero `size` field is reached.
 */
#define NOTW_ITERATOR(p, dir)                                \
      config_notw *p =                                       \
	  ((dir) == FFTW_FORWARD ?                           \
	       fftw_config_notw : fftwi_config_notw)

#define TWIDDLE_ITERATOR(p, dir)                             \
      config_twiddle *p =                                    \
	  ((dir) == FFTW_FORWARD ?                           \
	       fftw_config_twiddle : fftwi_config_twiddle);

#define FORALL_NOTW(p)             \
	 for (; (p)->size; ++(p))

#define FORALL_TWIDDLE(p)          \
	 for (; (p)->size; ++(p))
23883 
23884 /******************************************
23885  *      Recursive planner                 *
23886  ******************************************/
23887 static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, int flags);
23888 
23889 /*
23890  * the planner consists of two parts: one that tries to
23891  * use accumulated wisdom, and one that does not.
23892  * A small driver invokes both parts in sequence
23893  */
23894 
23895 /*
23896  * planner with no wisdom: try all combinations and pick
23897  * the best
23898  */
/*
 * Exhaustive planner: build every candidate plan for a transform of
 * size n and keep the cheapest one according to compute_cost().
 *
 *  1. every no-twiddle codelet whose size is exactly n;
 *  2. every twiddle codelet whose size divides n, combined with the
 *     plan returned by planner() for the remaining factor (a codelet
 *     of size n itself is only tried when step 1 found nothing);
 *  3. if there is still no plan, a generic codelet for the smallest
 *     prime factor of n, again combined with a recursively planned
 *     sub-plan.
 *
 * The returned plan carries one reference owned by the caller.
 */
static fftw_plan planner_normal(fftw_plan *table, int n, fftw_direction dir, int flags)
{
     fftw_plan winner = (fftw_plan) 0;
     fftw_plan candidate;
     fftw_plan sub;
     fftw_plan_node *top;

     /* step 1: a single codelet that solves the whole problem */
     {
          NOTW_ITERATOR(cfg, dir);

          FORALL_NOTW(cfg) {
               if (cfg->size != n)
                    continue;

               top = make_node_notw(n, cfg->codelet);
               candidate = make_plan(n, dir, top, flags,
                                     FFTW_NOTW, cfg->signature);
               use_plan(candidate);
               compute_cost(candidate);
               winner = pick_better(candidate, winner);
          }
     }

     /* step 2: a twiddle codelet on top of a recursively planned rest */
     {
          TWIDDLE_ITERATOR(cfg, dir);

          FORALL_TWIDDLE(cfg) {
               if ((n % cfg->size) != 0)
                    continue;
               if (winner && n == cfg->size)
                    continue;

               sub = planner(table, n / cfg->size, dir, flags);
               top = make_node_twiddle(n, cfg->size, cfg->codelet,
                                       sub->root, flags);
               candidate = make_plan(n, dir, top, flags,
                                     FFTW_TWIDDLE, cfg->signature);
               use_plan(candidate);
               /* the new node holds sub's tree; our handle on sub can go */
               destroy_plan(sub);
               compute_cost(candidate);
               winner = pick_better(candidate, winner);
          }
     }

     /* step 3: nothing fits -- fall back to the generic codelet */
     if (!winner) {
          generic_codelet *fallback;
          int radix;

          if (dir == FFTW_FORWARD)
               fallback = fftw_twiddle_generic;
          else
               fallback = fftwi_twiddle_generic;

          radix = factor(n);
          sub = planner(table, n / radix, dir, flags);

          top = make_node_generic(n, radix, fallback, sub->root, flags);
          candidate = make_plan(n, dir, top, flags, FFTW_GENERIC, 0);
          use_plan(candidate);
          destroy_plan(sub);
          compute_cost(candidate);
          winner = pick_better(candidate, winner);
     }

     return winner;
}
23960 
/*
 * Return a plan for size n, consulting the plan table first.
 *
 * If lookup() finds an entry for (n, flags) it is marked as used via
 * use_plan() and returned.  Otherwise planner_normal() builds one, and a
 * non-null result is recorded in the table with insert() before being
 * returned.
 */
static fftw_plan planner(fftw_plan *table, int n, fftw_direction dir, int flags)
{
     fftw_plan cached = lookup(table, n, flags);
     fftw_plan fresh;

     if (cached) {
          use_plan(cached);
          return cached;
     }

     fresh = planner_normal(table, n, dir, flags);
     if (fresh)
          insert(table, fresh, n);

     return fresh;
}
23979 
/*
 * Write one line per node of the plan chain starting at p to stream f.
 *
 * Each line names the node type and its size, preceded by `indent` blanks.
 * Twiddle and generic nodes are followed by their `recurse` child (printed
 * with the same indent); a no-twiddle node, a null pointer or a node of any
 * other type ends the walk.
 */
static void print_node(FILE * f, fftw_plan_node *p, int indent)
{
     while (p) {
          switch (p->type) {
              case FFTW_NOTW:
                   fprintf(f, "%*sFFTW_NOTW %d\n", indent, "",
                           p->nodeu.notw.size);
                   return;
              case FFTW_TWIDDLE:
                   fprintf(f, "%*sFFTW_TWIDDLE %d\n", indent, "",
                           p->nodeu.twiddle.size);
                   p = p->nodeu.twiddle.recurse;
                   break;
              case FFTW_GENERIC:
                   fprintf(f, "%*sFFTW_GENERIC %d\n", indent, "",
                           p->nodeu.generic.size);
                   p = p->nodeu.generic.recurse;
                   break;
              default:
                   /* unknown node type: nothing to print, nothing to follow */
                   return;
          }
     }
}
24001 
24002 /*
24003  * twiddle.c -- compute twiddle factors
24004  * These are the twiddle factors for *direct* fft.  Flip sign to get
24005  * the inverse
24006  */
24007 
24008 /*
24009  * compute the W coefficients (that is, powers of the root of 1)
24010  * and store them into an array.
24011  */
fftw_compute_twiddle(int n,int r,int m,FFTW_COMPLEX * W)24012 static void fftw_compute_twiddle(int n, int r, int m, FFTW_COMPLEX *W)
24013 {
24014      double twoPiOverN;
24015      int i, j;
24016 
24017      twoPiOverN = FFTW_K2PI / (double) n;
24018      for (i = 0; i < m; ++i)
24019 	  for (j = 1; j < r; ++j) {
24020 	       int k = i * (r - 1) + (j - 1);
24021 	       c_re(W[k]) = cos(twoPiOverN * (double) i * (double) j);
24022 	       c_im(W[k]) = -sin(twoPiOverN * (double) i * (double) j);
24023 	  }
24024 }
24025 
24026 /*
24027  * these routines implement a simple reference-count-based
24028  * management of twiddle structures
24029  */
24030 
24031 
fftw_create_twiddle(int n,int r,int m)24032 static fftw_twiddle *fftw_create_twiddle(int n, int r, int m)
24033 {
24034      fftw_twiddle *tw;
24035      FFTW_COMPLEX *W;
24036 
24037      /* lookup for this n in the twiddle list */
24038      for (tw = twlist; tw; tw = tw->next)
24039 	  if (tw->n == n && tw->r == r && tw->m == m) {
24040 	       ++tw->refcnt;
24041 	       return tw;
24042 	  }
24043      /* not found --- allocate a new struct twiddle */
24044      tw = (fftw_twiddle *) fftw_malloc(sizeof(fftw_twiddle));
24045      W = (FFTW_COMPLEX *) fftw_malloc(m * (r - 1) * sizeof(FFTW_COMPLEX));
24046      fftw_twiddle_size += n;
24047 
24048      tw->n = n;
24049      tw->r = r;
24050      tw->m = m;
24051      tw->twarray = W;
24052      tw->refcnt = 1;
24053      fftw_compute_twiddle(n, r, m, W);
24054 
24055      /* enqueue the new struct */
24056      tw->next = twlist;
24057      twlist = tw;
24058 
24059      return tw;
24060 }
24061 
/*
 * Drop one reference to the twiddle table tw.
 *
 * When the reference count reaches zero the table is unlinked from the
 * global list twlist, fftw_twiddle_size is reduced by tw->n, and both the
 * factor array and the descriptor are released with fftw_free().
 *
 * If tw is not on the list at that point, fftw_die() is called to report
 * the inconsistency.
 */
static void fftw_destroy_twiddle(fftw_twiddle * tw)
{
     fftw_twiddle **p;

     --tw->refcnt;
     if (tw->refcnt != 0)
          return;

     /*
      * p walks the chain of link fields, so it is itself never null; the
      * end of the list is reached when the link it points at is null.
      * Testing *p (not p) keeps the walk from dereferencing that null link
      * and lets the diagnostic below be reached.
      */
     for (p = &twlist; *p; p = &((*p)->next)) {
          if (*p == tw) {
               *p = tw->next;
               fftw_twiddle_size -= tw->n;
               fftw_free(tw->twarray);
               fftw_free(tw);
               return;
          }
     }

     fftw_die("BUG in fftw_destroy_twiddle\n");
}
24080 
24081 /* NON STATIC FUNCTION BELOW THIS LINE */
24082 
24083 /* user interface */
/*
 * Run a one-dimensional plan on `howmany` arrays.
 *
 * The executor is chosen from two facts: whether the plan carries the
 * FFTW_IN_PLACE flag, and whether howmany is exactly 1.
 *
 *   in-place,     howmany == 1: uses in, out, istride
 *   in-place,     otherwise   : additionally uses howmany, idist
 *   out-of-place, howmany == 1: uses in, out, istride, ostride
 *   out-of-place, otherwise   : uses every argument
 *
 * Arguments not listed for a case are not passed to the executor.
 */
void fftw(fftw_plan plan, int howmany, FFTW_COMPLEX* in, int istride, int idist, FFTW_COMPLEX* out, int ostride, int odist)
{
	const int size = plan->n;
	const int single = (howmany == 1);

	if (plan->flags & FFTW_IN_PLACE) {
		if (single)
			executor_simple_inplace(size, in, out, plan->root, istride);
		else
			executor_many_inplace(size, in, out, plan->root, istride,
				howmany, idist);
		return;
	}

	if (single)
		fftw_executor_simple(size, in, out, plan->root, istride, ostride);
	else
		executor_many(size, in, out, plan->root, istride, ostride,
			howmany, idist, odist);
}
24107 
24108 /*********** Initializing the FFTWND Auxiliary Data **********/
24109 
qe_fftw2d_create_plan(int nx,int ny,fftw_direction dir,int flags)24110 fftwnd_plan qe_fftw2d_create_plan(int nx, int ny, fftw_direction dir, int flags)
24111 {
24112 	int n[2];
24113 
24114 	n[0] = nx;
24115 	n[1] = ny;
24116 
24117 	return qe_fftwnd_create_plan(2, n, dir, flags);
24118 }
24119 
qe_fftw3d_create_plan(int nx,int ny,int nz,fftw_direction dir,int flags)24120 fftwnd_plan qe_fftw3d_create_plan(int nx, int ny, int nz, fftw_direction dir,
24121 	int flags)
24122 {
24123 	int n[3];
24124 
24125 	n[0] = nx;
24126 	n[1] = ny;
24127 	n[2] = nz;
24128 
24129 	return qe_fftwnd_create_plan(3, n, dir, flags);
24130 }
24131 
qe_fftwnd_create_plan(int rank,const int * n,fftw_direction dir,int flags)24132 fftwnd_plan qe_fftwnd_create_plan(int rank, const int* n, fftw_direction dir, int flags)
24133 {
24134 	int i, j, max_dim = 0;
24135 	fftwnd_plan p;
24136 	int cur_flags;
24137 
24138 	if (flags & FFTW_MEASURE) {
24139 		fprintf(stdout, "FFTW in QE: this is a stripped down version of FFTW, FFTW_MEASURE is not supported. Use the complete library\n");
24140 		flags = flags & ~FFTW_MEASURE;
24141 	}
24142 
24143 	if (rank < 0)
24144 		return 0;
24145 
24146 	for (i = 0; i < rank; ++i)
24147 		if (n[i] <= 0)
24148 			return 0;
24149 
24150 	p = (fftwnd_plan)fftw_malloc(sizeof(fftwnd_aux_data));
24151 	p->n = 0;
24152 	p->n_before = 0;
24153 	p->n_after = 0;
24154 	p->plans = 0;
24155 	p->work = 0;
24156 
24157 	p->rank = rank;
24158 	p->is_in_place = flags & FFTW_IN_PLACE;
24159 
24160 	if (rank == 0)
24161 		return 0;
24162 
24163 	p->n = (int*)fftw_malloc(sizeof(int) * rank);
24164 	p->n_before = (int*)fftw_malloc(sizeof(int) * rank);
24165 	p->n_after = (int*)fftw_malloc(sizeof(int) * rank);
24166 	p->plans = (fftw_plan*)fftw_malloc(rank * sizeof(fftw_plan));
24167 	p->n_before[0] = 1;
24168 	p->n_after[rank - 1] = 1;
24169 
24170 	for (i = 0; i < rank; ++i) {
24171 		p->n[i] = n[i];
24172 
24173 		if (i) {
24174 			p->n_before[i] = p->n_before[i - 1] * n[i - 1];
24175 			p->n_after[rank - 1 - i] = p->n_after[rank - i] * n[rank - i];
24176 		}
24177 		if (i < rank - 1 || (flags & FFTW_IN_PLACE)) {
24178 			/* fft's except the last dimension are always in-place */
24179 			cur_flags = flags | FFTW_IN_PLACE;
24180 			for (j = i - 1; j >= 0 && n[i] != n[j]; --j);
24181 
24182 			if (n[i] > max_dim)
24183 				max_dim = n[i];
24184 		}
24185 		else {
24186 			cur_flags = flags;
24187 			/* we must create a separate plan for the last dimension */
24188 			j = -1;
24189 		}
24190 
24191 		if (j >= 0) {
24192 			/*
24193 			 * If a plan already exists for this size
24194 			 * array, reuse it:
24195 			 */
24196 			p->plans[i] = p->plans[j];
24197 		}
24198 		else {
24199 			/* generate a new plan: */
24200 			p->plans[i] = qe_fftw_create_plan(n[i], dir, cur_flags);
24201 			if (!p->plans[i]) {
24202 				qe_fftwnd_destroy_plan(p);
24203 				return 0;
24204 			}
24205 		}
24206 	}
24207 
24208 	/* Create work array for in-place FFTs: */
24209 	if (max_dim > 0)
24210 		p->work = (FFTW_COMPLEX*)
24211 		fftw_malloc(sizeof(FFTW_COMPLEX) * max_dim);
24212 
24213 	return p;
24214 }
24215 
24216 /************* Freeing the FFTWND Auxiliary Data *************/
24217 
/*
 * Release everything owned by an N-dimensional plan.  A null plan is
 * accepted and ignored.
 *
 * The same 1-D plan pointer may appear in several slots of plan->plans, so
 * a slot is destroyed only when it is non-null and no earlier slot holds
 * the same pointer.  The n, n_before, n_after and work arrays are freed
 * when present, and finally the plan structure itself.
 */
void qe_fftwnd_destroy_plan(fftwnd_plan plan)
{
	if (!plan)
		return;

	if (plan->plans) {
		int cur, prev;

		for (cur = 0; cur < plan->rank; ++cur) {
			int seen_before = 0;

			for (prev = cur - 1; prev >= 0; --prev) {
				if (plan->plans[cur] == plan->plans[prev]) {
					seen_before = 1;
					break;
				}
			}
			if (!seen_before && plan->plans[cur])
				qe_fftw_destroy_plan(plan->plans[cur]);
		}
		fftw_free(plan->plans);
	}

	if (plan->n)
		fftw_free(plan->n);
	if (plan->n_before)
		fftw_free(plan->n_before);
	if (plan->n_after)
		fftw_free(plan->n_after);
	if (plan->work)
		fftw_free(plan->work);

	fftw_free(plan);
}
24248 
24249 /************** Computing the N-Dimensional FFT **************/
24250 
/*
 * Execute an N-dimensional plan on `howmany` arrays.
 *
 * In-place plans (plan->is_in_place set) transform `in` directly; out,
 * ostride and odist are not used.  For rank 1 the plan's own work array is
 * handed to fftw() as the output argument with stride 1 and distance 0.
 *
 * Out-of-place plans require out to be non-null and different from in;
 * otherwise fftw_die() is called with an error message.
 *
 * Rank 0 does nothing.  Ranks 1, 2 and 3 have dedicated code paths; any
 * other rank goes through the general N-dimensional routine.
 */
void fftwnd(fftwnd_plan plan, int howmany,
	FFTW_COMPLEX * in, int istride, int idist,
	FFTW_COMPLEX * out, int ostride, int odist)
{
	int rank;

	if (plan->is_in_place) {
		rank = plan->rank;

		if (rank == 0) {
			/* nothing to transform */
		}
		else if (rank == 1) {
			fftw(plan->plans[0], howmany, in, istride, idist,
				plan->work, 1, 0);
		}
		else if (rank == 2) {
			fftw2d_in_place_aux(plan, howmany, in, istride, idist);
		}
		else if (rank == 3) {
			fftw3d_in_place_aux(plan, howmany, in, istride, idist);
		}
		else {
			fftwnd_in_place_aux(plan, howmany, in, istride, idist);
		}
		return;
	}

	if (in == out || out == 0)
		fftw_die("Illegal attempt to perform in-place FFT!\n");

	rank = plan->rank;

	if (rank == 0) {
		/* nothing to transform */
	}
	else if (rank == 1) {
		fftw(plan->plans[0], howmany, in, istride, idist,
			out, ostride, odist);
	}
	else if (rank == 2) {
		fftw2d_out_of_place_aux(plan, howmany, in, istride,
			idist, out, ostride, odist);
	}
	else if (rank == 3) {
		fftw3d_out_of_place_aux(plan, howmany, in, istride,
			idist, out, ostride, odist);
	}
	else {
		fftwnd_out_of_place_aux(plan, howmany, in, istride,
			idist, out, ostride, odist);
	}
}
24296 
24297 
24298 /*
24299  * Naive O(n^2) algorithm, used for testing purposes
24300  */
fftw_naive(int n,FFTW_COMPLEX * in,FFTW_COMPLEX * out)24301 void fftw_naive(int n, FFTW_COMPLEX* in, FFTW_COMPLEX* out)
24302 {
24303 	int i, j;
24304 	FFTW_COMPLEX sum;
24305 	FFTW_COMPLEX w;
24306 	FFTW_REAL pi = 3.1415926535897932384626434;
24307 
24308 	for (j = 0; j < n; ++j) {
24309 		c_re(sum) = c_im(sum) = 0.0;
24310 		for (i = 0; i < n; ++i) {
24311 			c_re(w) = cos((2.0 * pi * (i * j % n)) / n);
24312 			c_im(w) = -sin((2.0 * pi * (i * j % n)) / n);
24313 			c_re(sum) += c_re(in[i]) * c_re(w) - c_im(in[i]) * c_im(w);
24314 			c_im(sum) += c_im(in[i]) * c_re(w) + c_re(in[i]) * c_im(w);
24315 		}
24316 		out[j] = sum;
24317 	}
24318 	return;
24319 }
24320 
24321 /*
24322  * Naive O(n^2) algorithm, for the inverse.
24323  */
fftwi_naive(int n,FFTW_COMPLEX * in,FFTW_COMPLEX * out)24324 void fftwi_naive(int n, FFTW_COMPLEX * in, FFTW_COMPLEX * out)
24325 {
24326 	int i, j;
24327 	FFTW_COMPLEX sum;
24328 	FFTW_COMPLEX w;
24329 	FFTW_REAL pi = 3.1415926535897932384626434;
24330 
24331 	for (j = 0; j < n; ++j) {
24332 		c_re(sum) = c_im(sum) = 0.0;
24333 		for (i = 0; i < n; ++i) {
24334 			c_re(w) = cos((2.0 * pi * (i * j % n)) / n);
24335 			c_im(w) = sin((2.0 * pi * (i * j % n)) / n);
24336 			c_re(sum) += c_re(in[i]) * c_re(w) - c_im(in[i]) * c_im(w);
24337 			c_im(sum) += c_im(in[i]) * c_re(w) + c_re(in[i]) * c_im(w);
24338 		}
24339 		out[j] = sum;
24340 	}
24341 	return;
24342 }
24343 
24344 
qe_fftw_create_plan(int n,fftw_direction dir,int flags)24345 fftw_plan qe_fftw_create_plan(int n, fftw_direction dir, int flags)
24346 {
24347 	fftw_plan table;
24348 	fftw_plan p1;
24349 
24350 	if (flags & FFTW_MEASURE) {
24351 		fprintf(stdout, "FFTW in QE: this is a stripped down version of FFTW, FFTW_MEASURE is not supported. Use the complete library\n");
24352 		flags = flags & ~FFTW_MEASURE;
24353 	}
24354 
24355 	/* validate parameters */
24356 	if (n <= 0)
24357 		return (fftw_plan)0;
24358 
24359 	if ((dir != FFTW_FORWARD) && (dir != FFTW_BACKWARD))
24360 		return (fftw_plan)0;
24361 
24362 	make_empty_table(&table);
24363 	p1 = planner(&table, n, dir, flags);
24364 	destroy_table(&table);
24365 
24366 	complete_twiddle(p1->root, n);
24367 	return p1;
24368 }
24369 
/*
 * Public counterpart of qe_fftw_create_plan(): hands `plan` to the
 * file-internal destroy_plan().
 */
void qe_fftw_destroy_plan(fftw_plan plan)
{
	destroy_plan(plan);
}
24374 
/*
 * Write a textual description of plan p to stream f: first a header line
 * with the plan's cost, then the node chain starting at p->root as printed
 * by print_node() with an indent of 0.
 */
void fftw_fprint_plan(FILE* f, fftw_plan p)
{
	fftw_plan_node *root = p->root;

	fprintf(f, "plan: (cost = %e)\n", p->cost);
	print_node(f, root, 0);
}
24380 
/*
 * Print a description of plan p on standard output; equivalent to
 * fftw_fprint_plan(stdout, p).
 */
void fftw_print_plan(fftw_plan p)
{
	fftw_fprint_plan(stdout, p);
}