/*
 * Copyright (c) 1997-1999, 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
19
/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Mon Mar 24 02:08:07 EST 2003 */
22
23 #include "fftw-int.h"
24 #include "fftw.h"
25
/* Generated by: /homee/stevenj/cvs/fftw/gensrc/genfft -magic-alignment-check -magic-twiddle-load-all -magic-variables 4 -magic-loopi -hc2hc-forward 10 */
27
/*
 * This function contains 168 FP additions, 84 FP multiplications,
 * (or, 126 additions, 42 multiplications, 42 fused multiply/add),
 * 43 stack variables, and 80 memory accesses
 */
33 static const fftw_real K250000000 =
34 FFTW_KONST(+0.250000000000000000000000000000000000000000000);
35 static const fftw_real K559016994 =
36 FFTW_KONST(+0.559016994374947424102293417182819058860154590);
37 static const fftw_real K587785252 =
38 FFTW_KONST(+0.587785252292473129168705954639072768597652438);
39 static const fftw_real K951056516 =
40 FFTW_KONST(+0.951056516295153572116439333379382143405698634);
41
/*
 * Generator Id's :
 * $Id: exprdag.ml,v 1.43 2003/03/16 23:43:46 stevenj Exp $
 * $Id: fft.ml,v 1.44 2003/03/16 23:43:46 stevenj Exp $
 * $Id: to_c.ml,v 1.26 2003/03/16 23:43:46 stevenj Exp $
 */
48
/*
 * Forward radix-10 half-complex pass, computed in place on A.
 *
 * Parameters:
 *   A        - data array, read and overwritten.  Two cursors walk it:
 *              X starts at A and Y starts at A + 10 * iostride.
 *   W        - twiddle factors, read through c_re()/c_im().  The main loop
 *              reads W[0]..W[8] and then advances W by 9.
 *   iostride - distance (in fftw_real elements) between the ten values
 *              combined by one butterfly.
 *   m        - loop bound: the twiddled butterfly runs for i = 2, 4, ...
 *              while i < m; the trailing block runs only when the counter
 *              ends exactly on m (m even and >= 2).
 *   dist     - after each butterfly X moves forward by dist and Y moves
 *              back by dist.
 *
 * The function has three phases:
 *
 *   1. First butterfly (no twiddles): the ten reals X[k * iostride],
 *      k = 0..9, are replaced by their 10-point forward DFT
 *      F[j] = sum_n x[n] * exp(-2*pi*i*n*j/10).  Re F[j] is stored in
 *      X[j * iostride] for j = 0..5 and Im F[j] in Y[-j * iostride] for
 *      j = 1..4.
 *
 *   2. Main loop: element k (k = 0..9) is the complex number with real
 *      part X[k * iostride] and imaginary part Y[-(9 - k) * iostride].
 *      Elements 1..9 are multiplied by W[k - 1]; a 10-point complex DFT of
 *      the products follows.  Output j is stored as
 *          j = 0..4:  X[j * iostride] =  Re F[j],  Y[-j * iostride] = Im F[j]
 *          j = 5..9:  Y[-j * iostride] = Re F[j],  X[j * iostride] = -Im F[j]
 *      All loads of an iteration precede its stores, which is what makes
 *      the in-place update safe.
 *
 *   3. Trailing block (only when i == m): the ten reals X[k * iostride]
 *      are replaced by G[j] = sum_n x[n] * exp(-i*pi*n*(2*j + 1)/10) for
 *      j = 0..4, with Re G[j] in X[j * iostride] and Im G[j] in
 *      Y[-j * iostride].
 *
 * ASSERT_ALIGNED_DOUBLE is a project macro; presumably a debug-time stack
 * alignment check (the generator was run with -magic-alignment-check) --
 * confirm against the FFTW internal header.
 *
 * The temporaries keep their generator-assigned names, and the statement
 * order is that of the generated code.
 */
void fftw_hc2hc_forward_10(fftw_real *A, const fftw_complex *W,
                           int iostride, int m, int dist)
{
    int i;
    fftw_real *X;
    fftw_real *Y;
    X = A;
    Y = A + (10 * iostride);
    {
        /* Phase 1: untwiddled real 10-point DFT of the first group. */
        fftw_real tmp178;
        fftw_real tmp181;
        fftw_real tmp162;
        fftw_real tmp167;
        fftw_real tmp165;
        fftw_real tmp168;
        fftw_real tmp174;
        fftw_real tmp183;
        fftw_real tmp155;
        fftw_real tmp170;
        fftw_real tmp158;
        fftw_real tmp171;
        fftw_real tmp173;
        fftw_real tmp182;
        fftw_real tmp176;
        fftw_real tmp177;
        ASSERT_ALIGNED_DOUBLE;
        /* Radix-2 step: sums and differences of x[n] and x[n + 5]. */
        tmp176 = X[0];
        tmp177 = X[5 * iostride];
        tmp178 = tmp176 - tmp177;
        tmp181 = tmp176 + tmp177;
        {
            fftw_real tmp160;
            fftw_real tmp161;
            fftw_real tmp163;
            fftw_real tmp164;
            ASSERT_ALIGNED_DOUBLE;
            tmp160 = X[4 * iostride];
            tmp161 = X[9 * iostride];
            tmp162 = tmp160 - tmp161;
            tmp167 = tmp160 + tmp161;
            tmp163 = X[6 * iostride];
            tmp164 = X[iostride];
            tmp165 = tmp163 - tmp164;
            tmp168 = tmp163 + tmp164;
        }
        tmp174 = tmp162 + tmp165;
        tmp183 = tmp167 + tmp168;
        {
            fftw_real tmp153;
            fftw_real tmp154;
            fftw_real tmp156;
            fftw_real tmp157;
            ASSERT_ALIGNED_DOUBLE;
            tmp153 = X[2 * iostride];
            tmp154 = X[7 * iostride];
            tmp155 = tmp153 - tmp154;
            tmp170 = tmp153 + tmp154;
            tmp156 = X[8 * iostride];
            tmp157 = X[3 * iostride];
            tmp158 = tmp156 - tmp157;
            tmp171 = tmp156 + tmp157;
        }
        tmp173 = tmp155 + tmp158;
        tmp182 = tmp170 + tmp171;
        {
            /* Im of bins 1 and 3; Re of the even bins 0, 2 and 4. */
            fftw_real tmp159;
            fftw_real tmp166;
            fftw_real tmp186;
            fftw_real tmp184;
            fftw_real tmp185;
            ASSERT_ALIGNED_DOUBLE;
            tmp159 = tmp155 - tmp158;
            tmp166 = tmp162 - tmp165;
            Y[-iostride] = -((K951056516 * tmp159) + (K587785252 * tmp166));
            Y[-3 * iostride] = (K587785252 * tmp159) - (K951056516 * tmp166);
            tmp186 = K559016994 * (tmp182 - tmp183);
            tmp184 = tmp182 + tmp183;
            tmp185 = tmp181 - (K250000000 * tmp184);
            X[2 * iostride] = tmp185 - tmp186;
            X[4 * iostride] = tmp186 + tmp185;
            X[0] = tmp184 + tmp181;
        }
        {
            /* Im of bins 2 and 4; Re of the odd bins 1, 3 and 5. */
            fftw_real tmp169;
            fftw_real tmp172;
            fftw_real tmp175;
            fftw_real tmp179;
            fftw_real tmp180;
            ASSERT_ALIGNED_DOUBLE;
            tmp169 = tmp167 - tmp168;
            tmp172 = tmp170 - tmp171;
            Y[-2 * iostride] = (K951056516 * tmp169) - (K587785252 * tmp172);
            Y[-4 * iostride] = (K951056516 * tmp172) + (K587785252 * tmp169);
            tmp175 = K559016994 * (tmp173 - tmp174);
            tmp179 = tmp173 + tmp174;
            tmp180 = tmp178 - (K250000000 * tmp179);
            X[iostride] = tmp175 + tmp180;
            X[3 * iostride] = tmp180 - tmp175;
            X[5 * iostride] = tmp179 + tmp178;
        }
    }
    X = X + dist;
    Y = Y - dist;
    /* Phase 2: twiddled complex butterflies; nine twiddles per iteration. */
    for (i = 2; i < m; i = i + 2, X = X + dist, Y = Y - dist, W = W + 9) {
        fftw_real tmp39;
        fftw_real tmp87;
        fftw_real tmp132;
        fftw_real tmp144;
        fftw_real tmp73;
        fftw_real tmp84;
        fftw_real tmp85;
        fftw_real tmp91;
        fftw_real tmp92;
        fftw_real tmp93;
        fftw_real tmp100;
        fftw_real tmp103;
        fftw_real tmp128;
        fftw_real tmp121;
        fftw_real tmp122;
        fftw_real tmp142;
        fftw_real tmp50;
        fftw_real tmp61;
        fftw_real tmp62;
        fftw_real tmp88;
        fftw_real tmp89;
        fftw_real tmp90;
        fftw_real tmp107;
        fftw_real tmp110;
        fftw_real tmp127;
        fftw_real tmp118;
        fftw_real tmp119;
        fftw_real tmp141;
        ASSERT_ALIGNED_DOUBLE;
        {
            /* Elements 0 (untwiddled) and 5 (times W[4]). */
            fftw_real tmp33;
            fftw_real tmp131;
            fftw_real tmp38;
            fftw_real tmp130;
            ASSERT_ALIGNED_DOUBLE;
            tmp33 = X[0];
            tmp131 = Y[-9 * iostride];
            {
                fftw_real tmp35;
                fftw_real tmp37;
                fftw_real tmp34;
                fftw_real tmp36;
                ASSERT_ALIGNED_DOUBLE;
                tmp35 = X[5 * iostride];
                tmp37 = Y[-4 * iostride];
                tmp34 = c_re(W[4]);
                tmp36 = c_im(W[4]);
                tmp38 = (tmp34 * tmp35) - (tmp36 * tmp37);
                tmp130 = (tmp36 * tmp35) + (tmp34 * tmp37);
            }
            tmp39 = tmp33 - tmp38;
            tmp87 = tmp33 + tmp38;
            tmp132 = tmp130 + tmp131;
            tmp144 = tmp131 - tmp130;
        }
        {
            /* Elements 4, 1, 9 and 6 (times W[3], W[0], W[8], W[5]). */
            fftw_real tmp67;
            fftw_real tmp98;
            fftw_real tmp83;
            fftw_real tmp102;
            fftw_real tmp72;
            fftw_real tmp99;
            fftw_real tmp78;
            fftw_real tmp101;
            ASSERT_ALIGNED_DOUBLE;
            {
                fftw_real tmp64;
                fftw_real tmp66;
                fftw_real tmp63;
                fftw_real tmp65;
                ASSERT_ALIGNED_DOUBLE;
                tmp64 = X[4 * iostride];
                tmp66 = Y[-5 * iostride];
                tmp63 = c_re(W[3]);
                tmp65 = c_im(W[3]);
                tmp67 = (tmp63 * tmp64) - (tmp65 * tmp66);
                tmp98 = (tmp65 * tmp64) + (tmp63 * tmp66);
            }
            {
                fftw_real tmp80;
                fftw_real tmp82;
                fftw_real tmp79;
                fftw_real tmp81;
                ASSERT_ALIGNED_DOUBLE;
                tmp80 = X[iostride];
                tmp82 = Y[-8 * iostride];
                tmp79 = c_re(W[0]);
                tmp81 = c_im(W[0]);
                tmp83 = (tmp79 * tmp80) - (tmp81 * tmp82);
                tmp102 = (tmp81 * tmp80) + (tmp79 * tmp82);
            }
            {
                fftw_real tmp69;
                fftw_real tmp71;
                fftw_real tmp68;
                fftw_real tmp70;
                ASSERT_ALIGNED_DOUBLE;
                tmp69 = X[9 * iostride];
                tmp71 = Y[0];
                tmp68 = c_re(W[8]);
                tmp70 = c_im(W[8]);
                tmp72 = (tmp68 * tmp69) - (tmp70 * tmp71);
                tmp99 = (tmp70 * tmp69) + (tmp68 * tmp71);
            }
            {
                fftw_real tmp75;
                fftw_real tmp77;
                fftw_real tmp74;
                fftw_real tmp76;
                ASSERT_ALIGNED_DOUBLE;
                tmp75 = X[6 * iostride];
                tmp77 = Y[-3 * iostride];
                tmp74 = c_re(W[5]);
                tmp76 = c_im(W[5]);
                tmp78 = (tmp74 * tmp75) - (tmp76 * tmp77);
                tmp101 = (tmp76 * tmp75) + (tmp74 * tmp77);
            }
            tmp73 = tmp67 - tmp72;
            tmp84 = tmp78 - tmp83;
            tmp85 = tmp73 + tmp84;
            tmp91 = tmp67 + tmp72;
            tmp92 = tmp78 + tmp83;
            tmp93 = tmp91 + tmp92;
            tmp100 = tmp98 + tmp99;
            tmp103 = tmp101 + tmp102;
            tmp128 = tmp100 + tmp103;
            tmp121 = tmp98 - tmp99;
            tmp122 = tmp101 - tmp102;
            tmp142 = tmp121 + tmp122;
        }
        {
            /* Elements 2, 3, 7 and 8 (times W[1], W[2], W[6], W[7]). */
            fftw_real tmp44;
            fftw_real tmp105;
            fftw_real tmp60;
            fftw_real tmp109;
            fftw_real tmp49;
            fftw_real tmp106;
            fftw_real tmp55;
            fftw_real tmp108;
            ASSERT_ALIGNED_DOUBLE;
            {
                fftw_real tmp41;
                fftw_real tmp43;
                fftw_real tmp40;
                fftw_real tmp42;
                ASSERT_ALIGNED_DOUBLE;
                tmp41 = X[2 * iostride];
                tmp43 = Y[-7 * iostride];
                tmp40 = c_re(W[1]);
                tmp42 = c_im(W[1]);
                tmp44 = (tmp40 * tmp41) - (tmp42 * tmp43);
                tmp105 = (tmp42 * tmp41) + (tmp40 * tmp43);
            }
            {
                fftw_real tmp57;
                fftw_real tmp59;
                fftw_real tmp56;
                fftw_real tmp58;
                ASSERT_ALIGNED_DOUBLE;
                tmp57 = X[3 * iostride];
                tmp59 = Y[-6 * iostride];
                tmp56 = c_re(W[2]);
                tmp58 = c_im(W[2]);
                tmp60 = (tmp56 * tmp57) - (tmp58 * tmp59);
                tmp109 = (tmp58 * tmp57) + (tmp56 * tmp59);
            }
            {
                fftw_real tmp46;
                fftw_real tmp48;
                fftw_real tmp45;
                fftw_real tmp47;
                ASSERT_ALIGNED_DOUBLE;
                tmp46 = X[7 * iostride];
                tmp48 = Y[-2 * iostride];
                tmp45 = c_re(W[6]);
                tmp47 = c_im(W[6]);
                tmp49 = (tmp45 * tmp46) - (tmp47 * tmp48);
                tmp106 = (tmp47 * tmp46) + (tmp45 * tmp48);
            }
            {
                fftw_real tmp52;
                fftw_real tmp54;
                fftw_real tmp51;
                fftw_real tmp53;
                ASSERT_ALIGNED_DOUBLE;
                tmp52 = X[8 * iostride];
                tmp54 = Y[-iostride];
                tmp51 = c_re(W[7]);
                tmp53 = c_im(W[7]);
                tmp55 = (tmp51 * tmp52) - (tmp53 * tmp54);
                tmp108 = (tmp53 * tmp52) + (tmp51 * tmp54);
            }
            tmp50 = tmp44 - tmp49;
            tmp61 = tmp55 - tmp60;
            tmp62 = tmp50 + tmp61;
            tmp88 = tmp44 + tmp49;
            tmp89 = tmp55 + tmp60;
            tmp90 = tmp88 + tmp89;
            tmp107 = tmp105 + tmp106;
            tmp110 = tmp108 + tmp109;
            tmp127 = tmp107 + tmp110;
            tmp118 = tmp105 - tmp106;
            tmp119 = tmp108 - tmp109;
            tmp141 = tmp118 + tmp119;
        }
        /* Every input has been loaded; the stores below may overwrite them. */
        {
            /* Real parts of the odd outputs 1, 3, 5, 7 and 9. */
            fftw_real tmp115;
            fftw_real tmp86;
            fftw_real tmp116;
            fftw_real tmp124;
            fftw_real tmp126;
            fftw_real tmp120;
            fftw_real tmp123;
            fftw_real tmp125;
            fftw_real tmp117;
            ASSERT_ALIGNED_DOUBLE;
            tmp115 = K559016994 * (tmp62 - tmp85);
            tmp86 = tmp62 + tmp85;
            tmp116 = tmp39 - (K250000000 * tmp86);
            tmp120 = tmp118 - tmp119;
            tmp123 = tmp121 - tmp122;
            tmp124 = (K951056516 * tmp120) + (K587785252 * tmp123);
            tmp126 = (K951056516 * tmp123) - (K587785252 * tmp120);
            Y[-5 * iostride] = tmp39 + tmp86;
            tmp125 = tmp116 - tmp115;
            Y[-7 * iostride] = tmp125 - tmp126;
            X[3 * iostride] = tmp125 + tmp126;
            tmp117 = tmp115 + tmp116;
            Y[-9 * iostride] = tmp117 - tmp124;
            X[iostride] = tmp117 + tmp124;
        }
        {
            /* Imaginary parts of the odd outputs (negated for 5, 7, 9). */
            fftw_real tmp148;
            fftw_real tmp143;
            fftw_real tmp149;
            fftw_real tmp147;
            fftw_real tmp151;
            fftw_real tmp145;
            fftw_real tmp146;
            fftw_real tmp152;
            fftw_real tmp150;
            ASSERT_ALIGNED_DOUBLE;
            tmp148 = K559016994 * (tmp141 - tmp142);
            tmp143 = tmp141 + tmp142;
            tmp149 = tmp144 - (K250000000 * tmp143);
            tmp145 = tmp50 - tmp61;
            tmp146 = tmp73 - tmp84;
            tmp147 = (K951056516 * tmp145) + (K587785252 * tmp146);
            tmp151 = (K587785252 * tmp145) - (K951056516 * tmp146);
            X[5 * iostride] = -(tmp143 + tmp144);
            tmp152 = tmp149 - tmp148;
            X[7 * iostride] = tmp151 - tmp152;
            Y[-3 * iostride] = tmp151 + tmp152;
            tmp150 = tmp148 + tmp149;
            X[9 * iostride] = -(tmp147 + tmp150);
            Y[-iostride] = tmp150 - tmp147;
        }
        {
            /* Real parts of the even outputs 0, 2, 4, 6 and 8. */
            fftw_real tmp96;
            fftw_real tmp94;
            fftw_real tmp95;
            fftw_real tmp112;
            fftw_real tmp114;
            fftw_real tmp104;
            fftw_real tmp111;
            fftw_real tmp113;
            fftw_real tmp97;
            ASSERT_ALIGNED_DOUBLE;
            tmp96 = K559016994 * (tmp90 - tmp93);
            tmp94 = tmp90 + tmp93;
            tmp95 = tmp87 - (K250000000 * tmp94);
            tmp104 = tmp100 - tmp103;
            tmp111 = tmp107 - tmp110;
            tmp112 = (K951056516 * tmp104) - (K587785252 * tmp111);
            tmp114 = (K951056516 * tmp111) + (K587785252 * tmp104);
            X[0] = tmp87 + tmp94;
            tmp113 = tmp96 + tmp95;
            X[4 * iostride] = tmp113 - tmp114;
            Y[-6 * iostride] = tmp113 + tmp114;
            tmp97 = tmp95 - tmp96;
            X[2 * iostride] = tmp97 - tmp112;
            Y[-8 * iostride] = tmp97 + tmp112;
        }
        {
            /* Imaginary parts of the even outputs (negated for 6 and 8). */
            fftw_real tmp134;
            fftw_real tmp129;
            fftw_real tmp133;
            fftw_real tmp138;
            fftw_real tmp140;
            fftw_real tmp136;
            fftw_real tmp137;
            fftw_real tmp139;
            fftw_real tmp135;
            ASSERT_ALIGNED_DOUBLE;
            tmp134 = K559016994 * (tmp127 - tmp128);
            tmp129 = tmp127 + tmp128;
            tmp133 = tmp132 - (K250000000 * tmp129);
            tmp136 = tmp91 - tmp92;
            tmp137 = tmp88 - tmp89;
            tmp138 = (K951056516 * tmp136) - (K587785252 * tmp137);
            tmp140 = (K951056516 * tmp137) + (K587785252 * tmp136);
            Y[0] = tmp129 + tmp132;
            tmp139 = tmp134 + tmp133;
            X[6 * iostride] = -(tmp139 - tmp140);
            Y[-4 * iostride] = tmp140 + tmp139;
            tmp135 = tmp133 - tmp134;
            X[8 * iostride] = -(tmp135 - tmp138);
            Y[-2 * iostride] = tmp138 + tmp135;
        }
    }
    /* Phase 3: runs only when the loop counter stopped exactly on m. */
    if (i == m) {
        fftw_real tmp1;
        fftw_real tmp24;
        fftw_real tmp8;
        fftw_real tmp10;
        fftw_real tmp25;
        fftw_real tmp26;
        fftw_real tmp14;
        fftw_real tmp28;
        fftw_real tmp23;
        fftw_real tmp17;
        ASSERT_ALIGNED_DOUBLE;
        tmp1 = X[0];
        tmp24 = X[5 * iostride];
        {
            /* Even-indexed inputs 2, 4, 6 and 8. */
            fftw_real tmp2;
            fftw_real tmp3;
            fftw_real tmp4;
            fftw_real tmp5;
            fftw_real tmp6;
            fftw_real tmp7;
            ASSERT_ALIGNED_DOUBLE;
            tmp2 = X[4 * iostride];
            tmp3 = X[6 * iostride];
            tmp4 = tmp2 - tmp3;
            tmp5 = X[8 * iostride];
            tmp6 = X[2 * iostride];
            tmp7 = tmp5 - tmp6;
            tmp8 = tmp4 + tmp7;
            tmp10 = K559016994 * (tmp4 - tmp7);
            tmp25 = tmp2 + tmp3;
            tmp26 = tmp5 + tmp6;
        }
        {
            /* Odd-indexed inputs 1, 3, 7 and 9. */
            fftw_real tmp12;
            fftw_real tmp13;
            fftw_real tmp22;
            fftw_real tmp15;
            fftw_real tmp16;
            fftw_real tmp21;
            ASSERT_ALIGNED_DOUBLE;
            tmp12 = X[iostride];
            tmp13 = X[9 * iostride];
            tmp22 = tmp12 + tmp13;
            tmp15 = X[3 * iostride];
            tmp16 = X[7 * iostride];
            tmp21 = tmp15 + tmp16;
            tmp14 = tmp12 - tmp13;
            tmp28 = K559016994 * (tmp22 + tmp21);
            tmp23 = tmp21 - tmp22;
            tmp17 = tmp15 - tmp16;
        }
        /* All ten inputs are now in temporaries; stores may begin. */
        X[2 * iostride] = tmp1 + tmp8;
        {
            /* Real parts of outputs 0, 1, 3 and 4. */
            fftw_real tmp18;
            fftw_real tmp20;
            fftw_real tmp11;
            fftw_real tmp19;
            fftw_real tmp9;
            ASSERT_ALIGNED_DOUBLE;
            tmp18 = (K587785252 * tmp14) - (K951056516 * tmp17);
            tmp20 = (K951056516 * tmp14) + (K587785252 * tmp17);
            tmp9 = tmp1 - (K250000000 * tmp8);
            tmp11 = tmp9 - tmp10;
            tmp19 = tmp10 + tmp9;
            X[3 * iostride] = tmp11 - tmp18;
            X[iostride] = tmp11 + tmp18;
            X[4 * iostride] = tmp19 - tmp20;
            X[0] = tmp19 + tmp20;
        }
        Y[-2 * iostride] = tmp23 - tmp24;
        {
            /* Imaginary parts of outputs 0, 1, 3 and 4. */
            fftw_real tmp27;
            fftw_real tmp32;
            fftw_real tmp30;
            fftw_real tmp31;
            fftw_real tmp29;
            ASSERT_ALIGNED_DOUBLE;
            tmp27 = (K951056516 * tmp25) + (K587785252 * tmp26);
            tmp32 = (K951056516 * tmp26) - (K587785252 * tmp25);
            tmp29 = (K250000000 * tmp23) + tmp24;
            tmp30 = tmp28 + tmp29;
            tmp31 = tmp29 - tmp28;
            Y[0] = -(tmp27 + tmp30);
            Y[-4 * iostride] = tmp27 - tmp30;
            Y[-iostride] = tmp31 - tmp32;
            Y[-3 * iostride] = tmp32 + tmp31;
        }
    }
}
557
558 static const int twiddle_order[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9 };
559 fftw_codelet_desc fftw_hc2hc_forward_10_desc = {
560 "fftw_hc2hc_forward_10",
561 (void (*)()) fftw_hc2hc_forward_10,
562 10,
563 FFTW_FORWARD,
564 FFTW_HC2HC,
565 223,
566 9,
567 twiddle_order,
568 };
569