1 #if !defined HAVE_SHORTFHTDITCORE_H__
2 #define HAVE_SHORTFHTDITCORE_H__
3 // This file is part of the FXT library.
4 // Copyright (C) 2010, 2012, 2017 Joerg Arndt
5 // License: GNU General Public License version 3 or later,
6 // see the file COPYING.txt in the main directory.
7
8
9 #include "fxttypes.h"
10 #include "aux0/sumdiff.h"
11 #include "aux0/cmult.h"
12
13 #include <cmath>
14
15
16 template <typename Type>
fht_dit_core_2(Type * f)17 inline void fht_dit_core_2(Type *f)
18 // unrolled version for length 2
19 {
20 sumdiff(f[0], f[1]);
21 }
22 // -------------------------
23 // opcount by generator: #mult=0=0/pt #add=2=1/pt
24
25 template <typename Type>
fht_dit_core_4(Type * f)26 inline void fht_dit_core_4(Type *f)
27 // unrolled version for length 4
28 {
29 { // start initial loop
30 { // fi = 0
31 Type f0, f1, f2, f3;
32 sumdiff(f[0], f[1], f0, f1);
33 sumdiff(f[2], f[3], f2, f3);
34 sumdiff(f0, f2, f[0], f[2]);
35 sumdiff(f1, f3, f[1], f[3]);
36 }
37 } // end initial loop
38 }
39 // -------------------------
40 // opcount by generator: #mult=0=0/pt #add=8=2/pt
41
42 template <typename Type>
fht_dit_core_8(Type * f)43 inline void fht_dit_core_8(Type *f)
44 // unrolled version for length 8
45 {
46 { // start initial loop
47 { // fi = 0 gi = 1
48 Type g0, f0, f1, g1;
49 sumdiff(f[0], f[1], f0, g0);
50 sumdiff(f[2], f[3], f1, g1);
51 sumdiff(f0, f1);
52 sumdiff(g0, g1);
53 Type s1, c1, s2, c2;
54 sumdiff(f[4], f[5], s1, c1);
55 sumdiff(f[6], f[7], s2, c2);
56 sumdiff(s1, s2);
57 sumdiff(f0, s1, f[0], f[4]);
58 sumdiff(f1, s2, f[2], f[6]);
59 c1 *= M_SQRT2;
60 c2 *= M_SQRT2;
61 sumdiff(g0, c1, f[1], f[5]);
62 sumdiff(g1, c2, f[3], f[7]);
63 }
64 } // end initial loop
65 }
66 // -------------------------
67 // opcount by generator: #mult=2=0.25/pt #add=22=2.75/pt
68
69 template <typename Type>
fht_dit_core_16(Type * f)70 inline void fht_dit_core_16(Type *f)
71 // unrolled version for length 16
72 {
73 { // start initial loop
74 { // fi = 0
75 Type f0, f1, f2, f3;
76 sumdiff(f[0], f[1], f0, f1);
77 sumdiff(f[2], f[3], f2, f3);
78 sumdiff(f0, f2, f[0], f[2]);
79 sumdiff(f1, f3, f[1], f[3]);
80 }
81 { // fi = 4
82 Type f0, f1, f2, f3;
83 sumdiff(f[4], f[5], f0, f1);
84 sumdiff(f[6], f[7], f2, f3);
85 sumdiff(f0, f2, f[4], f[6]);
86 sumdiff(f1, f3, f[5], f[7]);
87 }
88 { // fi = 8
89 Type f0, f1, f2, f3;
90 sumdiff(f[8], f[9], f0, f1);
91 sumdiff(f[10], f[11], f2, f3);
92 sumdiff(f0, f2, f[8], f[10]);
93 sumdiff(f1, f3, f[9], f[11]);
94 }
95 { // fi = 12
96 Type f0, f1, f2, f3;
97 sumdiff(f[12], f[13], f0, f1);
98 sumdiff(f[14], f[15], f2, f3);
99 sumdiff(f0, f2, f[12], f[14]);
100 sumdiff(f1, f3, f[13], f[15]);
101 }
102 } // end initial loop
103
104 { // -------- ldk=2 k4=16
105 Type f0, f1, f2, f3;
106 // do loop:
107 sumdiff(f[0], f[4], f0, f1);
108 sumdiff(f[8], f[12], f2, f3);
109 sumdiff(f0, f2, f[0], f[8]);
110 sumdiff(f1, f3, f[4], f[12]);
111 sumdiff(f[2], f[6], f0, f1);
112 f3 = M_SQRT2 * f[14];
113 f2 = M_SQRT2 * f[10];
114 sumdiff(f0, f2, f[2], f[10]);
115 sumdiff(f1, f3, f[6], f[14]);
116 }
117
118 { // kh=2
119 Type a, b, g0, f0, f1, g1, f2, g2, f3, g3;
120 { // ---- i=1
121 double c1=.923879532511286756128183189397; // == cos(Pi*1/8) == cos(Pi*1/8)
122 double s1=.382683432365089771728459984029; // == sin(Pi*1/8) == sin(Pi*1/8)
123 // c2 = s2 = sqrt(1/2)
124 // do loop II:
125 sumdiff(f[5], f[7], a, b);
126 a *= M_SQRT1_2;
127 b *= M_SQRT1_2;
128 sumdiff(f[1], a, f0, f1);
129 sumdiff(f[3], b, g0, g1);
130 sumdiff(f[13], f[15], a, b);
131 a *= M_SQRT1_2;
132 b *= M_SQRT1_2;
133 sumdiff(f[9], a, f2, f3);
134 sumdiff(f[11], b, g2, g3);
135 cmult(s1, c1, f2, g3, b, a);
136 sumdiff(f0, a, f[1], f[9]);
137 sumdiff(g1, b, f[7], f[15]);
138 cmult(c1, s1, g2, f3, b, a);
139 sumdiff(g0, a, f[3], f[11]);
140 sumdiff(f1, b, f[5], f[13]);
141 }
142 }
143 }
144 // -------------------------
145 // opcount by generator: #mult=14=0.875/pt #add=70=4.375/pt
146
147 template <typename Type>
fht_dit_core_32(Type * f)148 inline void fht_dit_core_32(Type *f)
149 // unrolled version for length 32
150 {
151 { // start initial loop
152 { // fi = 0 gi = 1
153 Type g0, f0, f1, g1;
154 sumdiff(f[0], f[1], f0, g0);
155 sumdiff(f[2], f[3], f1, g1);
156 sumdiff(f0, f1);
157 sumdiff(g0, g1);
158 Type s1, c1, s2, c2;
159 sumdiff(f[4], f[5], s1, c1);
160 sumdiff(f[6], f[7], s2, c2);
161 sumdiff(s1, s2);
162 sumdiff(f0, s1, f[0], f[4]);
163 sumdiff(f1, s2, f[2], f[6]);
164 c1 *= M_SQRT2;
165 c2 *= M_SQRT2;
166 sumdiff(g0, c1, f[1], f[5]);
167 sumdiff(g1, c2, f[3], f[7]);
168 }
169 { // fi = 8 gi = 9
170 Type g0, f0, f1, g1;
171 sumdiff(f[8], f[9], f0, g0);
172 sumdiff(f[10], f[11], f1, g1);
173 sumdiff(f0, f1);
174 sumdiff(g0, g1);
175 Type s1, c1, s2, c2;
176 sumdiff(f[12], f[13], s1, c1);
177 sumdiff(f[14], f[15], s2, c2);
178 sumdiff(s1, s2);
179 sumdiff(f0, s1, f[8], f[12]);
180 sumdiff(f1, s2, f[10], f[14]);
181 c1 *= M_SQRT2;
182 c2 *= M_SQRT2;
183 sumdiff(g0, c1, f[9], f[13]);
184 sumdiff(g1, c2, f[11], f[15]);
185 }
186 { // fi = 16 gi = 17
187 Type g0, f0, f1, g1;
188 sumdiff(f[16], f[17], f0, g0);
189 sumdiff(f[18], f[19], f1, g1);
190 sumdiff(f0, f1);
191 sumdiff(g0, g1);
192 Type s1, c1, s2, c2;
193 sumdiff(f[20], f[21], s1, c1);
194 sumdiff(f[22], f[23], s2, c2);
195 sumdiff(s1, s2);
196 sumdiff(f0, s1, f[16], f[20]);
197 sumdiff(f1, s2, f[18], f[22]);
198 c1 *= M_SQRT2;
199 c2 *= M_SQRT2;
200 sumdiff(g0, c1, f[17], f[21]);
201 sumdiff(g1, c2, f[19], f[23]);
202 }
203 { // fi = 24 gi = 25
204 Type g0, f0, f1, g1;
205 sumdiff(f[24], f[25], f0, g0);
206 sumdiff(f[26], f[27], f1, g1);
207 sumdiff(f0, f1);
208 sumdiff(g0, g1);
209 Type s1, c1, s2, c2;
210 sumdiff(f[28], f[29], s1, c1);
211 sumdiff(f[30], f[31], s2, c2);
212 sumdiff(s1, s2);
213 sumdiff(f0, s1, f[24], f[28]);
214 sumdiff(f1, s2, f[26], f[30]);
215 c1 *= M_SQRT2;
216 c2 *= M_SQRT2;
217 sumdiff(g0, c1, f[25], f[29]);
218 sumdiff(g1, c2, f[27], f[31]);
219 }
220 } // end initial loop
221
222 { // -------- ldk=3 k4=32
223 Type f0, f1, f2, f3;
224 // do loop:
225 sumdiff(f[0], f[8], f0, f1);
226 sumdiff(f[16], f[24], f2, f3);
227 sumdiff(f0, f2, f[0], f[16]);
228 sumdiff(f1, f3, f[8], f[24]);
229 sumdiff(f[4], f[12], f0, f1);
230 f3 = M_SQRT2 * f[28];
231 f2 = M_SQRT2 * f[20];
232 sumdiff(f0, f2, f[4], f[20]);
233 sumdiff(f1, f3, f[12], f[28]);
234 }
235
236 { // kh=4
237 Type a, b, g0, f0, f1, g1, f2, g2, f3, g3;
238 { // ---- i=1
239 double c1=.980785280403230449126182236134; // == cos(Pi*1/16) == cos(Pi*1/16)
240 double s1=.195090322016128267848284868476; // == sin(Pi*1/16) == sin(Pi*1/16)
241 double c2=.923879532511286756128183189397; // == cos(Pi*2/16) == cos(Pi*1/8)
242 double s2=.382683432365089771728459984029; // == sin(Pi*2/16) == sin(Pi*1/8)
243 // do loop II:
244 cmult(s2, c2, f[9], f[15], b, a);
245 sumdiff(f[1], a, f0, f1);
246 sumdiff(f[7], b, g0, g1);
247 cmult(s2, c2, f[25], f[31], b, a);
248 sumdiff(f[17], a, f2, f3);
249 sumdiff(f[23], b, g2, g3);
250 cmult(s1, c1, f2, g3, b, a);
251 sumdiff(f0, a, f[1], f[17]);
252 sumdiff(g1, b, f[15], f[31]);
253 cmult(c1, s1, g2, f3, b, a);
254 sumdiff(g0, a, f[7], f[23]);
255 sumdiff(f1, b, f[9], f[25]);
256 }
257 { // ---- i=2
258 double c1=.923879532511286756128183189397; // == cos(Pi*2/16) == cos(Pi*1/8)
259 double s1=.382683432365089771728459984029; // == sin(Pi*2/16) == sin(Pi*1/8)
260 // c2 = s2 = sqrt(1/2)
261 // do loop II:
262 sumdiff(f[10], f[14], a, b);
263 a *= M_SQRT1_2;
264 b *= M_SQRT1_2;
265 sumdiff(f[2], a, f0, f1);
266 sumdiff(f[6], b, g0, g1);
267 sumdiff(f[26], f[30], a, b);
268 a *= M_SQRT1_2;
269 b *= M_SQRT1_2;
270 sumdiff(f[18], a, f2, f3);
271 sumdiff(f[22], b, g2, g3);
272 cmult(s1, c1, f2, g3, b, a);
273 sumdiff(f0, a, f[2], f[18]);
274 sumdiff(g1, b, f[14], f[30]);
275 cmult(c1, s1, g2, f3, b, a);
276 sumdiff(g0, a, f[6], f[22]);
277 sumdiff(f1, b, f[10], f[26]);
278 }
279 { // ---- i=3
280 double c1=.831469612302545237078788377618; // == cos(Pi*3/16) == cos(Pi*3/16)
281 double s1=.555570233019602224742830813947; // == sin(Pi*3/16) == sin(Pi*3/16)
282 double c2=.382683432365089771728459984032; // == cos(Pi*6/16) == cos(Pi*3/8)
283 double s2=.923879532511286756128183189396; // == sin(Pi*6/16) == sin(Pi*3/8)
284 // do loop II:
285 cmult(s2, c2, f[11], f[13], b, a);
286 sumdiff(f[3], a, f0, f1);
287 sumdiff(f[5], b, g0, g1);
288 cmult(s2, c2, f[27], f[29], b, a);
289 sumdiff(f[19], a, f2, f3);
290 sumdiff(f[21], b, g2, g3);
291 cmult(s1, c1, f2, g3, b, a);
292 sumdiff(f0, a, f[3], f[19]);
293 sumdiff(g1, b, f[13], f[29]);
294 cmult(c1, s1, g2, f3, b, a);
295 sumdiff(g0, a, f[5], f[21]);
296 sumdiff(f1, b, f[11], f[27]);
297 }
298 }
299 }
300 // -------------------------
301 // opcount by generator: #mult=54=1.6875/pt #add=174=5.4375/pt
302
303 template <typename Type>
fht_dit_core_64(Type * f)304 inline void fht_dit_core_64(Type *f)
305 // unrolled version for length 64
306 {
307 { // start initial loop
308 { // fi = 0
309 Type f0, f1, f2, f3;
310 sumdiff(f[0], f[1], f0, f1);
311 sumdiff(f[2], f[3], f2, f3);
312 sumdiff(f0, f2, f[0], f[2]);
313 sumdiff(f1, f3, f[1], f[3]);
314 }
315 { // fi = 4
316 Type f0, f1, f2, f3;
317 sumdiff(f[4], f[5], f0, f1);
318 sumdiff(f[6], f[7], f2, f3);
319 sumdiff(f0, f2, f[4], f[6]);
320 sumdiff(f1, f3, f[5], f[7]);
321 }
322 { // fi = 8
323 Type f0, f1, f2, f3;
324 sumdiff(f[8], f[9], f0, f1);
325 sumdiff(f[10], f[11], f2, f3);
326 sumdiff(f0, f2, f[8], f[10]);
327 sumdiff(f1, f3, f[9], f[11]);
328 }
329 { // fi = 12
330 Type f0, f1, f2, f3;
331 sumdiff(f[12], f[13], f0, f1);
332 sumdiff(f[14], f[15], f2, f3);
333 sumdiff(f0, f2, f[12], f[14]);
334 sumdiff(f1, f3, f[13], f[15]);
335 }
336 { // fi = 16
337 Type f0, f1, f2, f3;
338 sumdiff(f[16], f[17], f0, f1);
339 sumdiff(f[18], f[19], f2, f3);
340 sumdiff(f0, f2, f[16], f[18]);
341 sumdiff(f1, f3, f[17], f[19]);
342 }
343 { // fi = 20
344 Type f0, f1, f2, f3;
345 sumdiff(f[20], f[21], f0, f1);
346 sumdiff(f[22], f[23], f2, f3);
347 sumdiff(f0, f2, f[20], f[22]);
348 sumdiff(f1, f3, f[21], f[23]);
349 }
350 { // fi = 24
351 Type f0, f1, f2, f3;
352 sumdiff(f[24], f[25], f0, f1);
353 sumdiff(f[26], f[27], f2, f3);
354 sumdiff(f0, f2, f[24], f[26]);
355 sumdiff(f1, f3, f[25], f[27]);
356 }
357 { // fi = 28
358 Type f0, f1, f2, f3;
359 sumdiff(f[28], f[29], f0, f1);
360 sumdiff(f[30], f[31], f2, f3);
361 sumdiff(f0, f2, f[28], f[30]);
362 sumdiff(f1, f3, f[29], f[31]);
363 }
364 { // fi = 32
365 Type f0, f1, f2, f3;
366 sumdiff(f[32], f[33], f0, f1);
367 sumdiff(f[34], f[35], f2, f3);
368 sumdiff(f0, f2, f[32], f[34]);
369 sumdiff(f1, f3, f[33], f[35]);
370 }
371 { // fi = 36
372 Type f0, f1, f2, f3;
373 sumdiff(f[36], f[37], f0, f1);
374 sumdiff(f[38], f[39], f2, f3);
375 sumdiff(f0, f2, f[36], f[38]);
376 sumdiff(f1, f3, f[37], f[39]);
377 }
378 { // fi = 40
379 Type f0, f1, f2, f3;
380 sumdiff(f[40], f[41], f0, f1);
381 sumdiff(f[42], f[43], f2, f3);
382 sumdiff(f0, f2, f[40], f[42]);
383 sumdiff(f1, f3, f[41], f[43]);
384 }
385 { // fi = 44
386 Type f0, f1, f2, f3;
387 sumdiff(f[44], f[45], f0, f1);
388 sumdiff(f[46], f[47], f2, f3);
389 sumdiff(f0, f2, f[44], f[46]);
390 sumdiff(f1, f3, f[45], f[47]);
391 }
392 { // fi = 48
393 Type f0, f1, f2, f3;
394 sumdiff(f[48], f[49], f0, f1);
395 sumdiff(f[50], f[51], f2, f3);
396 sumdiff(f0, f2, f[48], f[50]);
397 sumdiff(f1, f3, f[49], f[51]);
398 }
399 { // fi = 52
400 Type f0, f1, f2, f3;
401 sumdiff(f[52], f[53], f0, f1);
402 sumdiff(f[54], f[55], f2, f3);
403 sumdiff(f0, f2, f[52], f[54]);
404 sumdiff(f1, f3, f[53], f[55]);
405 }
406 { // fi = 56
407 Type f0, f1, f2, f3;
408 sumdiff(f[56], f[57], f0, f1);
409 sumdiff(f[58], f[59], f2, f3);
410 sumdiff(f0, f2, f[56], f[58]);
411 sumdiff(f1, f3, f[57], f[59]);
412 }
413 { // fi = 60
414 Type f0, f1, f2, f3;
415 sumdiff(f[60], f[61], f0, f1);
416 sumdiff(f[62], f[63], f2, f3);
417 sumdiff(f0, f2, f[60], f[62]);
418 sumdiff(f1, f3, f[61], f[63]);
419 }
420 } // end initial loop
421
422 { // -------- ldk=2 k4=16
423 Type f0, f1, f2, f3;
424 // do loop:
425 sumdiff(f[0], f[4], f0, f1);
426 sumdiff(f[8], f[12], f2, f3);
427 sumdiff(f0, f2, f[0], f[8]);
428 sumdiff(f1, f3, f[4], f[12]);
429 sumdiff(f[2], f[6], f0, f1);
430 f3 = M_SQRT2 * f[14];
431 f2 = M_SQRT2 * f[10];
432 sumdiff(f0, f2, f[2], f[10]);
433 sumdiff(f1, f3, f[6], f[14]);
434 // do loop:
435 sumdiff(f[16], f[20], f0, f1);
436 sumdiff(f[24], f[28], f2, f3);
437 sumdiff(f0, f2, f[16], f[24]);
438 sumdiff(f1, f3, f[20], f[28]);
439 sumdiff(f[18], f[22], f0, f1);
440 f3 = M_SQRT2 * f[30];
441 f2 = M_SQRT2 * f[26];
442 sumdiff(f0, f2, f[18], f[26]);
443 sumdiff(f1, f3, f[22], f[30]);
444 // do loop:
445 sumdiff(f[32], f[36], f0, f1);
446 sumdiff(f[40], f[44], f2, f3);
447 sumdiff(f0, f2, f[32], f[40]);
448 sumdiff(f1, f3, f[36], f[44]);
449 sumdiff(f[34], f[38], f0, f1);
450 f3 = M_SQRT2 * f[46];
451 f2 = M_SQRT2 * f[42];
452 sumdiff(f0, f2, f[34], f[42]);
453 sumdiff(f1, f3, f[38], f[46]);
454 // do loop:
455 sumdiff(f[48], f[52], f0, f1);
456 sumdiff(f[56], f[60], f2, f3);
457 sumdiff(f0, f2, f[48], f[56]);
458 sumdiff(f1, f3, f[52], f[60]);
459 sumdiff(f[50], f[54], f0, f1);
460 f3 = M_SQRT2 * f[62];
461 f2 = M_SQRT2 * f[58];
462 sumdiff(f0, f2, f[50], f[58]);
463 sumdiff(f1, f3, f[54], f[62]);
464 }
465
466 { // kh=2
467 Type a, b, g0, f0, f1, g1, f2, g2, f3, g3;
468 { // ---- i=1
469 double c1=.923879532511286756128183189397; // == cos(Pi*1/8) == cos(Pi*1/8)
470 double s1=.382683432365089771728459984029; // == sin(Pi*1/8) == sin(Pi*1/8)
471 // c2 = s2 = sqrt(1/2)
472 // do loop II:
473 sumdiff(f[5], f[7], a, b);
474 a *= M_SQRT1_2;
475 b *= M_SQRT1_2;
476 sumdiff(f[1], a, f0, f1);
477 sumdiff(f[3], b, g0, g1);
478 sumdiff(f[13], f[15], a, b);
479 a *= M_SQRT1_2;
480 b *= M_SQRT1_2;
481 sumdiff(f[9], a, f2, f3);
482 sumdiff(f[11], b, g2, g3);
483 cmult(s1, c1, f2, g3, b, a);
484 sumdiff(f0, a, f[1], f[9]);
485 sumdiff(g1, b, f[7], f[15]);
486 cmult(c1, s1, g2, f3, b, a);
487 sumdiff(g0, a, f[3], f[11]);
488 sumdiff(f1, b, f[5], f[13]);
489 // do loop II:
490 sumdiff(f[21], f[23], a, b);
491 a *= M_SQRT1_2;
492 b *= M_SQRT1_2;
493 sumdiff(f[17], a, f0, f1);
494 sumdiff(f[19], b, g0, g1);
495 sumdiff(f[29], f[31], a, b);
496 a *= M_SQRT1_2;
497 b *= M_SQRT1_2;
498 sumdiff(f[25], a, f2, f3);
499 sumdiff(f[27], b, g2, g3);
500 cmult(s1, c1, f2, g3, b, a);
501 sumdiff(f0, a, f[17], f[25]);
502 sumdiff(g1, b, f[23], f[31]);
503 cmult(c1, s1, g2, f3, b, a);
504 sumdiff(g0, a, f[19], f[27]);
505 sumdiff(f1, b, f[21], f[29]);
506 // do loop II:
507 sumdiff(f[37], f[39], a, b);
508 a *= M_SQRT1_2;
509 b *= M_SQRT1_2;
510 sumdiff(f[33], a, f0, f1);
511 sumdiff(f[35], b, g0, g1);
512 sumdiff(f[45], f[47], a, b);
513 a *= M_SQRT1_2;
514 b *= M_SQRT1_2;
515 sumdiff(f[41], a, f2, f3);
516 sumdiff(f[43], b, g2, g3);
517 cmult(s1, c1, f2, g3, b, a);
518 sumdiff(f0, a, f[33], f[41]);
519 sumdiff(g1, b, f[39], f[47]);
520 cmult(c1, s1, g2, f3, b, a);
521 sumdiff(g0, a, f[35], f[43]);
522 sumdiff(f1, b, f[37], f[45]);
523 // do loop II:
524 sumdiff(f[53], f[55], a, b);
525 a *= M_SQRT1_2;
526 b *= M_SQRT1_2;
527 sumdiff(f[49], a, f0, f1);
528 sumdiff(f[51], b, g0, g1);
529 sumdiff(f[61], f[63], a, b);
530 a *= M_SQRT1_2;
531 b *= M_SQRT1_2;
532 sumdiff(f[57], a, f2, f3);
533 sumdiff(f[59], b, g2, g3);
534 cmult(s1, c1, f2, g3, b, a);
535 sumdiff(f0, a, f[49], f[57]);
536 sumdiff(g1, b, f[55], f[63]);
537 cmult(c1, s1, g2, f3, b, a);
538 sumdiff(g0, a, f[51], f[59]);
539 sumdiff(f1, b, f[53], f[61]);
540 }
541 }
542
543 { // -------- ldk=4 k4=64
544 Type f0, f1, f2, f3;
545 // do loop:
546 sumdiff(f[0], f[16], f0, f1);
547 sumdiff(f[32], f[48], f2, f3);
548 sumdiff(f0, f2, f[0], f[32]);
549 sumdiff(f1, f3, f[16], f[48]);
550 sumdiff(f[8], f[24], f0, f1);
551 f3 = M_SQRT2 * f[56];
552 f2 = M_SQRT2 * f[40];
553 sumdiff(f0, f2, f[8], f[40]);
554 sumdiff(f1, f3, f[24], f[56]);
555 }
556
557 { // kh=8
558 Type a, b, g0, f0, f1, g1, f2, g2, f3, g3;
559 { // ---- i=1
560 double c1=.995184726672196886244836953109; // == cos(Pi*1/32) == cos(Pi*1/32)
561 double s1=.098017140329560601994195563888; // == sin(Pi*1/32) == sin(Pi*1/32)
562 double c2=.980785280403230449126182236134; // == cos(Pi*2/32) == cos(Pi*1/16)
563 double s2=.195090322016128267848284868476; // == sin(Pi*2/32) == sin(Pi*1/16)
564 // do loop II:
565 cmult(s2, c2, f[17], f[31], b, a);
566 sumdiff(f[1], a, f0, f1);
567 sumdiff(f[15], b, g0, g1);
568 cmult(s2, c2, f[49], f[63], b, a);
569 sumdiff(f[33], a, f2, f3);
570 sumdiff(f[47], b, g2, g3);
571 cmult(s1, c1, f2, g3, b, a);
572 sumdiff(f0, a, f[1], f[33]);
573 sumdiff(g1, b, f[31], f[63]);
574 cmult(c1, s1, g2, f3, b, a);
575 sumdiff(g0, a, f[15], f[47]);
576 sumdiff(f1, b, f[17], f[49]);
577 }
578 { // ---- i=2
579 double c1=.980785280403230449126182236134; // == cos(Pi*2/32) == cos(Pi*1/16)
580 double s1=.195090322016128267848284868476; // == sin(Pi*2/32) == sin(Pi*1/16)
581 double c2=.923879532511286756128183189397; // == cos(Pi*4/32) == cos(Pi*1/8)
582 double s2=.382683432365089771728459984029; // == sin(Pi*4/32) == sin(Pi*1/8)
583 // do loop II:
584 cmult(s2, c2, f[18], f[30], b, a);
585 sumdiff(f[2], a, f0, f1);
586 sumdiff(f[14], b, g0, g1);
587 cmult(s2, c2, f[50], f[62], b, a);
588 sumdiff(f[34], a, f2, f3);
589 sumdiff(f[46], b, g2, g3);
590 cmult(s1, c1, f2, g3, b, a);
591 sumdiff(f0, a, f[2], f[34]);
592 sumdiff(g1, b, f[30], f[62]);
593 cmult(c1, s1, g2, f3, b, a);
594 sumdiff(g0, a, f[14], f[46]);
595 sumdiff(f1, b, f[18], f[50]);
596 }
597 { // ---- i=3
598 double c1=.956940335732208864935797886980; // == cos(Pi*3/32) == cos(Pi*3/32)
599 double s1=.290284677254462367636192375816; // == sin(Pi*3/32) == sin(Pi*3/32)
600 double c2=.831469612302545237078788377618; // == cos(Pi*6/32) == cos(Pi*3/16)
601 double s2=.555570233019602224742830813947; // == sin(Pi*6/32) == sin(Pi*3/16)
602 // do loop II:
603 cmult(s2, c2, f[19], f[29], b, a);
604 sumdiff(f[3], a, f0, f1);
605 sumdiff(f[13], b, g0, g1);
606 cmult(s2, c2, f[51], f[61], b, a);
607 sumdiff(f[35], a, f2, f3);
608 sumdiff(f[45], b, g2, g3);
609 cmult(s1, c1, f2, g3, b, a);
610 sumdiff(f0, a, f[3], f[35]);
611 sumdiff(g1, b, f[29], f[61]);
612 cmult(c1, s1, g2, f3, b, a);
613 sumdiff(g0, a, f[13], f[45]);
614 sumdiff(f1, b, f[19], f[51]);
615 }
616 { // ---- i=4
617 double c1=.923879532511286756128183189397; // == cos(Pi*4/32) == cos(Pi*1/8)
618 double s1=.382683432365089771728459984029; // == sin(Pi*4/32) == sin(Pi*1/8)
619 // c2 = s2 = sqrt(1/2)
620 // do loop II:
621 sumdiff(f[20], f[28], a, b);
622 a *= M_SQRT1_2;
623 b *= M_SQRT1_2;
624 sumdiff(f[4], a, f0, f1);
625 sumdiff(f[12], b, g0, g1);
626 sumdiff(f[52], f[60], a, b);
627 a *= M_SQRT1_2;
628 b *= M_SQRT1_2;
629 sumdiff(f[36], a, f2, f3);
630 sumdiff(f[44], b, g2, g3);
631 cmult(s1, c1, f2, g3, b, a);
632 sumdiff(f0, a, f[4], f[36]);
633 sumdiff(g1, b, f[28], f[60]);
634 cmult(c1, s1, g2, f3, b, a);
635 sumdiff(g0, a, f[12], f[44]);
636 sumdiff(f1, b, f[20], f[52]);
637 }
638 { // ---- i=5
639 double c1=.881921264348355029712756863661; // == cos(Pi*5/32) == cos(Pi*5/32)
640 double s1=.471396736825997648556387625904; // == sin(Pi*5/32) == sin(Pi*5/32)
641 double c2=.555570233019602224742830813950; // == cos(Pi*10/32) == cos(Pi*5/16)
642 double s2=.831469612302545237078788377616; // == sin(Pi*10/32) == sin(Pi*5/16)
643 // do loop II:
644 cmult(s2, c2, f[21], f[27], b, a);
645 sumdiff(f[5], a, f0, f1);
646 sumdiff(f[11], b, g0, g1);
647 cmult(s2, c2, f[53], f[59], b, a);
648 sumdiff(f[37], a, f2, f3);
649 sumdiff(f[43], b, g2, g3);
650 cmult(s1, c1, f2, g3, b, a);
651 sumdiff(f0, a, f[5], f[37]);
652 sumdiff(g1, b, f[27], f[59]);
653 cmult(c1, s1, g2, f3, b, a);
654 sumdiff(g0, a, f[11], f[43]);
655 sumdiff(f1, b, f[21], f[53]);
656 }
657 { // ---- i=6
658 double c1=.831469612302545237078788377618; // == cos(Pi*6/32) == cos(Pi*3/16)
659 double s1=.555570233019602224742830813947; // == sin(Pi*6/32) == sin(Pi*3/16)
660 double c2=.382683432365089771728459984032; // == cos(Pi*12/32) == cos(Pi*3/8)
661 double s2=.923879532511286756128183189396; // == sin(Pi*12/32) == sin(Pi*3/8)
662 // do loop II:
663 cmult(s2, c2, f[22], f[26], b, a);
664 sumdiff(f[6], a, f0, f1);
665 sumdiff(f[10], b, g0, g1);
666 cmult(s2, c2, f[54], f[58], b, a);
667 sumdiff(f[38], a, f2, f3);
668 sumdiff(f[42], b, g2, g3);
669 cmult(s1, c1, f2, g3, b, a);
670 sumdiff(f0, a, f[6], f[38]);
671 sumdiff(g1, b, f[26], f[58]);
672 cmult(c1, s1, g2, f3, b, a);
673 sumdiff(g0, a, f[10], f[42]);
674 sumdiff(f1, b, f[22], f[54]);
675 }
676 { // ---- i=7
677 double c1=.773010453362736960810906609759; // == cos(Pi*7/32) == cos(Pi*7/32)
678 double s1=.634393284163645498215171613224; // == sin(Pi*7/32) == sin(Pi*7/32)
679 double c2=.195090322016128267848284868478; // == cos(Pi*14/32) == cos(Pi*7/16)
680 double s2=.980785280403230449126182236133; // == sin(Pi*14/32) == sin(Pi*7/16)
681 // do loop II:
682 cmult(s2, c2, f[23], f[25], b, a);
683 sumdiff(f[7], a, f0, f1);
684 sumdiff(f[9], b, g0, g1);
685 cmult(s2, c2, f[55], f[57], b, a);
686 sumdiff(f[39], a, f2, f3);
687 sumdiff(f[41], b, g2, g3);
688 cmult(s1, c1, f2, g3, b, a);
689 sumdiff(f0, a, f[7], f[39]);
690 sumdiff(g1, b, f[25], f[57]);
691 cmult(c1, s1, g2, f3, b, a);
692 sumdiff(g0, a, f[9], f[41]);
693 sumdiff(f1, b, f[23], f[55]);
694 }
695 }
696 }
697 // -------------------------
698 // opcount by generator: #mult=166=2.59375/pt #add=462=7.21875/pt
699
700
701 #endif // !defined HAVE_SHORTFHTDITCORE_H__
702