1 #include "PRE"
2
u4(register complex * a)3 inline void u4(register complex *a)
4 {
5 register real t1, t2, t3, t4, t5, t6, t7, t8;
6
7 t1 = a[0].re + a[1].re;
8 t2 = a[0].im + a[1].im;
9 t6 = a[3].re + a[2].re;
10 t4 = t1;
11 t5 = t2;
12 t4 += t6;
13 t1 -= t6;
14 t3 = VOL a[0].re;
15 t6 = a[2].im + a[3].im;
16 a[0].re = t4;
17 t4 = VOL a[0].im;
18 t5 += t6;
19 t3 -= a[1].re;
20 t2 -= t6;
21 t4 -= a[1].im;
22 a[0].im = t5;
23 t7 = a[2].im - a[3].im;
24 t8 = a[3].re - a[2].re;
25 t5 = t3;
26 t6 = t4;
27 t3 -= t7;
28 t4 -= t8;
29 t5 += t7;
30 t6 += t8;
31 a[2].re = t1;
32 a[1].re = t5;
33 a[3].re = t3;
34 a[1].im = t6;
35 a[3].im = t4;
36 a[2].im = t2;
37 }
38
u8(register complex * a)39 void u8(register complex *a)
40 {
41 register real t1, t2, t3, t4, t5, t6, t7, t8;
42
43 u4(a);
44
45 t7 = a[4].re - a[5].re;
46 t8 = a[4].im - a[5].im;
47 t1 = a[4].re + a[5].re;
48 a[5].re = t7;
49 t2 = a[4].im + a[5].im;
50 a[5].im = t8;
51
52 t7 = a[6].re - a[7].re;
53 t8 = a[6].im - a[7].im;
54 t3 = a[6].re + a[7].re;
55 a[7].re = t7;
56 t4 = a[6].im + a[7].im;
57 a[7].im = t8;
58
59 t8 = t3;
60 t7 = t2;
61 t7 -= t4;
62 t8 -= t1;
63 t1 += t3;
64 t2 += t4;
65 t3 = a[2].re - t7;
66 t7 += a[2].re;
67 t4 = a[2].im - t8;
68 t8 += a[2].im;
69 a[6].re = t3;
70 t5 = a[0].re - t1;
71 t1 += a[0].re;
72 a[2].re = t7;
73 t6 = a[0].im - t2;
74 t2 += a[0].im;
75 a[4].re = t5;
76 a[0].re = t1;
77 a[4].im = t6;
78 a[0].im = t2;
79 a[2].im = t8;
80 a[6].im = t4;
81
82 t1 = a[5].re + a[5].im;
83 t2 = a[5].im - a[5].re;
84 t1 *= sqrthalf;
85 t3 = a[7].re - a[7].im;
86 t2 *= sqrthalf;
87 t4 = a[7].im + a[7].re;
88 t3 *= sqrthalf;
89 t7 = t2;
90 t4 *= sqrthalf;
91 t8 = t3;
92 t8 -= t1;
93 t7 -= t4;
94 t1 += t3;
95 t2 += t4;
96 t4 = a[3].im - t8;
97 t8 += a[3].im;
98 t3 = a[3].re - t7;
99 t7 += a[3].re;
100 a[7].im = t4;
101 t5 = a[1].re - t1;
102 t1 += a[1].re;
103 a[3].im = t8;
104 t6 = a[1].im - t2;
105 t2 += a[1].im;
106 a[5].re = t5;
107 a[1].re = t1;
108 a[5].im = t6;
109 a[3].re = t7;
110 a[7].re = t3;
111 a[1].im = t2;
112 }
113
u16(register complex * a)114 void u16(register complex *a)
115 {
116 register real t1, t2, t3, t4, t5, t6, t7, t8;
117
118 u8(a);
119 u4(a + 8);
120 u4(a + 12);
121
122 UNTRANSFORMZERO(a[0],a[4],a[8],a[12]);
123 UNTRANSFORMHALF(a[2],a[6],a[10],a[14]);
124 UNTRANSFORM(a[1],a[5],a[9],a[13],d16[0].re,d16[0].im);
125 UNTRANSFORM(a[3],a[7],a[11],a[15],d16[0].im,d16[0].re);
126 }
127
128 /* a[0...8n-1], w[0...2n-2] */
upass(register complex * a,register const complex * w,register unsigned int n)129 void upass(register complex *a,register const complex *w,register unsigned int n)
130 {
131 register real t1, t2, t3, t4, t5, t6, t7, t8;
132 register unsigned int k;
133 register complex *b;
134
135 b = a + 4 * n;
136 k = n - 1;
137 n <<= 3;
138
139 UNTRANSFORMZERO(A0[0],A1[0],A2[0],A3[0]);
140 UNTRANSFORM(A0[1],A1[1],A2[1],A3[1],w[0].re,w[0].im);
141
142 for (;;) {
143 UNTRANSFORM(A0[2],A1[2],A2[2],A3[2],w[1].re,w[1].im);
144 UNTRANSFORM(A0[3],A1[3],A2[3],A3[3],w[2].re,w[2].im);
145 if (!--k) break;
146 a += 2;
147 b += 2;
148 w += 2;
149 }
150 }
151
u32(register complex * a)152 void u32(register complex *a)
153 {
154 u16(a);
155 u8(a + 16);
156 u8(a + 24);
157 upass(a,d32,4);
158 }
159
u64(register complex * a)160 void u64(register complex *a)
161 {
162 u32(a);
163 u16(a + 32);
164 u16(a + 48);
165 upass(a,d64,8);
166 }
167
u128(register complex * a)168 void u128(register complex *a)
169 {
170 u64(a);
171 u32(a + 64);
172 u32(a + 96);
173 upass(a,d128,16);
174 }
175
u256(register complex * a)176 void u256(register complex *a)
177 {
178 u128(a);
179 u64(a + 128);
180 u64(a + 192);
181 upass(a,d256,32);
182 }
183
u512(register complex * a)184 void u512(register complex *a)
185 {
186 u256(a);
187 u128(a + 256);
188 u128(a + 384);
189 upass(a,d512,64);
190 }
191