1 /*
2   This file is for transpose of the Gao-Mateer FFT
3   Functions with names ending with _tr are (roughly) the transpose of the corresponding functions in fft.c
4 */
5 
6 #include "fft_tr.h"
7 
8 #include "transpose.h"
9 
10 #include <stdint.h>
11 
12 static void radix_conversions_tr(vec in[][ GFBITS ]) {
13     int i, j, k;
14 
15     const vec mask[6][2] = {
16         {0x2222222222222222, 0x4444444444444444},
17         {0x0C0C0C0C0C0C0C0C, 0x3030303030303030},
18         {0x00F000F000F000F0, 0x0F000F000F000F00},
19         {0x0000FF000000FF00, 0x00FF000000FF0000},
20         {0x00000000FFFF0000, 0x0000FFFF00000000},
21         {0xFFFFFFFF00000000, 0x00000000FFFFFFFF}
22     };
23 
24     const vec s[6][4][GFBITS] = {
25 #include "scalars_4x.inc"
26     };
27 
28     //
29 
30     for (j = 6; j >= 0; j--) {
31         if (j < 6) {
32             PQCLEAN_MCELIECE460896_VEC_vec_mul(in[0], in[0], s[j][0]); // scaling
33             PQCLEAN_MCELIECE460896_VEC_vec_mul(in[1], in[1], s[j][1]); // scaling
34             PQCLEAN_MCELIECE460896_VEC_vec_mul(in[2], in[2], s[j][2]); // scaling
35             PQCLEAN_MCELIECE460896_VEC_vec_mul(in[3], in[3], s[j][3]); // scaling
36         }
37 
38         for (k = j; k <= 4; k++) {
39             for (i = 0; i < GFBITS; i++) {
40                 in[0][i] ^= (in[0][i] & mask[k][0]) << (1 << k);
41                 in[0][i] ^= (in[0][i] & mask[k][1]) << (1 << k);
42                 in[1][i] ^= (in[1][i] & mask[k][0]) << (1 << k);
43                 in[1][i] ^= (in[1][i] & mask[k][1]) << (1 << k);
44                 in[2][i] ^= (in[2][i] & mask[k][0]) << (1 << k);
45                 in[2][i] ^= (in[2][i] & mask[k][1]) << (1 << k);
46                 in[3][i] ^= (in[3][i] & mask[k][0]) << (1 << k);
47                 in[3][i] ^= (in[3][i] & mask[k][1]) << (1 << k);
48             }
49         }
50 
51         if (j <= 5) {
52             for (i = 0; i < GFBITS; i++) {
53                 in[1][i] ^= in[0][i] >> 32;
54                 in[1][i] ^= in[1][i] << 32;
55 
56                 in[3][i] ^= in[2][i] >> 32;
57                 in[3][i] ^= in[3][i] << 32;
58             }
59         }
60 
61         for (i = 0; i < GFBITS; i++) {
62             in[3][i] ^= in[2][i] ^= in[1][i];
63         }
64     }
65 }
66 
67 static void butterflies_tr(vec out[][ GFBITS ], vec in[][ GFBITS ]) {
68     int i, j, k, s, b;
69 
70     vec tmp[ GFBITS ];
71     vec pre[6][2][ GFBITS ];
72     vec buf[2][64];
73 
74     const vec consts[ 128 ][ GFBITS ] = {
75 #include "consts.inc"
76     };
77 
78     uint64_t consts_ptr = 128;
79 
80     const unsigned char reversal[128] = {
81         0, 64, 32, 96, 16, 80, 48, 112,
82         8, 72, 40, 104, 24, 88, 56, 120,
83         4, 68, 36, 100, 20, 84, 52, 116,
84         12, 76, 44, 108, 28, 92, 60, 124,
85         2, 66, 34, 98, 18, 82, 50, 114,
86         10, 74, 42, 106, 26, 90, 58, 122,
87         6, 70, 38, 102, 22, 86, 54, 118,
88         14, 78, 46, 110, 30, 94, 62, 126,
89         1, 65, 33, 97, 17, 81, 49, 113,
90         9, 73, 41, 105, 25, 89, 57, 121,
91         5, 69, 37, 101, 21, 85, 53, 117,
92         13, 77, 45, 109, 29, 93, 61, 125,
93         3, 67, 35, 99, 19, 83, 51, 115,
94         11, 75, 43, 107, 27, 91, 59, 123,
95         7, 71, 39, 103, 23, 87, 55, 119,
96         15, 79, 47, 111, 31, 95, 63, 127
97     };
98 
99     const uint16_t beta[6] = {5246, 5306, 6039, 6685, 4905, 6755};
100 
101     //
102 
103     for (i = 6; i >= 0; i--) {
104         s = 1 << i;
105         consts_ptr -= s;
106 
107         for (j = 0; j < 128; j += 2 * s) {
108             for (k = j; k < j + s; k++) {
109                 for (b = 0; b < GFBITS; b++) {
110                     in[k][b] ^= in[k + s][b];
111                 }
112 
113                 PQCLEAN_MCELIECE460896_VEC_vec_mul(tmp, in[k], consts[ consts_ptr + (k - j) ]);
114 
115                 for (b = 0; b < GFBITS; b++) {
116                     in[k + s][b] ^= tmp[b];
117                 }
118             }
119         }
120     }
121 
122     for (i = 0; i < GFBITS; i++) {
123         for (k = 0; k < 128; k++) {
124             (&buf[0][0])[ k ] = in[ reversal[k] ][i];
125         }
126 
127         PQCLEAN_MCELIECE460896_VEC_transpose_64x64(buf[0], buf[0]);
128         PQCLEAN_MCELIECE460896_VEC_transpose_64x64(buf[1], buf[1]);
129 
130         for (k = 0; k < 2; k++) {
131             pre[0][k][i] = buf[k][32];
132             buf[k][33] ^= buf[k][32];
133             pre[1][k][i] = buf[k][33];
134             buf[k][35] ^= buf[k][33];
135             pre[0][k][i] ^= buf[k][35];
136             buf[k][34] ^= buf[k][35];
137             pre[2][k][i] = buf[k][34];
138             buf[k][38] ^= buf[k][34];
139             pre[0][k][i] ^= buf[k][38];
140             buf[k][39] ^= buf[k][38];
141             pre[1][k][i] ^= buf[k][39];
142             buf[k][37] ^= buf[k][39];
143             pre[0][k][i] ^= buf[k][37];
144             buf[k][36] ^= buf[k][37];
145             pre[3][k][i] = buf[k][36];
146             buf[k][44] ^= buf[k][36];
147             pre[0][k][i] ^= buf[k][44];
148             buf[k][45] ^= buf[k][44];
149             pre[1][k][i] ^= buf[k][45];
150             buf[k][47] ^= buf[k][45];
151             pre[0][k][i] ^= buf[k][47];
152             buf[k][46] ^= buf[k][47];
153             pre[2][k][i] ^= buf[k][46];
154             buf[k][42] ^= buf[k][46];
155             pre[0][k][i] ^= buf[k][42];
156             buf[k][43] ^= buf[k][42];
157             pre[1][k][i] ^= buf[k][43];
158             buf[k][41] ^= buf[k][43];
159             pre[0][k][i] ^= buf[k][41];
160             buf[k][40] ^= buf[k][41];
161             pre[4][k][i] = buf[k][40];
162             buf[k][56] ^= buf[k][40];
163             pre[0][k][i] ^= buf[k][56];
164             buf[k][57] ^= buf[k][56];
165             pre[1][k][i] ^= buf[k][57];
166             buf[k][59] ^= buf[k][57];
167             pre[0][k][i] ^= buf[k][59];
168             buf[k][58] ^= buf[k][59];
169             pre[2][k][i] ^= buf[k][58];
170             buf[k][62] ^= buf[k][58];
171             pre[0][k][i] ^= buf[k][62];
172             buf[k][63] ^= buf[k][62];
173             pre[1][k][i] ^= buf[k][63];
174             buf[k][61] ^= buf[k][63];
175             pre[0][k][i] ^= buf[k][61];
176             buf[k][60] ^= buf[k][61];
177             pre[3][k][i] ^= buf[k][60];
178             buf[k][52] ^= buf[k][60];
179             pre[0][k][i] ^= buf[k][52];
180             buf[k][53] ^= buf[k][52];
181             pre[1][k][i] ^= buf[k][53];
182             buf[k][55] ^= buf[k][53];
183             pre[0][k][i] ^= buf[k][55];
184             buf[k][54] ^= buf[k][55];
185             pre[2][k][i] ^= buf[k][54];
186             buf[k][50] ^= buf[k][54];
187             pre[0][k][i] ^= buf[k][50];
188             buf[k][51] ^= buf[k][50];
189             pre[1][k][i] ^= buf[k][51];
190             buf[k][49] ^= buf[k][51];
191             pre[0][k][i] ^= buf[k][49];
192             buf[k][48] ^= buf[k][49];
193             pre[5][k][i] = buf[k][48];
194             buf[k][16] ^= buf[k][48];
195             pre[0][k][i] ^= buf[k][16];
196             buf[k][17] ^= buf[k][16];
197             pre[1][k][i] ^= buf[k][17];
198             buf[k][19] ^= buf[k][17];
199             pre[0][k][i] ^= buf[k][19];
200             buf[k][18] ^= buf[k][19];
201             pre[2][k][i] ^= buf[k][18];
202             buf[k][22] ^= buf[k][18];
203             pre[0][k][i] ^= buf[k][22];
204             buf[k][23] ^= buf[k][22];
205             pre[1][k][i] ^= buf[k][23];
206             buf[k][21] ^= buf[k][23];
207             pre[0][k][i] ^= buf[k][21];
208             buf[k][20] ^= buf[k][21];
209             pre[3][k][i] ^= buf[k][20];
210             buf[k][28] ^= buf[k][20];
211             pre[0][k][i] ^= buf[k][28];
212             buf[k][29] ^= buf[k][28];
213             pre[1][k][i] ^= buf[k][29];
214             buf[k][31] ^= buf[k][29];
215             pre[0][k][i] ^= buf[k][31];
216             buf[k][30] ^= buf[k][31];
217             pre[2][k][i] ^= buf[k][30];
218             buf[k][26] ^= buf[k][30];
219             pre[0][k][i] ^= buf[k][26];
220             buf[k][27] ^= buf[k][26];
221             pre[1][k][i] ^= buf[k][27];
222             buf[k][25] ^= buf[k][27];
223             pre[0][k][i] ^= buf[k][25];
224             buf[k][24] ^= buf[k][25];
225             pre[4][k][i] ^= buf[k][24];
226             buf[k][8] ^= buf[k][24];
227             pre[0][k][i] ^= buf[k][8];
228             buf[k][9] ^= buf[k][8];
229             pre[1][k][i] ^= buf[k][9];
230             buf[k][11] ^= buf[k][9];
231             pre[0][k][i] ^= buf[k][11];
232             buf[k][10] ^= buf[k][11];
233             pre[2][k][i] ^= buf[k][10];
234             buf[k][14] ^= buf[k][10];
235             pre[0][k][i] ^= buf[k][14];
236             buf[k][15] ^= buf[k][14];
237             pre[1][k][i] ^= buf[k][15];
238             buf[k][13] ^= buf[k][15];
239             pre[0][k][i] ^= buf[k][13];
240             buf[k][12] ^= buf[k][13];
241             pre[3][k][i] ^= buf[k][12];
242             buf[k][4] ^= buf[k][12];
243             pre[0][k][i] ^= buf[k][4];
244             buf[k][5] ^= buf[k][4];
245             pre[1][k][i] ^= buf[k][5];
246             buf[k][7] ^= buf[k][5];
247             pre[0][k][i] ^= buf[k][7];
248             buf[k][6] ^= buf[k][7];
249             pre[2][k][i] ^= buf[k][6];
250             buf[k][2] ^= buf[k][6];
251             pre[0][k][i] ^= buf[k][2];
252             buf[k][3] ^= buf[k][2];
253             pre[1][k][i] ^= buf[k][3];
254             buf[k][1] ^= buf[k][3];
255 
256             pre[0][k][i] ^= buf[k][1];
257             out[k][i] = buf[k][0] ^ buf[k][1];
258         }
259     }
260 
261     for (j = 0; j < GFBITS; j++) {
262         tmp[j] = PQCLEAN_MCELIECE460896_VEC_vec_setbits((beta[0] >> j) & 1);
263     }
264 
265     PQCLEAN_MCELIECE460896_VEC_vec_mul(out[2], pre[0][0], tmp);
266     PQCLEAN_MCELIECE460896_VEC_vec_mul(out[3], pre[0][1], tmp);
267 
268     for (i = 1; i < 6; i++) {
269         for (j = 0; j < GFBITS; j++) {
270             tmp[j] = PQCLEAN_MCELIECE460896_VEC_vec_setbits((beta[i] >> j) & 1);
271         }
272 
273         PQCLEAN_MCELIECE460896_VEC_vec_mul(pre[i][0], pre[i][0], tmp);
274         PQCLEAN_MCELIECE460896_VEC_vec_mul(pre[i][1], pre[i][1], tmp);
275 
276         for (b = 0; b < GFBITS; b++) {
277             out[2][b] ^= pre[i][0][b];
278             out[3][b] ^= pre[i][1][b];
279         }
280     }
281 
282 }
283 
284 /* justifying the length of the output */
285 static void postprocess(vec out[4][GFBITS]) {
286     int i;
287 
288     for (i = 0; i < GFBITS; i++) {
289         out[3][i] = 0;
290     }
291 }
292 
293 void PQCLEAN_MCELIECE460896_VEC_fft_tr(vec out[][GFBITS], vec in[][ GFBITS ]) {
294     butterflies_tr(out, in);
295     radix_conversions_tr(out);
296 
297     postprocess(out);
298 }
299 
300