1 /* 2 This file is for transpose of the Gao-Mateer FFT 3 Functions with names ending with _tr are (roughly) the transpose of the corresponding functions in fft.c 4 */ 5 6 #include "fft_tr.h" 7 8 #include "transpose.h" 9 10 #include <stdint.h> 11 12 static void radix_conversions_tr(vec in[][ GFBITS ]) { 13 int i, j, k; 14 15 const vec mask[6][2] = { 16 {0x2222222222222222, 0x4444444444444444}, 17 {0x0C0C0C0C0C0C0C0C, 0x3030303030303030}, 18 {0x00F000F000F000F0, 0x0F000F000F000F00}, 19 {0x0000FF000000FF00, 0x00FF000000FF0000}, 20 {0x00000000FFFF0000, 0x0000FFFF00000000}, 21 {0xFFFFFFFF00000000, 0x00000000FFFFFFFF} 22 }; 23 24 const vec s[6][4][GFBITS] = { 25 #include "scalars_4x.inc" 26 }; 27 28 // 29 30 for (j = 6; j >= 0; j--) { 31 if (j < 6) { 32 PQCLEAN_MCELIECE460896_VEC_vec_mul(in[0], in[0], s[j][0]); // scaling 33 PQCLEAN_MCELIECE460896_VEC_vec_mul(in[1], in[1], s[j][1]); // scaling 34 PQCLEAN_MCELIECE460896_VEC_vec_mul(in[2], in[2], s[j][2]); // scaling 35 PQCLEAN_MCELIECE460896_VEC_vec_mul(in[3], in[3], s[j][3]); // scaling 36 } 37 38 for (k = j; k <= 4; k++) { 39 for (i = 0; i < GFBITS; i++) { 40 in[0][i] ^= (in[0][i] & mask[k][0]) << (1 << k); 41 in[0][i] ^= (in[0][i] & mask[k][1]) << (1 << k); 42 in[1][i] ^= (in[1][i] & mask[k][0]) << (1 << k); 43 in[1][i] ^= (in[1][i] & mask[k][1]) << (1 << k); 44 in[2][i] ^= (in[2][i] & mask[k][0]) << (1 << k); 45 in[2][i] ^= (in[2][i] & mask[k][1]) << (1 << k); 46 in[3][i] ^= (in[3][i] & mask[k][0]) << (1 << k); 47 in[3][i] ^= (in[3][i] & mask[k][1]) << (1 << k); 48 } 49 } 50 51 if (j <= 5) { 52 for (i = 0; i < GFBITS; i++) { 53 in[1][i] ^= in[0][i] >> 32; 54 in[1][i] ^= in[1][i] << 32; 55 56 in[3][i] ^= in[2][i] >> 32; 57 in[3][i] ^= in[3][i] << 32; 58 } 59 } 60 61 for (i = 0; i < GFBITS; i++) { 62 in[3][i] ^= in[2][i] ^= in[1][i]; 63 } 64 } 65 } 66 67 static void butterflies_tr(vec out[][ GFBITS ], vec in[][ GFBITS ]) { 68 int i, j, k, s, b; 69 70 vec tmp[ GFBITS ]; 71 vec pre[6][2][ GFBITS ]; 72 vec buf[2][64]; 73 74 const vec consts[ 128 ][ GFBITS ] = { 75 #include "consts.inc" 76 }; 77 78 uint64_t consts_ptr = 128; 79 80 const unsigned char reversal[128] = { 81 0, 64, 32, 96, 16, 80, 48, 112, 82 8, 72, 40, 104, 24, 88, 56, 120, 83 4, 68, 36, 100, 20, 84, 52, 116, 84 12, 76, 44, 108, 28, 92, 60, 124, 85 2, 66, 34, 98, 18, 82, 50, 114, 86 10, 74, 42, 106, 26, 90, 58, 122, 87 6, 70, 38, 102, 22, 86, 54, 118, 88 14, 78, 46, 110, 30, 94, 62, 126, 89 1, 65, 33, 97, 17, 81, 49, 113, 90 9, 73, 41, 105, 25, 89, 57, 121, 91 5, 69, 37, 101, 21, 85, 53, 117, 92 13, 77, 45, 109, 29, 93, 61, 125, 93 3, 67, 35, 99, 19, 83, 51, 115, 94 11, 75, 43, 107, 27, 91, 59, 123, 95 7, 71, 39, 103, 23, 87, 55, 119, 96 15, 79, 47, 111, 31, 95, 63, 127 97 }; 98 99 const uint16_t beta[6] = {5246, 5306, 6039, 6685, 4905, 6755}; 100 101 // 102 103 for (i = 6; i >= 0; i--) { 104 s = 1 << i; 105 consts_ptr -= s; 106 107 for (j = 0; j < 128; j += 2 * s) { 108 for (k = j; k < j + s; k++) { 109 for (b = 0; b < GFBITS; b++) { 110 in[k][b] ^= in[k + s][b]; 111 } 112 113 PQCLEAN_MCELIECE460896_VEC_vec_mul(tmp, in[k], consts[ consts_ptr + (k - j) ]); 114 115 for (b = 0; b < GFBITS; b++) { 116 in[k + s][b] ^= tmp[b]; 117 } 118 } 119 } 120 } 121 122 for (i = 0; i < GFBITS; i++) { 123 for (k = 0; k < 128; k++) { 124 (&buf[0][0])[ k ] = in[ reversal[k] ][i]; 125 } 126 127 PQCLEAN_MCELIECE460896_VEC_transpose_64x64(buf[0], buf[0]); 128 PQCLEAN_MCELIECE460896_VEC_transpose_64x64(buf[1], buf[1]); 129 130 for (k = 0; k < 2; k++) { 131 pre[0][k][i] = buf[k][32]; 132 buf[k][33] ^= buf[k][32]; 133 pre[1][k][i] = buf[k][33]; 134 buf[k][35] ^= buf[k][33]; 135 pre[0][k][i] ^= buf[k][35]; 136 buf[k][34] ^= buf[k][35]; 137 pre[2][k][i] = buf[k][34]; 138 buf[k][38] ^= buf[k][34]; 139 pre[0][k][i] ^= buf[k][38]; 140 buf[k][39] ^= buf[k][38]; 141 pre[1][k][i] ^= buf[k][39]; 142 buf[k][37] ^= buf[k][39]; 143 pre[0][k][i] ^= buf[k][37]; 144 buf[k][36] ^= buf[k][37]; 145 pre[3][k][i] = buf[k][36]; 146 buf[k][44] ^= buf[k][36]; 147 pre[0][k][i] ^= buf[k][44]; 148 buf[k][45] ^= buf[k][44]; 149 pre[1][k][i] ^= buf[k][45]; 150 buf[k][47] ^= buf[k][45]; 151 pre[0][k][i] ^= buf[k][47]; 152 buf[k][46] ^= buf[k][47]; 153 pre[2][k][i] ^= buf[k][46]; 154 buf[k][42] ^= buf[k][46]; 155 pre[0][k][i] ^= buf[k][42]; 156 buf[k][43] ^= buf[k][42]; 157 pre[1][k][i] ^= buf[k][43]; 158 buf[k][41] ^= buf[k][43]; 159 pre[0][k][i] ^= buf[k][41]; 160 buf[k][40] ^= buf[k][41]; 161 pre[4][k][i] = buf[k][40]; 162 buf[k][56] ^= buf[k][40]; 163 pre[0][k][i] ^= buf[k][56]; 164 buf[k][57] ^= buf[k][56]; 165 pre[1][k][i] ^= buf[k][57]; 166 buf[k][59] ^= buf[k][57]; 167 pre[0][k][i] ^= buf[k][59]; 168 buf[k][58] ^= buf[k][59]; 169 pre[2][k][i] ^= buf[k][58]; 170 buf[k][62] ^= buf[k][58]; 171 pre[0][k][i] ^= buf[k][62]; 172 buf[k][63] ^= buf[k][62]; 173 pre[1][k][i] ^= buf[k][63]; 174 buf[k][61] ^= buf[k][63]; 175 pre[0][k][i] ^= buf[k][61]; 176 buf[k][60] ^= buf[k][61]; 177 pre[3][k][i] ^= buf[k][60]; 178 buf[k][52] ^= buf[k][60]; 179 pre[0][k][i] ^= buf[k][52]; 180 buf[k][53] ^= buf[k][52]; 181 pre[1][k][i] ^= buf[k][53]; 182 buf[k][55] ^= buf[k][53]; 183 pre[0][k][i] ^= buf[k][55]; 184 buf[k][54] ^= buf[k][55]; 185 pre[2][k][i] ^= buf[k][54]; 186 buf[k][50] ^= buf[k][54]; 187 pre[0][k][i] ^= buf[k][50]; 188 buf[k][51] ^= buf[k][50]; 189 pre[1][k][i] ^= buf[k][51]; 190 buf[k][49] ^= buf[k][51]; 191 pre[0][k][i] ^= buf[k][49]; 192 buf[k][48] ^= buf[k][49]; 193 pre[5][k][i] = buf[k][48]; 194 buf[k][16] ^= buf[k][48]; 195 pre[0][k][i] ^= buf[k][16]; 196 buf[k][17] ^= buf[k][16]; 197 pre[1][k][i] ^= buf[k][17]; 198 buf[k][19] ^= buf[k][17]; 199 pre[0][k][i] ^= buf[k][19]; 200 buf[k][18] ^= buf[k][19]; 201 pre[2][k][i] ^= buf[k][18]; 202 buf[k][22] ^= buf[k][18]; 203 pre[0][k][i] ^= buf[k][22]; 204 buf[k][23] ^= buf[k][22]; 205 pre[1][k][i] ^= buf[k][23]; 206 buf[k][21] ^= buf[k][23]; 207 pre[0][k][i] ^= buf[k][21]; 208 buf[k][20] ^= buf[k][21]; 209 pre[3][k][i] ^= buf[k][20]; 210 buf[k][28] ^= buf[k][20]; 211 pre[0][k][i] ^= buf[k][28]; 212 buf[k][29] ^= buf[k][28]; 213 pre[1][k][i] ^= buf[k][29]; 214 buf[k][31] ^= buf[k][29]; 215 pre[0][k][i] ^= buf[k][31]; 216 buf[k][30] ^= buf[k][31]; 217 pre[2][k][i] ^= buf[k][30]; 218 buf[k][26] ^= buf[k][30]; 219 pre[0][k][i] ^= buf[k][26]; 220 buf[k][27] ^= buf[k][26]; 221 pre[1][k][i] ^= buf[k][27]; 222 buf[k][25] ^= buf[k][27]; 223 pre[0][k][i] ^= buf[k][25]; 224 buf[k][24] ^= buf[k][25]; 225 pre[4][k][i] ^= buf[k][24]; 226 buf[k][8] ^= buf[k][24]; 227 pre[0][k][i] ^= buf[k][8]; 228 buf[k][9] ^= buf[k][8]; 229 pre[1][k][i] ^= buf[k][9]; 230 buf[k][11] ^= buf[k][9]; 231 pre[0][k][i] ^= buf[k][11]; 232 buf[k][10] ^= buf[k][11]; 233 pre[2][k][i] ^= buf[k][10]; 234 buf[k][14] ^= buf[k][10]; 235 pre[0][k][i] ^= buf[k][14]; 236 buf[k][15] ^= buf[k][14]; 237 pre[1][k][i] ^= buf[k][15]; 238 buf[k][13] ^= buf[k][15]; 239 pre[0][k][i] ^= buf[k][13]; 240 buf[k][12] ^= buf[k][13]; 241 pre[3][k][i] ^= buf[k][12]; 242 buf[k][4] ^= buf[k][12]; 243 pre[0][k][i] ^= buf[k][4]; 244 buf[k][5] ^= buf[k][4]; 245 pre[1][k][i] ^= buf[k][5]; 246 buf[k][7] ^= buf[k][5]; 247 pre[0][k][i] ^= buf[k][7]; 248 buf[k][6] ^= buf[k][7]; 249 pre[2][k][i] ^= buf[k][6]; 250 buf[k][2] ^= buf[k][6]; 251 pre[0][k][i] ^= buf[k][2]; 252 buf[k][3] ^= buf[k][2]; 253 pre[1][k][i] ^= buf[k][3]; 254 buf[k][1] ^= buf[k][3]; 255 256 pre[0][k][i] ^= buf[k][1]; 257 out[k][i] = buf[k][0] ^ buf[k][1]; 258 } 259 } 260 261 for (j = 0; j < GFBITS; j++) { 262 tmp[j] = PQCLEAN_MCELIECE460896_VEC_vec_setbits((beta[0] >> j) & 1); 263 } 264 265 PQCLEAN_MCELIECE460896_VEC_vec_mul(out[2], pre[0][0], tmp); 266 PQCLEAN_MCELIECE460896_VEC_vec_mul(out[3], pre[0][1], tmp); 267 268 for (i = 1; i < 6; i++) { 269 for (j = 0; j < GFBITS; j++) { 270 tmp[j] = PQCLEAN_MCELIECE460896_VEC_vec_setbits((beta[i] >> j) & 1); 271 } 272 273 PQCLEAN_MCELIECE460896_VEC_vec_mul(pre[i][0], pre[i][0], tmp); 274 PQCLEAN_MCELIECE460896_VEC_vec_mul(pre[i][1], pre[i][1], tmp); 275 276 for (b = 0; b < GFBITS; b++) { 277 out[2][b] ^= pre[i][0][b]; 278 out[3][b] ^= pre[i][1][b]; 279 } 280 } 281 282 } 283 284 /* justifying the length of the output */ 285 static void postprocess(vec out[4][GFBITS]) { 286 int i; 287 288 for (i = 0; i < GFBITS; i++) { 289 out[3][i] = 0; 290 } 291 } 292 293 void PQCLEAN_MCELIECE460896_VEC_fft_tr(vec out[][GFBITS], vec in[][ GFBITS ]) { 294 butterflies_tr(out, in); 295 radix_conversions_tr(out); 296 297 postprocess(out); 298 } 299 300