1 /*
2 This file is for public-key generation
3 */
4
5 #include "pk_gen.h"
6
7 #include "benes.h"
8 #include "controlbits.h"
9 #include "fft.h"
10 #include "params.h"
11 #include "util.h"
12
13 #include <stdint.h>
14
/* Convert a bitsliced representation back to one GF(2^m) element per
 * 64-bit word: out[x] receives the GFBITS-bit value whose bit b is bit x
 * of the bitsliced plane in[x/256][b] (lane (x%256)/64, position x%64). */
static void de_bitslicing(uint64_t *out, vec256 in[][GFBITS]) {
    int chunk, b, bit;
    uint64_t w;

    /* clear all 2^GFBITS output words before accumulating bits */
    for (chunk = 0; chunk < (1 << GFBITS); chunk++) {
        out[chunk] = 0;
    }

    for (chunk = 0; chunk < 16; chunk++) {
        /* walk bit planes MSB-first so each output word is built top-down */
        for (b = GFBITS - 1; b >= 0; b--) {
            /* lane indices must stay literal constants:
               vec256_extract needs an immediate operand */
            w = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[chunk][b], 0);
            for (bit = 0; bit < 64; bit++) {
                out[chunk * 256 + 0 * 64 + bit] = (out[chunk * 256 + 0 * 64 + bit] << 1) | ((w >> bit) & 1);
            }
            w = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[chunk][b], 1);
            for (bit = 0; bit < 64; bit++) {
                out[chunk * 256 + 1 * 64 + bit] = (out[chunk * 256 + 1 * 64 + bit] << 1) | ((w >> bit) & 1);
            }
            w = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[chunk][b], 2);
            for (bit = 0; bit < 64; bit++) {
                out[chunk * 256 + 2 * 64 + bit] = (out[chunk * 256 + 2 * 64 + bit] << 1) | ((w >> bit) & 1);
            }
            w = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[chunk][b], 3);
            for (bit = 0; bit < 64; bit++) {
                out[chunk * 256 + 3 * 64 + bit] = (out[chunk * 256 + 3 * 64 + bit] << 1) | ((w >> bit) & 1);
            }
        }
    }
}
48
/* Re-bitslice two GFBITS-bit fields packed in each 64-bit input word:
 * bits [2*GFBITS-1 .. GFBITS] of in[] go to out1 (planes stored MSB at
 * index GFBITS-1), bits [GFBITS-1 .. 0] go to out0 with the plane order
 * reversed (out0[i][0] holds the MSB plane). */
static void to_bitslicing_2x(vec256 out0[][GFBITS], vec256 out1[][GFBITS], const uint64_t *in) {
    int chunk, pos, lane, bit;
    uint64_t acc[4] = {0};

    for (chunk = 0; chunk < 16; chunk++) {
        /* upper field -> out1; natural plane order */
        for (pos = GFBITS - 1; pos >= 0; pos--) {
            for (lane = 0; lane < 4; lane++) {
                /* 64 shift-in steps fully overwrite acc[lane], so no reset
                   between planes is needed */
                for (bit = 63; bit >= 0; bit--) {
                    acc[lane] = (acc[lane] << 1) | ((in[chunk * 256 + lane * 64 + bit] >> (pos + GFBITS)) & 1);
                }
            }

            out1[chunk][pos] = PQCLEAN_MCELIECE348864_AVX_vec256_set4x(acc[0], acc[1], acc[2], acc[3]);
        }

        /* lower field -> out0; plane order reversed (index pos reads
           source bit GFBITS-1-pos) */
        for (pos = 0; pos < GFBITS; pos++) {
            for (lane = 0; lane < 4; lane++) {
                for (bit = 63; bit >= 0; bit--) {
                    acc[lane] = (acc[lane] << 1) | ((in[chunk * 256 + lane * 64 + bit] >> (GFBITS - 1 - pos)) & 1);
                }
            }

            out0[chunk][pos] = PQCLEAN_MCELIECE348864_AVX_vec256_set4x(acc[0], acc[1], acc[2], acc[3]);
        }
    }
}
77
78 #define NBLOCKS1_H ((SYS_N + 63) / 64)
79 #define NBLOCKS2_H ((SYS_N + 255) / 256)
80 #define NBLOCKS1_I ((GFBITS * SYS_T + 63) / 64)
81 #define NBLOCKS2_I ((GFBITS * SYS_T + 255) / 256)
/*
 * Generate the Classic McEliece public key.
 *
 * pk   output: the public key (non-identity part of the systematic
 *      parity-check matrix), PK_NROWS rows of PK_ROW_BYTES bytes
 * perm in: random 32-bit values used to select a field-element
 *      permutation; out: the sorted permutation (low GFBITS bits of
 *      each sort key)
 * sk   input: the secret Goppa polynomial (irreducible, degree SYS_T)
 *
 * Returns 0 on success, -1 if the matrix cannot be brought to
 * systematic form (caller retries with fresh randomness).
 *
 * All branching below is on public data only; secret-dependent
 * selection is done with arithmetic masks to stay constant-time.
 */
int PQCLEAN_MCELIECE348864_AVX_pk_gen(unsigned char *pk, uint32_t *perm, const unsigned char *sk) {
    const int block_idx = NBLOCKS1_I;

    int i, j, k;
    int row, c;

    uint64_t mat[ GFBITS * SYS_T ][ NBLOCKS2_H * 4 ];
    uint64_t ops[ GFBITS * SYS_T ][ NBLOCKS1_I ];

    uint64_t mask;

    uint64_t sk_int[ GFBITS ];

    vec256 consts[ 16 ][ GFBITS ];
    vec256 eval[ 16 ][ GFBITS ];
    vec256 prod[ 16 ][ GFBITS ];
    vec256 tmp[ GFBITS ];

    uint64_t list[1 << GFBITS];
    uint64_t one_row[ 128 ];

    // compute the inverses of g(alpha) at all field points

    PQCLEAN_MCELIECE348864_AVX_irr_load(sk_int, sk);

    PQCLEAN_MCELIECE348864_AVX_fft(eval, sk_int);

    // Montgomery's trick: one inversion of the running product prod[15],
    // then sweep back multiplying by prefix products to invert every
    // eval[i] at the cost of a single vec256_inv
    PQCLEAN_MCELIECE348864_AVX_vec256_copy(prod[0], eval[0]);

    for (i = 1; i < 16; i++) {
        PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[i], prod[i - 1], eval[i]);
    }

    PQCLEAN_MCELIECE348864_AVX_vec256_inv(tmp, prod[15]);

    for (i = 14; i >= 0; i--) {
        PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[i + 1], prod[i], tmp);
        PQCLEAN_MCELIECE348864_AVX_vec256_mul(tmp, tmp, eval[i + 1]);
    }

    PQCLEAN_MCELIECE348864_AVX_vec256_copy(prod[0], tmp);

    // fill matrix

    de_bitslicing(list, prod);

    // build 63-bit sort keys: perm (random) in the high bits dominates,
    // the inverse value and the original index i ride along in the low bits
    for (i = 0; i < (1 << GFBITS); i++) {
        list[i] <<= GFBITS;
        list[i] |= i;
        list[i] |= ((uint64_t) perm[i]) << 31;
    }

    PQCLEAN_MCELIECE348864_AVX_sort_63b(1 << GFBITS, list);

    // re-bitslice the permuted inverses (prod) and the permuted field
    // points (consts) from the sorted keys
    to_bitslicing_2x(consts, prod, list);

    // the low GFBITS bits of each sorted key are the permuted index
    for (i = 0; i < (1 << GFBITS); i++) {
        perm[i] = list[i] & GFMASK;
    }

    // first GFBITS rows of the parity-check matrix: the inverses themselves
    for (j = 0; j < NBLOCKS2_I; j++) {
        for (k = 0; k < GFBITS; k++) {
            mat[ k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
            mat[ k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
            mat[ k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
            mat[ k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
        }
    }

    // remaining row groups: multiply by the field points once per step
    // (row group i holds alpha^i / g(alpha)); only the identity-part
    // columns (NBLOCKS2_I blocks) are materialized here
    for (i = 1; i < SYS_T; i++) {
        for (j = 0; j < NBLOCKS2_I; j++) {
            PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[j], prod[j], consts[j]);

            for (k = 0; k < GFBITS; k++) {
                mat[ i * GFBITS + k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
                mat[ i * GFBITS + k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
                mat[ i * GFBITS + k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
                mat[ i * GFBITS + k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
            }
        }
    }

    // gaussian elimination to obtain an upper triangular matrix
    // and keep track of the operations in ops

    // ops starts as the identity matrix; every row operation applied to
    // mat is mirrored into ops, so ops ends up as the transforming map
    for (i = 0; i < PK_NROWS; i++) {
        for (j = 0; j < NBLOCKS1_I; j++) {
            ops[ i ][ j ] = 0;
        }
    }

    for (i = 0; i < PK_NROWS; i++) {
        ops[ i ][ i / 64 ] = 1;
        ops[ i ][ i / 64 ] <<= (i % 64);
    }

    for (row = 0; row < PK_NROWS; row++) {
        i = row >> 6;
        j = row & 63;

        // make the pivot bit 1 by conditionally (mask = all-ones iff the
        // pivot is 0) adding each lower row — branch-free
        for (k = row + 1; k < PK_NROWS; k++) {
            mask = mat[ row ][ i ] >> j;
            mask &= 1;
            mask -= 1;

            for (c = 0; c < NBLOCKS1_I; c++) {
                mat[ row ][ c ] ^= mat[ k ][ c ] & mask;
                ops[ row ][ c ] ^= ops[ k ][ c ] & mask;
            }
        }

        if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic
            return -1;
        }

        // clear the pivot column in all rows below (mask = all-ones iff
        // that row has the bit set)
        for (k = row + 1; k < PK_NROWS; k++) {
            mask = mat[ k ][ i ] >> j;
            mask &= 1;
            mask = -mask;

            for (c = 0; c < NBLOCKS1_I; c++) {
                mat[ k ][ c ] ^= mat[ row ][ c ] & mask;
                ops[ k ][ c ] ^= ops[ row ][ c ] & mask;
            }
        }
    }

    // computing the linear map required to obtain the systematic form

    // back-substitution recorded in ops only: mat's identity part is
    // never needed again, so only the operation matrix is updated
    for (row = PK_NROWS - 1; row >= 0; row--) {
        for (k = 0; k < row; k++) {
            mask = mat[ k ][ row / 64 ] >> (row & 63);
            mask &= 1;
            mask = -mask;

            for (c = 0; c < NBLOCKS1_I; c++) {
                ops[ k ][ c ] ^= ops[ row ][ c ] & mask;
            }
        }
    }

    // apply the linear map to the non-systematic part

    // regenerate the remaining column blocks of the parity-check matrix
    // (prod was consumed above, so the same multiply-accumulate sweep is
    // repeated for columns NBLOCKS2_I..NBLOCKS2_H-1)
    for (j = NBLOCKS2_I; j < NBLOCKS2_H; j++) {
        for (k = 0; k < GFBITS; k++) {
            mat[ k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
            mat[ k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
            mat[ k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
            mat[ k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
        }
    }

    for (i = 1; i < SYS_T; i++) {
        for (j = NBLOCKS2_I; j < NBLOCKS2_H; j++) {
            PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[j], prod[j], consts[j]);

            for (k = 0; k < GFBITS; k++) {
                mat[ i * GFBITS + k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
                mat[ i * GFBITS + k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
                mat[ i * GFBITS + k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
                mat[ i * GFBITS + k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
            }
        }
    }

    // each public-key row is the ops-weighted XOR of matrix rows,
    // restricted to the non-identity column blocks [block_idx, NBLOCKS1_H)
    for (row = 0; row < PK_NROWS; row++) {
        for (k = 0; k < NBLOCKS1_H; k++) {
            one_row[ k ] = 0;
        }

        for (c = 0; c < PK_NROWS; c++) {
            mask = ops[ row ][ c >> 6 ] >> (c & 63);
            mask &= 1;
            mask = -mask;

            for (k = block_idx; k < NBLOCKS1_H; k++) {
                one_row[ k ] ^= mat[ c ][ k ] & mask;
            }
        }

        // serialize: full 8-byte words, then the partial trailing word
        for (k = block_idx; k < NBLOCKS1_H - 1; k++) {
            PQCLEAN_MCELIECE348864_AVX_store8(pk, one_row[k]);
            pk += 8;
        }

        PQCLEAN_MCELIECE348864_AVX_store_i(pk, one_row[k], PK_ROW_BYTES % 8);

        pk += PK_ROW_BYTES % 8;
    }

    //

    return 0;
}
276
277