1 /*
2   This file is for public-key generation
3 */
4 
5 #include "pk_gen.h"
6 
7 #include "benes.h"
8 #include "controlbits.h"
9 #include "fft.h"
10 #include "params.h"
11 #include "util.h"
12 
13 #include <stdint.h>
14 
de_bitslicing(uint64_t * out,vec256 in[][GFBITS])15 static void de_bitslicing(uint64_t *out, vec256 in[][GFBITS]) {
16     int i, j, r;
17     uint64_t u = 0;
18 
19     for (i = 0; i < (1 << GFBITS); i++) {
20         out[i] = 0 ;
21     }
22 
23     for (i = 0; i < 16; i++) {
24         for (j = GFBITS - 1; j >= 0; j--) {
25             u = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[i][j], 0);
26             for (r = 0; r < 64; r++) {
27                 out[i * 256 + 0 * 64 + r] <<= 1;
28                 out[i * 256 + 0 * 64 + r] |= (u >> r) & 1;
29             }
30             u = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[i][j], 1);
31             for (r = 0; r < 64; r++) {
32                 out[i * 256 + 1 * 64 + r] <<= 1;
33                 out[i * 256 + 1 * 64 + r] |= (u >> r) & 1;
34             }
35             u = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[i][j], 2);
36             for (r = 0; r < 64; r++) {
37                 out[i * 256 + 2 * 64 + r] <<= 1;
38                 out[i * 256 + 2 * 64 + r] |= (u >> r) & 1;
39             }
40             u = PQCLEAN_MCELIECE348864_AVX_vec256_extract(in[i][j], 3);
41             for (r = 0; r < 64; r++) {
42                 out[i * 256 + 3 * 64 + r] <<= 1;
43                 out[i * 256 + 3 * 64 + r] |= (u >> r) & 1;
44             }
45         }
46     }
47 }
48 
to_bitslicing_2x(vec256 out0[][GFBITS],vec256 out1[][GFBITS],const uint64_t * in)49 static void to_bitslicing_2x(vec256 out0[][GFBITS], vec256 out1[][GFBITS], const uint64_t *in) {
50     int i, j, k, r;
51     uint64_t u[4] = {0};
52 
53     for (i = 0; i < 16; i++) {
54         for (j = GFBITS - 1; j >= 0; j--) {
55             for (k = 0; k < 4; k++) {
56                 for (r = 63; r >= 0; r--) {
57                     u[k] <<= 1;
58                     u[k] |= (in[i * 256 + k * 64 + r] >> (j + GFBITS)) & 1;
59                 }
60             }
61 
62             out1[i][j] = PQCLEAN_MCELIECE348864_AVX_vec256_set4x(u[0], u[1], u[2], u[3]);
63         }
64 
65         for (j = GFBITS - 1; j >= 0; j--) {
66             for (k = 0; k < 4; k++) {
67                 for (r = 63; r >= 0; r--) {
68                     u[k] <<= 1;
69                     u[k] |= (in[i * 256 + k * 64 + r] >> j) & 1;
70                 }
71             }
72 
73             out0[i][GFBITS - 1 - j] = PQCLEAN_MCELIECE348864_AVX_vec256_set4x(u[0], u[1], u[2], u[3]);
74         }
75     }
76 }
77 
78 #define NBLOCKS1_H ((SYS_N + 63) / 64)
79 #define NBLOCKS2_H ((SYS_N + 255) / 256)
80 #define NBLOCKS1_I ((GFBITS * SYS_T + 63) / 64)
81 #define NBLOCKS2_I ((GFBITS * SYS_T + 255) / 256)
PQCLEAN_MCELIECE348864_AVX_pk_gen(unsigned char * pk,uint32_t * perm,const unsigned char * sk)82 int PQCLEAN_MCELIECE348864_AVX_pk_gen(unsigned char *pk, uint32_t *perm, const unsigned char *sk) {
83     const int block_idx = NBLOCKS1_I;
84 
85     int i, j, k;
86     int row, c;
87 
88     uint64_t mat[ GFBITS * SYS_T ][ NBLOCKS2_H * 4 ];
89     uint64_t ops[ GFBITS * SYS_T ][ NBLOCKS1_I ];
90 
91     uint64_t mask;
92 
93     uint64_t sk_int[ GFBITS ];
94 
95     vec256 consts[ 16 ][ GFBITS ];
96     vec256 eval[ 16 ][ GFBITS ];
97     vec256 prod[ 16 ][ GFBITS ];
98     vec256 tmp[ GFBITS ];
99 
100     uint64_t list[1 << GFBITS];
101     uint64_t one_row[ 128 ];
102 
103     // compute the inverses
104 
105     PQCLEAN_MCELIECE348864_AVX_irr_load(sk_int, sk);
106 
107     PQCLEAN_MCELIECE348864_AVX_fft(eval, sk_int);
108 
109     PQCLEAN_MCELIECE348864_AVX_vec256_copy(prod[0], eval[0]);
110 
111     for (i = 1; i < 16; i++) {
112         PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[i], prod[i - 1], eval[i]);
113     }
114 
115     PQCLEAN_MCELIECE348864_AVX_vec256_inv(tmp, prod[15]);
116 
117     for (i = 14; i >= 0; i--) {
118         PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[i + 1], prod[i], tmp);
119         PQCLEAN_MCELIECE348864_AVX_vec256_mul(tmp, tmp, eval[i + 1]);
120     }
121 
122     PQCLEAN_MCELIECE348864_AVX_vec256_copy(prod[0], tmp);
123 
124     // fill matrix
125 
126     de_bitslicing(list, prod);
127 
128     for (i = 0; i < (1 << GFBITS); i++) {
129         list[i] <<= GFBITS;
130         list[i] |= i;
131         list[i] |= ((uint64_t) perm[i]) << 31;
132     }
133 
134     PQCLEAN_MCELIECE348864_AVX_sort_63b(1 << GFBITS, list);
135 
136     to_bitslicing_2x(consts, prod, list);
137 
138     for (i = 0; i < (1 << GFBITS); i++) {
139         perm[i] = list[i] & GFMASK;
140     }
141 
142     for (j = 0; j < NBLOCKS2_I; j++) {
143         for (k = 0; k < GFBITS; k++) {
144             mat[ k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
145             mat[ k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
146             mat[ k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
147             mat[ k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
148         }
149     }
150 
151     for (i = 1; i < SYS_T; i++) {
152         for (j = 0; j < NBLOCKS2_I; j++) {
153             PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[j], prod[j], consts[j]);
154 
155             for (k = 0; k < GFBITS; k++) {
156                 mat[ i * GFBITS + k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
157                 mat[ i * GFBITS + k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
158                 mat[ i * GFBITS + k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
159                 mat[ i * GFBITS + k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
160             }
161         }
162     }
163 
164     // gaussian elimination to obtain an upper triangular matrix
165     // and keep track of the operations in ops
166 
167     for (i = 0; i < PK_NROWS; i++) {
168         for (j = 0; j < NBLOCKS1_I; j++) {
169             ops[ i ][ j ] = 0;
170         }
171     }
172 
173     for (i = 0; i < PK_NROWS; i++) {
174         ops[ i ][ i / 64 ] = 1;
175         ops[ i ][ i / 64 ] <<= (i % 64);
176     }
177 
178     for (row = 0; row < PK_NROWS; row++) {
179         i = row >> 6;
180         j = row & 63;
181 
182         for (k = row + 1; k < PK_NROWS; k++) {
183             mask = mat[ row ][ i ] >> j;
184             mask &= 1;
185             mask -= 1;
186 
187             for (c = 0; c < NBLOCKS1_I; c++) {
188                 mat[ row ][ c ] ^= mat[ k ][ c ] & mask;
189                 ops[ row ][ c ] ^= ops[ k ][ c ] & mask;
190             }
191         }
192 
193         if ( ((mat[ row ][ i ] >> j) & 1) == 0 ) { // return if not systematic
194             return -1;
195         }
196 
197         for (k = row + 1; k < PK_NROWS; k++) {
198             mask = mat[ k ][ i ] >> j;
199             mask &= 1;
200             mask = -mask;
201 
202             for (c = 0; c < NBLOCKS1_I; c++) {
203                 mat[ k ][ c ] ^= mat[ row ][ c ] & mask;
204                 ops[ k ][ c ] ^= ops[ row ][ c ] & mask;
205             }
206         }
207     }
208 
209     // computing the lineaer map required to obatin the systematic form
210 
211     for (row = PK_NROWS - 1; row >= 0; row--) {
212         for (k = 0; k < row; k++) {
213             mask = mat[ k ][ row / 64 ] >> (row & 63);
214             mask &= 1;
215             mask = -mask;
216 
217             for (c = 0; c < NBLOCKS1_I; c++) {
218                 ops[ k ][ c ] ^= ops[ row ][ c ] & mask;
219             }
220         }
221     }
222 
223     // apply the linear map to the non-systematic part
224 
225     for (j = NBLOCKS2_I; j < NBLOCKS2_H; j++) {
226         for (k = 0; k < GFBITS; k++) {
227             mat[ k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
228             mat[ k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
229             mat[ k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
230             mat[ k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
231         }
232     }
233 
234     for (i = 1; i < SYS_T; i++) {
235         for (j = NBLOCKS2_I; j < NBLOCKS2_H; j++) {
236             PQCLEAN_MCELIECE348864_AVX_vec256_mul(prod[j], prod[j], consts[j]);
237 
238             for (k = 0; k < GFBITS; k++) {
239                 mat[ i * GFBITS + k ][ 4 * j + 0 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 0);
240                 mat[ i * GFBITS + k ][ 4 * j + 1 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 1);
241                 mat[ i * GFBITS + k ][ 4 * j + 2 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 2);
242                 mat[ i * GFBITS + k ][ 4 * j + 3 ] = PQCLEAN_MCELIECE348864_AVX_vec256_extract(prod[ j ][ k ], 3);
243             }
244         }
245     }
246 
247     for (row = 0; row < PK_NROWS; row++) {
248         for (k = 0; k < NBLOCKS1_H; k++) {
249             one_row[ k ] = 0;
250         }
251 
252         for (c = 0; c < PK_NROWS; c++) {
253             mask = ops[ row ][ c >> 6 ] >> (c & 63);
254             mask &= 1;
255             mask = -mask;
256 
257             for (k = block_idx; k < NBLOCKS1_H; k++) {
258                 one_row[ k ] ^= mat[ c ][ k ] & mask;
259             }
260         }
261 
262         for (k = block_idx; k < NBLOCKS1_H - 1; k++) {
263             PQCLEAN_MCELIECE348864_AVX_store8(pk, one_row[k]);
264             pk += 8;
265         }
266 
267         PQCLEAN_MCELIECE348864_AVX_store_i(pk, one_row[k], PK_ROW_BYTES % 8);
268 
269         pk += PK_ROW_BYTES % 8;
270     }
271 
272     //
273 
274     return 0;
275 }
276 
277