1 /*
2 This file is for the inversion-free Berlekamp-Massey algorithm
3 see https://ieeexplore.ieee.org/document/87857
4 */
5
6 #include "bm.h"
7
8 #include "gf.h"
9
mask_nonzero(gf a)10 static inline uint16_t mask_nonzero(gf a) {
11 uint32_t ret = a;
12
13 ret -= 1;
14 ret >>= 31;
15 ret -= 1;
16
17 return (uint16_t)ret;
18 }
19
mask_leq(uint16_t a,uint16_t b)20 static inline uint16_t mask_leq(uint16_t a, uint16_t b) {
21 uint32_t a_tmp = a;
22 uint32_t b_tmp = b;
23 uint32_t ret = b_tmp - a_tmp;
24
25 ret >>= 31;
26 ret -= 1;
27
28 return (uint16_t)ret;
29 }
30
vec_cmov(vec * out,const vec * in,uint16_t mask)31 static inline void vec_cmov(vec *out, const vec *in, uint16_t mask) {
32 int i;
33
34 vec m0, m1;
35
36 m0 = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(mask);
37 m1 = ~m0;
38
39 for (i = 0; i < GFBITS; i++) {
40 out[i] = (in[i] & m0) | (out[i] & m1);
41 out[i] = (in[i] & m0) | (out[i] & m1);
42 }
43 }
44
interleave(vec * in,int idx0,int idx1,const vec * mask,int b)45 static inline void interleave(vec *in, int idx0, int idx1, const vec *mask, int b) {
46 int s = 1 << b;
47
48 vec x, y;
49
50 x = (in[idx0] & mask[0]) | ((in[idx1] & mask[0]) << s);
51 y = ((in[idx0] & mask[1]) >> s) | (in[idx1] & mask[1]);
52
53 in[idx0] = x;
54 in[idx1] = y;
55 }
56
57 /* input: in, field elements in bitsliced form */
58 /* output: out, field elements in non-bitsliced form */
get_coefs(gf * out,const vec * in)59 static inline void get_coefs(gf *out, const vec *in) {
60 int i, k;
61
62 vec mask[4][2];
63 vec buf[16];
64
65 for (i = 0; i < 13; i++) {
66 buf[i] = in[i];
67 }
68 for (i = 13; i < 16; i++) {
69 buf[i] = 0;
70 }
71
72 mask[0][0] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0x5555);
73 mask[0][1] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0xAAAA);
74 mask[1][0] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0x3333);
75 mask[1][1] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0xCCCC);
76 mask[2][0] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0x0F0F);
77 mask[2][1] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0xF0F0);
78 mask[3][0] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0x00FF);
79 mask[3][1] = PQCLEAN_MCELIECE8192128F_VEC_vec_set1_16b(0xFF00);
80
81 interleave(buf, 0, 8, mask[3], 3);
82 interleave(buf, 1, 9, mask[3], 3);
83 interleave(buf, 2, 10, mask[3], 3);
84 interleave(buf, 3, 11, mask[3], 3);
85 interleave(buf, 4, 12, mask[3], 3);
86 interleave(buf, 5, 13, mask[3], 3);
87 interleave(buf, 6, 14, mask[3], 3);
88 interleave(buf, 7, 15, mask[3], 3);
89
90 interleave(buf, 0, 4, mask[2], 2);
91 interleave(buf, 1, 5, mask[2], 2);
92 interleave(buf, 2, 6, mask[2], 2);
93 interleave(buf, 3, 7, mask[2], 2);
94 interleave(buf, 8, 12, mask[2], 2);
95 interleave(buf, 9, 13, mask[2], 2);
96 interleave(buf, 10, 14, mask[2], 2);
97 interleave(buf, 11, 15, mask[2], 2);
98
99 interleave(buf, 0, 2, mask[1], 1);
100 interleave(buf, 1, 3, mask[1], 1);
101 interleave(buf, 4, 6, mask[1], 1);
102 interleave(buf, 5, 7, mask[1], 1);
103 interleave(buf, 8, 10, mask[1], 1);
104 interleave(buf, 9, 11, mask[1], 1);
105 interleave(buf, 12, 14, mask[1], 1);
106 interleave(buf, 13, 15, mask[1], 1);
107
108 interleave(buf, 0, 1, mask[0], 0);
109 interleave(buf, 2, 3, mask[0], 0);
110 interleave(buf, 4, 5, mask[0], 0);
111 interleave(buf, 6, 7, mask[0], 0);
112 interleave(buf, 8, 9, mask[0], 0);
113 interleave(buf, 10, 11, mask[0], 0);
114 interleave(buf, 12, 13, mask[0], 0);
115 interleave(buf, 14, 15, mask[0], 0);
116
117 for (i = 0; i < 16; i++) {
118 for (k = 0; k < 4; k++) {
119 out[ k * 16 + i ] = (buf[i] >> (k * 16)) & GFMASK;
120 }
121 }
122 }
123
update(vec in[][GFBITS],const gf e)124 static void update(vec in[][GFBITS], const gf e) {
125 int i;
126 vec tmp;
127
128 for (i = 0; i < GFBITS; i++) {
129 tmp = (e >> i) & 1;
130
131 in[0][i] = (in[0][i] >> 1) | (in[1][i] << 63);
132 in[1][i] = (in[1][i] >> 1) | (tmp << 63);
133 }
134 }
135
vec_reduce(vec in[][GFBITS])136 static inline gf vec_reduce(vec in[][GFBITS]) {
137 int i;
138 vec tmp;
139 gf ret = 0;
140
141 for (i = GFBITS - 1; i >= 0; i--) {
142 tmp = in[0][i] ^ in[1][i];
143
144 tmp ^= tmp >> 32;
145 tmp ^= tmp >> 16;
146 tmp ^= tmp >> 8;
147 tmp ^= tmp >> 4;
148 tmp ^= tmp >> 2;
149 tmp ^= tmp >> 1;
150
151 ret <<= 1;
152 ret |= tmp & 1;
153 }
154
155 return ret;
156 }
157
158 /* input: in, sequence of field elements */
159 /* output: out, minimal polynomial of in */
PQCLEAN_MCELIECE8192128F_VEC_bm(vec out[][GFBITS],vec in[][GFBITS])160 void PQCLEAN_MCELIECE8192128F_VEC_bm(vec out[][ GFBITS ], vec in[][ GFBITS ]) {
161 int i;
162 uint16_t N, L;
163 uint16_t mask;
164 uint64_t one = 1, t;
165
166 vec prod[2][GFBITS];
167 vec interval[2][GFBITS];
168 vec dd[2][GFBITS], bb[2][GFBITS];
169 vec B[2][GFBITS], C[2][GFBITS];
170 vec B_tmp[2][GFBITS], C_tmp[2][GFBITS];
171 vec v[GFBITS];
172
173 gf d, b, c0 = 1;
174 gf coefs[256];
175
176 // initialization
177
178 get_coefs(&coefs[ 0], in[0]);
179 get_coefs(&coefs[ 64], in[1]);
180 get_coefs(&coefs[128], in[2]);
181 get_coefs(&coefs[192], in[3]);
182
183 C[0][0] = 0;
184 C[1][0] = 0;
185 B[0][0] = 0;
186 B[1][0] = one << 63;
187
188 for (i = 1; i < GFBITS; i++) {
189 C[0][i] = C[1][i] = B[0][i] = B[1][i] = 0;
190 }
191
192 b = 1;
193 L = 0;
194
195 //
196
197 for (i = 0; i < GFBITS; i++) {
198 interval[0][i] = interval[1][i] = 0;
199 }
200
201 for (N = 0; N < 256; N++) {
202 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(prod[0], C[0], interval[0]);
203 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(prod[1], C[1], interval[1]);
204 update(interval, coefs[N]);
205 d = vec_reduce(prod);
206
207 t = PQCLEAN_MCELIECE8192128F_VEC_gf_mul2(c0, coefs[N], b);
208 d ^= t & 0xFFFFFFFF;
209
210 mask = mask_nonzero(d) & mask_leq(L * 2, N);
211
212 for (i = 0; i < GFBITS; i++) {
213 dd[0][i] = dd[1][i] = PQCLEAN_MCELIECE8192128F_VEC_vec_setbits((d >> i) & 1);
214 bb[0][i] = bb[1][i] = PQCLEAN_MCELIECE8192128F_VEC_vec_setbits((b >> i) & 1);
215 }
216
217 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(B_tmp[0], dd[0], B[0]);
218 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(B_tmp[1], dd[1], B[1]);
219 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(C_tmp[0], bb[0], C[0]);
220 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(C_tmp[1], bb[1], C[1]);
221
222 vec_cmov(B[0], C[0], mask);
223 vec_cmov(B[1], C[1], mask);
224 update(B, c0 & mask);
225
226 for (i = 0; i < GFBITS; i++) {
227 C[0][i] = B_tmp[0][i] ^ C_tmp[0][i];
228 C[1][i] = B_tmp[1][i] ^ C_tmp[1][i];
229 }
230
231 c0 = (gf)(t >> 32);
232 b = (d & mask) | (b & ~mask);
233 L = ((N + 1 - L) & mask) | (L & ~mask);
234 }
235
236 c0 = PQCLEAN_MCELIECE8192128F_VEC_gf_inv(c0);
237
238 for (i = 0; i < GFBITS; i++) {
239 v[i] = PQCLEAN_MCELIECE8192128F_VEC_vec_setbits((c0 >> i) & 1);
240 }
241
242 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(out[0], C[0], v);
243 PQCLEAN_MCELIECE8192128F_VEC_vec_mul(out[1], C[1], v);
244 }
245
246