1 /* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 * SPDX-License-Identifier: Apache-2.0"
3 *
4 * Written by Nir Drucker, Shay Gueron, and Dusan Kostic,
5 * AWS Cryptographic Algorithms Group.
6 * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com)
7 *
8 * [1] The optimizations are based on the description developed in the paper:
9 * Drucker, Nir, and Shay Gueron. 2019. “A Toolbox for Software Optimization
10 * of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering,
11 * January, 1–17. https://doi.org/10.1007/s13389-018-00200-4.
12 *
13 * [2] The decoder algorithm is the Black-Gray decoder in
14 * the early submission of CAKE (due to N. Sandrier and R Misoczki).
15 *
16 * [3] The analysis for the constant time implementation is given in
17 * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019.
18 * “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.”
19 * Cryptology EPrint Archive, 2019. https://eprint.iacr.org/2019/1289.
20 *
21 * [4] it was adapted to BGF in:
22 * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019.
23 * “QC-MDPC decoders with several shades of gray.”
24 * Cryptology EPrint Archive, 2019. To be published.
25 *
26 * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography.
27 * In: Gier-lichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware
28 * and Embedded Systems– CHES 2016. pp. 280–300. Springer Berlin Heidelberg,
29 * Berlin, Heidelberg (2016)
30 *
31 * [6] The rotate512_small funciton is a derivative of the code described in:
32 * Guimarães, Antonio, Diego F Aranha, and Edson Borin. 2019.
33 * “Optimized Implementation of QC-MDPC Code-Based Cryptography.”
34 * Concurrency and Computation: Practice and Experience 31 (18):
35 * e5089. https://doi.org/10.1002/cpe.5089.
36 */
37
38 #include "decode.h"
39 #include "gf2x.h"
40 #include "utilities.h"
41 #include <string.h>
42
43 // Decoding (bit-flipping) parameter
44 #ifdef BG_DECODER
45 # if(LEVEL == 1)
46 # define MAX_IT 3
47 # elif(LEVEL == 3)
48 # define MAX_IT 4
49 # elif(LEVEL == 5)
50 # define MAX_IT 7
51 # else
52 # error "Level can only be 1/3/5"
53 # endif
54 #elif defined(BGF_DECODER)
55 # if(LEVEL == 1)
56 # define MAX_IT 5
57 # elif(LEVEL == 3)
58 # define MAX_IT 6
59 # elif(LEVEL == 5)
60 # define MAX_IT 7
61 # else
62 # error "Level can only be 1/3/5"
63 # endif
64 #endif
65
66 // Duplicates the first R_BITS of the syndrome three times
67 // |------------------------------------------|
68 // | Third copy | Second copy | first R_BITS |
69 // |------------------------------------------|
70 // This is required by the rotate functions.
71 _INLINE_ void
dup(IN OUT syndrome_t * s)72 dup(IN OUT syndrome_t *s)
73 {
74 s->qw[R_QW - 1] =
75 (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK);
76
77 for(size_t i = 0; i < (2 * R_QW) - 1; i++)
78 {
79 s->qw[R_QW + i] =
80 (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD);
81 }
82 }
83
84 ret_t
compute_syndrome(OUT syndrome_t * syndrome,IN const ct_t * ct,IN const sk_t * sk)85 compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk)
86 {
87 // gf2x_mod_mul requires the values to be 64bit padded and extra (dbl) space
88 // for the results
89 DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup);
90 DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup);
91 pad_sk[0].val = sk->bin[0];
92 pad_sk[1].val = sk->bin[1];
93
94 DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup);
95 pad_ct[0].val = ct->val[0];
96 pad_ct[1].val = ct->val[1];
97
98 // Compute s = c0*h0 + c1*h1:
99 POSIX_GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0],
100 (uint64_t *)&pad_sk[0]));
101 POSIX_GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1],
102 (uint64_t *)&pad_sk[1]));
103
104 POSIX_GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE));
105
106 memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE);
107 dup(syndrome);
108
109 return SUCCESS;
110 }
111
112 _INLINE_ ret_t
recompute_syndrome(OUT syndrome_t * syndrome,IN const ct_t * ct,IN const sk_t * sk,IN const split_e_t * splitted_e)113 recompute_syndrome(OUT syndrome_t *syndrome,
114 IN const ct_t *ct,
115 IN const sk_t *sk,
116 IN const split_e_t *splitted_e)
117 {
118 ct_t tmp_ct = *ct;
119
120 // Adapt the ciphertext
121 POSIX_GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw,
122 R_SIZE));
123 POSIX_GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw,
124 R_SIZE));
125
126 // Recompute the syndrome
127 POSIX_GUARD(compute_syndrome(syndrome, &tmp_ct, sk));
128
129 return SUCCESS;
130 }
131
132 _INLINE_ uint8_t
get_threshold(IN const syndrome_t * s)133 get_threshold(IN const syndrome_t *s)
134 {
135 bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough);
136
137 const uint32_t syndrome_weight = r_bits_vector_weight((const r_t *)s->qw);
138
139 // The equations below are defined in BIKE's specification:
140 // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf
141 // Page 20 Section 2.4.2
142 const uint8_t threshold =
143 THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight);
144
145 DMSG(" Thresold: %d\n", threshold);
146 return threshold;
147 }
148
149 // Use half-adder as described in [5].
150 _INLINE_ void
bit_sliced_adder(OUT upc_t * upc,IN OUT syndrome_t * rotated_syndrome,IN const size_t num_of_slices)151 bit_sliced_adder(OUT upc_t *upc,
152 IN OUT syndrome_t *rotated_syndrome,
153 IN const size_t num_of_slices)
154 {
155 // From cache-memory perspective this loop should be the outside loop
156 for(size_t j = 0; j < num_of_slices; j++)
157 {
158 for(size_t i = 0; i < R_QW; i++)
159 {
160 const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]);
161 upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i];
162 rotated_syndrome->qw[i] = carry;
163 }
164 }
165 }
166
167 _INLINE_ void
bit_slice_full_subtract(OUT upc_t * upc,IN uint8_t val)168 bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val)
169 {
170 // Borrow
171 uint64_t br[R_QW] = {0};
172
173 for(size_t j = 0; j < SLICES; j++)
174 {
175
176 const uint64_t lsb_mask = 0 - (val & 0x1);
177 val >>= 1;
178
179 // Perform a - b with c as the input/output carry
180 // br = 0 0 0 0 1 1 1 1
181 // a = 0 0 1 1 0 0 1 1
182 // b = 0 1 0 1 0 1 0 1
183 // -------------------
184 // o = 0 1 1 0 0 1 1 1
185 // c = 0 1 0 0 1 1 0 1
186 //
187 // o = a^b^c
188 // _ __ _ _ _ _ _
189 // br = abc + abc + abc + abc = abc + ((a+b))c
190
191 for(size_t i = 0; i < R_QW; i++)
192 {
193 const uint64_t a = upc->slice[j].u.qw[i];
194 const uint64_t b = lsb_mask;
195 const uint64_t tmp = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i]));
196 upc->slice[j].u.qw[i] = a ^ b ^ br[i];
197 br[i] = tmp;
198 }
199 }
200 }
201
202 // Calculate the Unsatisfied Parity Checks (UPCs) and update the errors
203 // vector (e) accordingy. In addition, update the black and gray errors vector
204 // with the relevant values.
205 _INLINE_ void
find_err1(OUT split_e_t * e,OUT split_e_t * black_e,OUT split_e_t * gray_e,IN const syndrome_t * syndrome,IN const compressed_idx_dv_ar_t wlist,IN const uint8_t threshold)206 find_err1(OUT split_e_t *e,
207 OUT split_e_t *black_e,
208 OUT split_e_t *gray_e,
209 IN const syndrome_t * syndrome,
210 IN const compressed_idx_dv_ar_t wlist,
211 IN const uint8_t threshold)
212 {
213 // This function uses the bit-slice-adder methodology of [5]:
214 DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup);
215 DEFER_CLEANUP(upc_t upc, upc_cleanup);
216
217 for(uint32_t i = 0; i < N0; i++)
218 {
219 // UPC must start from zero at every iteration
220 memset(&upc, 0, sizeof(upc));
221
222 // 1) Right-rotate the syndrome for every secret key set bit index
223 // Then slice-add it to the UPC array.
224 for(size_t j = 0; j < DV; j++)
225 {
226 rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]);
227 bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1));
228 }
229
230 // 2) Subtract the threshold from the UPC counters
231 bit_slice_full_subtract(&upc, threshold);
232
233 // 3) Update the errors and the black errors vectors.
234 // The last slice of the UPC array holds the MSB of the accumulated values
235 // minus the threshold. Every zero bit indicates a potential error bit.
236 // The errors values are stored in the black array and xored with the
237 // errors Of the previous iteration.
238 const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val);
239 for(size_t j = 0; j < R_SIZE; j++)
240 {
241 const uint8_t sum_msb = (~last_slice->raw[j]);
242 black_e->val[i].raw[j] = sum_msb;
243 e->val[i].raw[j] ^= sum_msb;
244 }
245
246 // Ensure that the padding bits (upper bits of the last byte) are zero so
247 // they will not be included in the multiplication and in the hash function.
248 e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK;
249
250 // 4) Calculate the gray error array by adding "DELTA" to the UPC array.
251 // For that we reuse the rotated_syndrome variable setting it to all "1".
252 for(size_t l = 0; l < DELTA; l++)
253 {
254 memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE);
255 bit_sliced_adder(&upc, &rotated_syndrome, SLICES);
256 }
257
258 // 5) Update the gray list with the relevant bits that are not
259 // set in the black list.
260 for(size_t j = 0; j < R_SIZE; j++)
261 {
262 const uint8_t sum_msb = (~last_slice->raw[j]);
263 gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb;
264 }
265 }
266 }
267
268 // Recalculate the UPCs and update the errors vector (e) according to it
269 // and to the black/gray vectors.
270 _INLINE_ void
find_err2(OUT split_e_t * e,IN split_e_t * pos_e,IN const syndrome_t * syndrome,IN const compressed_idx_dv_ar_t wlist,IN const uint8_t threshold)271 find_err2(OUT split_e_t *e,
272 IN split_e_t *pos_e,
273 IN const syndrome_t * syndrome,
274 IN const compressed_idx_dv_ar_t wlist,
275 IN const uint8_t threshold)
276 {
277 DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup);
278 DEFER_CLEANUP(upc_t upc, upc_cleanup);
279
280 for(uint32_t i = 0; i < N0; i++)
281 {
282 // UPC must start from zero at every iteration
283 memset(&upc, 0, sizeof(upc));
284
285 // 1) Right-rotate the syndrome for every secret key set bit index
286 // Then slice-add it to the UPC array.
287 for(size_t j = 0; j < DV; j++)
288 {
289 rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]);
290 bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1));
291 }
292
293 // 2) Subtract the threshold from the UPC counters
294 bit_slice_full_subtract(&upc, threshold);
295
296 // 3) Update the errors vector.
297 // The last slice of the UPC array holds the MSB of the accumulated values
298 // minus the threshold. Every zero bit indicates a potential error bit.
299 const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val);
300 for(size_t j = 0; j < R_SIZE; j++)
301 {
302 const uint8_t sum_msb = (~last_slice->raw[j]);
303 e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb);
304 }
305
306 // Ensure that the padding bits (upper bits of the last byte) are zero so
307 // they will not be included in the multiplication and in the hash function.
308 e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK;
309 }
310 }
311
312 ret_t
decode(OUT split_e_t * e,IN const syndrome_t * original_s,IN const ct_t * ct,IN const sk_t * sk)313 decode(OUT split_e_t *e,
314 IN const syndrome_t *original_s,
315 IN const ct_t *ct,
316 IN const sk_t *sk)
317 {
318 split_e_t black_e = {0};
319 split_e_t gray_e = {0};
320 syndrome_t s;
321
322 // Reset (init) the error because it is xored in the find_err funcitons.
323 memset(e, 0, sizeof(*e));
324 s = *original_s;
325 dup(&s);
326
327 for(uint32_t iter = 0; iter < MAX_IT; iter++)
328 {
329 const uint8_t threshold = get_threshold(&s);
330
331 DMSG(" Iteration: %d\n", iter);
332 DMSG(" Weight of e: %lu\n",
333 r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
334 DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));
335
336 find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold);
337 POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));
338 #ifdef BGF_DECODER
339 if(iter >= 1)
340 {
341 continue;
342 }
343 #endif
344 DMSG(" Weight of e: %lu\n",
345 r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
346 DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));
347
348 find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1);
349 POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));
350
351 DMSG(" Weight of e: %lu\n",
352 r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
353 DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));
354
355 find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1);
356 POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));
357 }
358
359 if(r_bits_vector_weight((r_t *)s.qw) > 0)
360 {
361 BIKE_ERROR(E_DECODING_FAILURE);
362 }
363
364 return SUCCESS;
365 }
366