/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 * SPDX-License-Identifier: Apache-2.0
 *
 * Written by Nir Drucker, Shay Gueron, and Dusan Kostic,
 * AWS Cryptographic Algorithms Group.
 * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com)
 *
 * [1] The optimizations are based on the description developed in the paper:
 *     Drucker, Nir, and Shay Gueron. 2019. “A Toolbox for Software Optimization
 *     of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering,
 *     January, 1–17. https://doi.org/10.1007/s13389-018-00200-4.
 *
 * [2] The decoder algorithm is the Black-Gray decoder in
 *     the early submission of CAKE (due to N. Sendrier and R. Misoczki).
 *
 * [3] The analysis for the constant-time implementation is given in
 *     Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019.
 *     “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.”
 *     Cryptology ePrint Archive, 2019. https://eprint.iacr.org/2019/1289.
 *
 * [4] It was adapted to BGF in:
 *     Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019.
 *     “QC-MDPC Decoders with Several Shades of Gray.”
 *     Cryptology ePrint Archive, 2019. To be published.
 *
 * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography.
 *     In: Gierlichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware
 *     and Embedded Systems – CHES 2016. pp. 280–300. Springer Berlin Heidelberg,
 *     Berlin, Heidelberg (2016)
 *
 * [6] The rotate512_small function is a derivative of the code described in:
 *     Guimarães, Antonio, Diego F. Aranha, and Edson Borin. 2019.
 *     “Optimized Implementation of QC-MDPC Code-Based Cryptography.”
 *     Concurrency and Computation: Practice and Experience 31 (18):
 *     e5089. https://doi.org/10.1002/cpe.5089.
 */

#include "decode.h"
#include "gf2x.h"
#include "utilities.h"
#include <string.h>

// Decoding (bit-flipping) parameter
#ifdef BG_DECODER
#  if(LEVEL == 1)
#    define MAX_IT 3
#  elif(LEVEL == 3)
#    define MAX_IT 4
#  elif(LEVEL == 5)
#    define MAX_IT 7
#  else
#    error "Level can only be 1/3/5"
#  endif
#elif defined(BGF_DECODER)
#  if(LEVEL == 1)
#    define MAX_IT 5
#  elif(LEVEL == 3)
#    define MAX_IT 6
#  elif(LEVEL == 5)
#    define MAX_IT 7
#  else
#    error "Level can only be 1/3/5"
#  endif
#endif
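
// Note: MAX_IT is the number of outer decoding iterations. The Black-Gray (BG)
// and Black-Gray-Flip (BGF) variants use different per-level iteration counts;
// see [3] and [4] for the analysis behind these choices.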

// Duplicates the first R_BITS of the syndrome three times
// |------------------------------------------|
// |  Third copy | Second copy | First R_BITS |
// |------------------------------------------|
// This is required by the rotate functions.
_INLINE_ void
dup(IN OUT syndrome_t *s)
{
  s->qw[R_QW - 1] =
      (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK);

  for(size_t i = 0; i < (2 * R_QW) - 1; i++)
  {
    s->qw[R_QW + i] =
        (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD);
  }
}
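
// Illustrative note (added for clarity): after dup(), the buffer holds the
// R_BITS syndrome bits back to back. A cyclic right-rotation by k
// (0 <= k < R_BITS) can then be obtained by reading R_BITS consecutive bits
// starting at offset k, i.e. as a plain (non-cyclic) shift into the
// duplicated region, with no wrap-around handling in the rotate functions.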

ret_t
compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk)
{
  // gf2x_mod_mul requires 64-bit padded inputs and extra (dbl) space
  // for the results
  DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup);
  DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup);
  pad_sk[0].val = sk->bin[0];
  pad_sk[1].val = sk->bin[1];

  DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup);
  pad_ct[0].val = ct->val[0];
  pad_ct[1].val = ct->val[1];

  // Compute s = c0*h0 + c1*h1:
  POSIX_GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0],
                     (uint64_t *)&pad_sk[0]));
  POSIX_GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1],
                     (uint64_t *)&pad_sk[1]));

  POSIX_GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE));

  memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE);
  dup(syndrome);

  return SUCCESS;
}

_INLINE_ ret_t
recompute_syndrome(OUT syndrome_t *syndrome,
                   IN const ct_t *ct,
                   IN const sk_t *sk,
                   IN const split_e_t *splitted_e)
{
  ct_t tmp_ct = *ct;

  // Adapt the ciphertext
  POSIX_GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw,
                 R_SIZE));
  POSIX_GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw,
                 R_SIZE));

  // Recompute the syndrome
  POSIX_GUARD(compute_syndrome(syndrome, &tmp_ct, sk));

  return SUCCESS;
}

_INLINE_ uint8_t
get_threshold(IN const syndrome_t *s)
{
  bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough);

  const uint32_t syndrome_weight = r_bits_vector_weight((const r_t *)s->qw);

  // The equations below are defined in BIKE's specification:
  // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf
  // Page 20 Section 2.4.2
  const uint8_t threshold =
      THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight);

  DMSG("    Threshold: %d\n", threshold);
  return threshold;
}
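
// Illustrative example (hypothetical coefficients, for intuition only): the
// threshold is an affine function of the syndrome weight |s|,
//   threshold = THRESHOLD_COEFF0 + THRESHOLD_COEFF1 * |s|.
// For instance, with coefficients 13.5 and 0.007, a syndrome of weight 2000
// would give 13.5 + 0.007 * 2000 = 27.5, i.e. 27 after truncation to uint8_t.
// The actual per-level coefficients are those defined in the BIKE
// specification referenced above.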

// Use a half-adder, as described in [5].
_INLINE_ void
bit_sliced_adder(OUT upc_t *upc,
                 IN OUT syndrome_t *rotated_syndrome,
                 IN const size_t    num_of_slices)
{
  // From a cache-memory perspective this should be the outer loop
  for(size_t j = 0; j < num_of_slices; j++)
  {
    for(size_t i = 0; i < R_QW; i++)
    {
      const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]);
      upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i];
      rotated_syndrome->qw[i] = carry;
    }
  }
}
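
// Illustrative note: the UPC counters are stored bit-sliced, i.e. slice j
// holds bit j of every counter. Adding a rotated syndrome is a ripple of
// half-adders: the incoming bit is XORed into slice 0, its carry propagates
// into slice 1, and so on. For example, a counter currently holding 3
// (binary 011) that receives a 1 becomes 4 (binary 100):
//   slice 0: 1 ^ 1 = 0, carry 1;  slice 1: 1 ^ 1 = 0, carry 1;
//   slice 2: 0 ^ 1 = 1, carry 0.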

_INLINE_ void
bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val)
{
  // Borrow
  uint64_t br[R_QW] = {0};

  for(size_t j = 0; j < SLICES; j++)
  {

    const uint64_t lsb_mask = 0 - (val & 0x1);
    val >>= 1;

    // Perform a - b with br as the input/output borrow
    // br = 0 0 0 0 1 1 1 1
    // a  = 0 0 1 1 0 0 1 1
    // b  = 0 1 0 1 0 1 0 1
    // -------------------
    // o  = 0 1 1 0 1 0 0 1
    // br = 0 1 0 0 1 1 0 1
    //
    // o        = a ^ b ^ br
    // br(next) = (~a & b & ~br) | (~a & ~b & br) | (~a & b & br) | (a & b & br)
    //          = (~a & b & ~br) | ((~a | b) & br)

    for(size_t i = 0; i < R_QW; i++)
    {
      const uint64_t a      = upc->slice[j].u.qw[i];
      const uint64_t b      = lsb_mask;
      const uint64_t tmp    = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i]));
      upc->slice[j].u.qw[i] = a ^ b ^ br[i];
      br[i]                 = tmp;
    }
  }
}
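
// Illustrative note: this computes (counter - threshold) for every position,
// slice by slice, with br as the running borrow. After the subtraction the
// last slice holds the MSB (effectively the sign) of each result: a 0 there
// means counter >= threshold, a 1 means counter < threshold. For example,
// with threshold 26, a counter of 30 gives 30 - 26 = 4 (MSB 0, a flip
// candidate), while a counter of 20 underflows and leaves the MSB set.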

// Calculate the Unsatisfied Parity Checks (UPCs) and update the errors
// vector (e) accordingly. In addition, update the black and gray errors
// vectors with the relevant values.
_INLINE_ void
find_err1(OUT split_e_t *e,
          OUT split_e_t *black_e,
          OUT split_e_t *gray_e,
          IN const syndrome_t *           syndrome,
          IN const compressed_idx_dv_ar_t wlist,
          IN const uint8_t                threshold)
{
  // This function uses the bit-slice-adder methodology of [5]:
  DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup);
  DEFER_CLEANUP(upc_t upc, upc_cleanup);

  for(uint32_t i = 0; i < N0; i++)
  {
    // UPC must start from zero at every iteration
    memset(&upc, 0, sizeof(upc));

    // 1) Right-rotate the syndrome for every secret key set bit index.
    //    Then slice-add it to the UPC array.
    for(size_t j = 0; j < DV; j++)
    {
      rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]);
      bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1));
    }

    // 2) Subtract the threshold from the UPC counters
    bit_slice_full_subtract(&upc, threshold);

    // 3) Update the errors and the black errors vectors.
    //    The last slice of the UPC array holds the MSB of the accumulated values
    //    minus the threshold. Every zero bit indicates a potential error bit.
    //    The error values are stored in the black array and xored with the
    //    errors of the previous iteration.
    const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val);
    for(size_t j = 0; j < R_SIZE; j++)
    {
      const uint8_t sum_msb  = (~last_slice->raw[j]);
      black_e->val[i].raw[j] = sum_msb;
      e->val[i].raw[j] ^= sum_msb;
    }

    // Ensure that the padding bits (upper bits of the last byte) are zero so
    // they will not be included in the multiplication and in the hash function.
    e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK;

    // 4) Calculate the gray error array by adding "DELTA" to the UPC array.
    //    For that, we reuse the rotated_syndrome variable, setting it to all "1".
    for(size_t l = 0; l < DELTA; l++)
    {
      memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE);
      bit_sliced_adder(&upc, &rotated_syndrome, SLICES);
    }

    // 5) Update the gray list with the relevant bits that are not
    //    set in the black list.
    for(size_t j = 0; j < R_SIZE; j++)
    {
      const uint8_t sum_msb = (~last_slice->raw[j]);
      gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb;
    }
  }
}

// Recalculate the UPCs and update the errors vector (e) according to them
// and to the black/gray vectors.
_INLINE_ void
find_err2(OUT split_e_t *e,
          IN split_e_t *pos_e,
          IN const syndrome_t *           syndrome,
          IN const compressed_idx_dv_ar_t wlist,
          IN const uint8_t                threshold)
{
  DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup);
  DEFER_CLEANUP(upc_t upc, upc_cleanup);

  for(uint32_t i = 0; i < N0; i++)
  {
    // UPC must start from zero at every iteration
    memset(&upc, 0, sizeof(upc));

    // 1) Right-rotate the syndrome for every secret key set bit index.
    //    Then slice-add it to the UPC array.
    for(size_t j = 0; j < DV; j++)
    {
      rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]);
      bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1));
    }

    // 2) Subtract the threshold from the UPC counters
    bit_slice_full_subtract(&upc, threshold);

    // 3) Update the errors vector.
    //    The last slice of the UPC array holds the MSB of the accumulated values
    //    minus the threshold. Every zero bit indicates a potential error bit.
    const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val);
    for(size_t j = 0; j < R_SIZE; j++)
    {
      const uint8_t sum_msb = (~last_slice->raw[j]);
      e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb);
    }

    // Ensure that the padding bits (upper bits of the last byte) are zero so
    // they will not be included in the multiplication and in the hash function.
    e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK;
  }
}

ret_t
decode(OUT split_e_t *e,
       IN const syndrome_t *original_s,
       IN const ct_t *ct,
       IN const sk_t *sk)
{
  split_e_t  black_e = {0};
  split_e_t  gray_e  = {0};
  syndrome_t s;

  // Reset (init) the error because it is xored in the find_err functions.
  memset(e, 0, sizeof(*e));
  s = *original_s;
  dup(&s);

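  // Note on the iteration structure: each iteration starts with a threshold
  // bit-flipping step (find_err1), which also records the flipped (black) and
  // near-threshold (gray) positions. In the BG decoder every iteration then
  // re-evaluates the black and gray positions with the fixed threshold
  // ((DV + 1) / 2) + 1 (find_err2). In the BGF decoder these two extra steps
  // run only in the first iteration; later iterations perform only the first
  // step (see the `continue` below). See [2], [3], and [4] for the decoders'
  // descriptions.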
  for(uint32_t iter = 0; iter < MAX_IT; iter++)
  {
    const uint8_t threshold = get_threshold(&s);

    DMSG("    Iteration: %d\n", iter);
    DMSG("    Weight of e: %lu\n",
         r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
    DMSG("    Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));

    find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold);
    POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));
#ifdef BGF_DECODER
    if(iter >= 1)
    {
      continue;
    }
#endif
    DMSG("    Weight of e: %lu\n",
         r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
    DMSG("    Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));

    find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1);
    POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));

    DMSG("    Weight of e: %lu\n",
         r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1]));
    DMSG("    Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw));

    find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1);
    POSIX_GUARD(recompute_syndrome(&s, ct, sk, e));
  }

  if(r_bits_vector_weight((r_t *)s.qw) > 0)
  {
    BIKE_ERROR(E_DECODING_FAILURE);
  }

  return SUCCESS;
}