1 /*
2   This file is for Niederreiter decryption
3 */
4 
5 #include "decrypt.h"
6 
7 #include "benes.h"
8 #include "bm.h"
9 #include "fft.h"
10 #include "fft_tr.h"
11 #include "params.h"
12 #include "util.h"
13 
14 #include <stdio.h>
15 
/*
  Build the per-row scaling factors from the secret key and mask them with
  the received word.

  input:  sk, secret key bytes handed to irr_load
          recv, 32 vectors used as AND masks
  output: inv, rows 0..31 are overwritten with the inverted values
          out, out[i][j] = inv[i][j] & recv[i] for i < 32, j < GFBITS
*/
static void scaling(vec256 out[][GFBITS], vec256 inv[][GFBITS], const unsigned char *sk, vec256 *recv) {
    int row, bit;

    vec128 irr[ GFBITS ];
    vec256 sq[32][ GFBITS ];
    vec256 acc[ GFBITS ];

    // FFT of whatever irr_load extracts from sk (presumably the irreducible
    // polynomial -- confirm), followed by an in-place squaring of every row

    PQCLEAN_MCELIECE8192128F_AVX_irr_load(irr, sk);
    PQCLEAN_MCELIECE8192128F_AVX_fft(sq, irr);

    for (row = 0; row < 32; row++) {
        PQCLEAN_MCELIECE8192128F_AVX_vec256_sq(sq[row], sq[row]);
    }

    // computing inverses with a single vec256_inv call:
    // forward pass stores the running products inv[row] = sq[0] * ... * sq[row]

    PQCLEAN_MCELIECE8192128F_AVX_vec256_copy(inv[0], sq[0]);

    for (row = 1; row < 32; row++) {
        PQCLEAN_MCELIECE8192128F_AVX_vec256_mul(inv[row], inv[row - 1], sq[row]);
    }

    // acc starts as the inverse of the full product

    PQCLEAN_MCELIECE8192128F_AVX_vec256_inv(acc, inv[31]);

    // backward pass: turn the running product in inv[row] into the inverse of
    // sq[row], then strip sq[row] from acc before moving one row down

    for (row = 31; row > 0; row--) {
        PQCLEAN_MCELIECE8192128F_AVX_vec256_mul(inv[row], acc, inv[row - 1]);
        PQCLEAN_MCELIECE8192128F_AVX_vec256_mul(acc, acc, sq[row]);
    }

    // what is left in acc belongs to row 0

    PQCLEAN_MCELIECE8192128F_AVX_vec256_copy(inv[0], acc);

    // mask every bit-slice of inv[row] with recv[row]

    for (row = 0; row < 32; row++) {
        for (bit = 0; bit < GFBITS; bit++) {
            out[row][bit] = PQCLEAN_MCELIECE8192128F_AVX_vec256_and(inv[row][bit], recv[row]);
        }
    }
}
56 
/*
  Mask already-computed scaling factors with a word.

  input:  inv, 32 rows of GFBITS vectors (not modified here)
          recv, 32 vectors used as AND masks
  output: out, out[i][j] = inv[i][j] & recv[i] for i < 32, j < GFBITS
*/
static void scaling_inv(vec256 out[][GFBITS], vec256 inv[][GFBITS], vec256 *recv) {
    int row;

    for (row = 0; row < 32; row++) {
        vec256 *dst = out[row];
        vec256 *src = inv[row];
        int bit = 0;

        while (bit < GFBITS) {
            dst[bit] = PQCLEAN_MCELIECE8192128F_AVX_vec256_and(src[bit], recv[row]);
            bit++;
        }
    }
}
66 
/*
  Expand the syndrome bytes into a 64-vector buffer.

  input:  s, syndrome bytes; SYND_BYTES / 16 groups of 16 bytes are read
          (a remainder of SYND_BYTES % 16 bytes, if any, is not read here)
  output: recv, 64 vectors: the loaded groups first, the rest cleared
*/
static void preprocess(vec128 *recv, const unsigned char *s) {
    int k;

    // clear the whole buffer by replicating one zero vector

    recv[0] = PQCLEAN_MCELIECE8192128F_AVX_vec128_setbits(0);

    k = 1;
    while (k < 64) {
        recv[k] = recv[0];
        k++;
    }

    // then overwrite the leading entries with the syndrome, 16 bytes at a time

    for (k = 0; k < SYND_BYTES / 16; k++) {
        const unsigned char *src = s + k * 16;

        recv[k] = PQCLEAN_MCELIECE8192128F_AVX_load16(src);
    }
}
80 
/*
  Count the set bits in 32 vectors.

  input:  v, 32 vectors, each read as four 64-bit lanes
  return: total number of set bits
*/
static int weight(vec256 *v) {
    int total = 0;
    int k = 0;

    while (k < 32) {
        // lane indices stay literal in case vec256_extract needs a
        // compile-time constant (TODO confirm against vec256.h)
        int lane0 = (int)_mm_popcnt_u64( PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(v[k], 0) );
        int lane1 = (int)_mm_popcnt_u64( PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(v[k], 1) );
        int lane2 = (int)_mm_popcnt_u64( PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(v[k], 2) );
        int lane3 = (int)_mm_popcnt_u64( PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(v[k], 3) );

        total += lane0;
        total += lane1;
        total += lane2;
        total += lane3;

        k++;
    }

    return total;
}
93 
/*
  Compare two bitsliced syndromes without branching on their contents.

  input:  s0, s1, GFBITS vectors each
  return: vec256_testz of the OR of all pairwise XORs (presumably 1 when the
          two inputs are identical and 0 otherwise -- confirm against vec256.h)
*/
static uint16_t synd_cmp(vec256 *s0, vec256 *s1) {
    int k;
    vec256 acc;
    vec256 delta;

    // acc collects every differing bit position across all GFBITS slices

    acc = PQCLEAN_MCELIECE8192128F_AVX_vec256_xor(s0[0], s1[0]);

    for (k = 1; k < GFBITS; k++) {
        delta = PQCLEAN_MCELIECE8192128F_AVX_vec256_xor(s0[k], s1[k]);
        acc = PQCLEAN_MCELIECE8192128F_AVX_vec256_or(acc, delta);
    }

    return PQCLEAN_MCELIECE8192128F_AVX_vec256_testz(acc);
}
106 
/*
  Pack 64 128-bit vectors into 32 256-bit vectors.

  input:  in, 64 vectors
  output: out, 32 vectors; out[i] is built from the 64-bit lanes
          (in[2i] lane 0, in[2i] lane 1, in[2i+1] lane 0, in[2i+1] lane 1),
          passed to vec256_set4x in that order
*/
static void reformat_128to256(vec256 *out, vec128 *in) {
    int k;
    uint64_t a_lo, a_hi, b_lo, b_hi;

    for (k = 0; k < 32; k++) {
        // a_* come from the even-indexed input, b_* from the odd-indexed one;
        // all four lanes are read before out[k] is written
        a_lo = PQCLEAN_MCELIECE8192128F_AVX_vec128_extract(in[2 * k + 0], 0);
        a_hi = PQCLEAN_MCELIECE8192128F_AVX_vec128_extract(in[2 * k + 0], 1);
        b_lo = PQCLEAN_MCELIECE8192128F_AVX_vec128_extract(in[2 * k + 1], 0);
        b_hi = PQCLEAN_MCELIECE8192128F_AVX_vec128_extract(in[2 * k + 1], 1);

        out[k] = PQCLEAN_MCELIECE8192128F_AVX_vec256_set4x(a_lo, a_hi, b_lo, b_hi);
    }
}
120 
/*
  Split 32 256-bit vectors into 64 128-bit vectors.

  input:  in, 32 vectors
  output: out, 64 vectors; out[2i] is built from lanes 0 and 1 of in[i],
          out[2i+1] from lanes 2 and 3
*/
static void reformat_256to128(vec128 *out, vec256 *in) {
    int k;
    uint64_t q0, q1, q2, q3;

    for (k = 0; k < 32; k++) {
        // pull out all four 64-bit lanes before writing either output
        q0 = PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(in[k], 0);
        q1 = PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(in[k], 1);
        q2 = PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(in[k], 2);
        q3 = PQCLEAN_MCELIECE8192128F_AVX_vec256_extract(in[k], 3);

        out[2 * k + 0] = PQCLEAN_MCELIECE8192128F_AVX_vec128_set2x(q0, q1);
        out[2 * k + 1] = PQCLEAN_MCELIECE8192128F_AVX_vec128_set2x(q2, q3);
    }
}
135 
136 /* Niederreiter decryption with the Berlekamp decoder */
137 /* intput: sk, secret key */
138 /*         c, ciphertext (syndrome) */
139 /* output: e, error vector */
140 /* return: 0 for success; 1 for failure */
PQCLEAN_MCELIECE8192128F_AVX_decrypt(unsigned char * e,const unsigned char * sk,const unsigned char * c)141 int PQCLEAN_MCELIECE8192128F_AVX_decrypt(unsigned char *e, const unsigned char *sk, const unsigned char *c) {
142     int i;
143 
144     uint16_t check_synd;
145     uint16_t check_weight;
146 
147     vec256 inv[ 64 ][ GFBITS ];
148     vec256 scaled[ 64 ][ GFBITS ];
149     vec256 eval[ 64 ][ GFBITS ];
150 
151     vec128 error128[ 64 ];
152     vec256 error256[ 32 ];
153 
154     vec256 s_priv[ GFBITS ];
155     vec256 s_priv_cmp[ GFBITS ];
156     vec128 locator[ GFBITS ];
157 
158     vec128 recv128[ 64 ];
159     vec256 recv256[ 32 ];
160     vec256 allone;
161 
162     vec128 bits_int[25][32];
163 
164     // Berlekamp decoder
165 
166     preprocess(recv128, c);
167 
168     PQCLEAN_MCELIECE8192128F_AVX_load_bits(bits_int, sk + IRR_BYTES);
169     PQCLEAN_MCELIECE8192128F_AVX_benes(recv128, bits_int, 1);
170     reformat_128to256(recv256, recv128);
171 
172     scaling(scaled, inv, sk, recv256); // scaling
173     PQCLEAN_MCELIECE8192128F_AVX_fft_tr(s_priv, scaled); // transposed FFT
174     PQCLEAN_MCELIECE8192128F_AVX_bm(locator, s_priv); // Berlekamp Massey
175 
176     PQCLEAN_MCELIECE8192128F_AVX_fft(eval, locator); // FFT
177 
178     // reencryption and weight check
179 
180     allone = PQCLEAN_MCELIECE8192128F_AVX_vec256_set1_16b(0xFFFF);
181 
182     for (i = 0; i < 32; i++) {
183         error256[i] = PQCLEAN_MCELIECE8192128F_AVX_vec256_or_reduce(eval[i]);
184         error256[i] = PQCLEAN_MCELIECE8192128F_AVX_vec256_xor(error256[i], allone);
185     }
186 
187     check_weight = (uint16_t)(weight(error256) ^ SYS_T);
188     check_weight -= 1;
189     check_weight >>= 15;
190 
191     scaling_inv(scaled, inv, error256);
192     PQCLEAN_MCELIECE8192128F_AVX_fft_tr(s_priv_cmp, scaled);
193 
194     check_synd = synd_cmp(s_priv, s_priv_cmp);
195 
196     //
197 
198     reformat_256to128(error128, error256);
199     PQCLEAN_MCELIECE8192128F_AVX_benes(error128, bits_int, 0);
200 
201     for (i = 0; i < 64; i++) {
202         PQCLEAN_MCELIECE8192128F_AVX_store16(e + i * 16, error128[i]);
203     }
204 
205     return 1 - (check_synd & check_weight);
206 }
207 
208