1 #include "fft.h"
2 #include "gf.h"
3 #include "parameters.h"
4 #include "parsing.h"
5 #include "reed_solomon.h"
6 #include <stdint.h>
7 #include <stdio.h>
8 #include <string.h>
9 /**
10  * @file reed_solomon.c
11  * Constant time implementation of Reed-Solomon codes
12  */
13 
14 
15 static void compute_syndromes(uint16_t *syndromes, uint8_t *cdw);
16 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes);
17 static void compute_roots(uint8_t *error, uint16_t *sigma);
18 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes);
19 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error);
20 static void correct_errors(uint8_t *cdw, const uint16_t *error_values);
21 
22 static const __m256i alpha_ij256_1[55] = {
23     {0x0010000800040002, 0x001d008000400020, 0x00cd00e80074003a, 0x004c002600130087},
24     {0x001d004000100004, 0x004c001300cd0074, 0x008f00ea00b4002d, 0x009d006000180006},
25     {0x00cd003a00400008, 0x008f0075002d0026, 0x002500270060000c, 0x004600c100b50035},
26     {0x004c00cd001d0010, 0x009d0018008f00b4, 0x004600ee006a0025, 0x005f00b9005d0014},
27     {0x00b4002600740020, 0x006a009c00600003, 0x00b900a0000500c1, 0x00fd000f005e00be},
28     {0x008f002d00cd0040, 0x004600b500250060, 0x0065006100b90050, 0x00d900df006b0078},
29     {0x0018007500130080, 0x005d008c00b5009c, 0x006b003c005e00a1, 0x0081001a004300a3},
30     {0x009d008f004c001d, 0x005f005d0046006a, 0x00d900fe00fd0065, 0x0085003b0081000d},
31     {0x0025000c002d003a, 0x006500a1005000c1, 0x00d0008600df00e7, 0x00a800a9006600ed},
32     {0x006a006000b40074, 0x00fd005e00b90005, 0x003b0067001100df, 0x00e600550084002e},
33     {0x00ee002700ea00e8, 0x00fe003c006100a0, 0x00b8007600670086, 0x00e3009100390054},
34     {0x00460025008f00cd, 0x00d9006b006500b9, 0x00a800b8003b00d0, 0x0082009600fc00e4},
35     {0x0014003500060087, 0x000d00a3007800be, 0x00e40054002e00ed, 0x00510064006200e5},
36     {0x005d00b500180013, 0x00810043006b005e, 0x00fc003900840066, 0x0012005900c80062},
37     {0x00b900c100600026, 0x003b001a00df000f, 0x00960091005500a9, 0x002c002400590064},
38     {0x005f0046009d004c, 0x0085008100d900fd, 0x008200e300e600a8, 0x0002002c00120051},
39     {0x0099000a004e0098, 0x004f0093004400d6, 0x00dd00dc00d70092, 0x00980001000b0045},
40     {0x006500500025002d, 0x00a8006600d000df, 0x00c30007009600bf, 0x0027002600ad00fb},
41     {0x001e00ba0094005a, 0x0049006d003e00e2, 0x003d00a200ae00b3, 0x008c006000e80083},
42     {0x00fd00b9006a00b4, 0x00e60084003b0011, 0x002c00ac001c0096, 0x00be00c100030020},
43     {0x006b00a100b50075, 0x00fc00290066001a, 0x00ad00f500590057, 0x00e700b90035002d},
44     {0x00fe006100ee00ea, 0x00e3003900b80067, 0x003a00b000ac0007, 0x00af000f002800c0},
45     {0x005b002f009f00c9, 0x009500d10021007c, 0x0075004700f400a6, 0x001f00df00c200ee},
46     {0x00d900650046008f, 0x008200fc00a8003b, 0x0027003a002c00c3, 0x0017001a00e700ba},
47     {0x0011000f00050003, 0x001c00ff00550033, 0x00c100b4006c0024, 0x004d003b00e2005e},
48     {0x000d007800140006, 0x0051006200e4002e, 0x00ba00c0002000fb, 0x00d100a900bd00bb},
49     {0x00d000e70050000c, 0x00c3005700bf00a9, 0x002f00b50026007d, 0x00db005500c500d9},
50     {0x0081006b005d0018, 0x001200c800fc0084, 0x00e70028000300ad, 0x00190091009e00bd},
51     {0x00f8007f00690030, 0x00f700e000f1004d, 0x00b6005f009c0040, 0x00a2009600aa00ec},
52     {0x003b00df00b90060, 0x002c005900960055, 0x001a000f00c10026, 0x00240064009100a9},
53     {0x009700b600de00c0, 0x001b009b006e0072, 0x00ed00b100a0008f, 0x00580059004b0052},
54     {0x008500d9005f009d, 0x00020012008200e6, 0x001700af00be0027, 0x00040024001900d1},
55     {0x00b8008600610027, 0x003a00f500070091, 0x001500d0000f00b5, 0x002d002c00a600f1},
56     {0x004f00440099004e, 0x0098000b00dd00d7, 0x0092009300d6000a, 0x004e0001004500dc},
57     {0x0084001a005e009c, 0x000300e9005900ff, 0x0091002e00e200b9, 0x0005002600eb001c},
58     {0x00a800d000650025, 0x002700ad00c30096, 0x00db0015001a002f, 0x00610060003600f2},
59     {0x005200ce0089004a, 0x00d40010008a0037, 0x00570049007c0078, 0x00d300c1001d0048},
60     {0x0049003e001e0094, 0x008c00e8003d00ae, 0x003800630033007f, 0x004300b900ea0016},
61     {0x00e400ed00780035, 0x00ba002d00fb0064, 0x00f200f100a900d9, 0x003e000f002500ad},
62     {0x00e6003b00fd006a, 0x00be0003002c001c, 0x00240037004d001a, 0x002e00df00050074},
63     {0x00c600c500d300d4, 0x00ca009d00cf00a7, 0x008b00c80072003e, 0x009a001a005f00c9},
64     {0x00fc0066006b00b5, 0x00e7003500ad0059, 0x003600a6009100c5, 0x00bf003b00780025},
65     {0x007b001700b10077, 0x00e1009f000800ef, 0x0040002b00ff00b8, 0x00ab00a9005b008c},
66     {0x00e300b800fe00ee, 0x00af0028003a00ac, 0x002d007a00370015, 0x00320055003400de},
67     {0x009600a900df00c1, 0x001a00b900260024, 0x0060002c00640055, 0x00590091003b000f},
68     {0x00950021005b009f, 0x001f00c2007500f4, 0x00b500d800a70073, 0x0048009600da00fe},
69     {0x00a5001500710023, 0x00760089000c00eb, 0x0050008000ef00fc, 0x00b0006400520022},
70     {0x008200a800d90046, 0x001700e70027002c, 0x0061002d002400db, 0x0008005900bf003e},
71     {0x00c800290043008c, 0x009e00fe003500e9, 0x0078003000eb006e, 0x005a002400e300cc},
72     {0x001c005500110005, 0x004d00e200c1006c, 0x00df006a00e90064, 0x009c002c00ae0084},
73     {0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd, 0x000a000100dd0092},
74     {0x005100e4000d0014, 0x00d100bd00ba0020, 0x003e00de007400f2, 0x00c20026002b003f},
75     {0x0079007300340028, 0x00e500f800a10074, 0x006600ca00b4008a, 0x00bb006000f7004b},
76     {0x00c300bf00d00050, 0x00db00c5002f0026, 0x0021006b006000f5, 0x008600c100cf0082},
77     {0x00ac0091006700a0, 0x0037002e000f00b4, 0x005500e2006a002c, 0x007c00b9002000a7}
78 };
79 static const __m256i alpha_ij256_2[55] = {
80     {0x00b4005a002d0098, 0x008f00c900ea0075, 0x0018000c00060003, 0x009d00c000600030},
81     {0x006a00940025004e, 0x0046009f00ee00b5, 0x005d005000140005, 0x005f00de00b90069},
82     {0x00b900ba0050000a, 0x0065002f006100a1, 0x006b00e70078000f, 0x00d900b600df007f},
83     {0x00fd001e00650099, 0x00d9005b00fe006b, 0x008100d0000d0011, 0x00850097003b00f8},
84     {0x001100e200df00d6, 0x003b007c0067001a, 0x008400a9002e0033, 0x00e600720055004d},
85     {0x003b003e00d00044, 0x00a8002100b80066, 0x00fc00bf00e40055, 0x0082006e009600f1},
86     {0x0084006d00660093, 0x00fc00d100390029, 0x00c80057006200ff, 0x0012009b005900e0},
87     {0x00e6004900a8004f, 0x0082009500e300fc, 0x001200c30051001c, 0x0002001b002c00f7},
88     {0x009600b300bf0092, 0x00c300a600070057, 0x00ad007d00fb0024, 0x0027008f00260040},
89     {0x001c00ae009600d7, 0x002c00f400ac0059, 0x000300260020006c, 0x00be00a000c1009c},
90     {0x00ac00a2000700dc, 0x003a004700b000f5, 0x002800b500c000b4, 0x00af00b1000f005f},
91     {0x002c003d00c300dd, 0x00270075003a00ad, 0x00e7002f00ba00c1, 0x001700ed001a00b6},
92     {0x0020008300fb0045, 0x00ba00ee00c0002d, 0x00bd00d900bb005e, 0x00d1005200a900ec},
93     {0x000300e800ad000b, 0x00e700c200280035, 0x009e00c500bd00e2, 0x0019004b009100aa},
94     {0x00c1006000260001, 0x001a00df000f00b9, 0x0091005500a9003b, 0x0024005900640096},
95     {0x00be008c00270098, 0x0017001f00af00e7, 0x001900db00d1004d, 0x00040058002400a2},
96     {0x00d60099000a004e, 0x0092004f00930044, 0x004500dd00dc00d7, 0x004e00980001000b},
97     {0x001a007f002f000a, 0x00db0073001500c5, 0x003600f500f20064, 0x00610046006000cd},
98     {0x00330034007f0099, 0x00380062006300a8, 0x00ea0008001600ac, 0x004300f000b900d4},
99     {0x004d0033001a00d6, 0x002400a700370091, 0x00050060007400e9, 0x002e006700df005e},
100     {0x009100a800c50044, 0x0036003d00a6006e, 0x007800ba00250026, 0x00bf0015003b0086},
101     {0x0037006300150093, 0x002d00d8007a00a6, 0x0034006b00de006a, 0x0032007b00550085},
102     {0x00a700620073004f, 0x00b5005a00d8003d, 0x00da00ce00fe00be, 0x004800e0009600d5},
103     {0x0024003800db0092, 0x006100b5002d0036, 0x00bf0021003e00df, 0x000800fb0059006e},
104     {0x00e900ac006400d7, 0x00df00be006a0026, 0x00ae00910084007c, 0x009c0074002c00ef},
105     {0x0074001600f200dc, 0x003e00fe00de0025, 0x002b0082003f0084, 0x00c200d4002600fa},
106     {0x0060000800f500dd, 0x002100ce006b00ba, 0x00cf005600820091, 0x0086006500c1002d},
107     {0x000500ea00360045, 0x00bf00da00340078, 0x005a00cf002b00ae, 0x005c0088000f0023},
108     {0x005e00d400cd000b, 0x006e00d500850086, 0x0023002d00fa00ef, 0x006300da001a001e},
109     {0x00df00b900600001, 0x005900960055003b, 0x000f00c10026002c, 0x0064009100a9001a},
110     {0x006700f000460098, 0x00fb00e0007b0015, 0x0088006500d40074, 0x009000c8009100da},
111     {0x002e00430061004e, 0x00080048003200bf, 0x005c008600c2009c, 0x0010009000640063},
112     {0x005500ed006b000a, 0x000c003600c300c4, 0x0073006600b600b9, 0x0025000800240082},
113     {0x00d7004f00440099, 0x000a0098000b00dd, 0x00dc0092009300d6, 0x0099004e00010045},
114     {0x00ae0072003b00d6, 0x000f006a00200024, 0x00ef0096004d0067, 0x001100be0060006c},
115     {0x005900f100210044, 0x008600a1000c00cf, 0x007d00a600b300a9, 0x00b800d900b9008f},
116     {0x00f4001900e40093, 0x00c500b1008c00cd, 0x004c00fb008d00e6, 0x00c600cc00df0028},
117     {0x006c007900f1004f, 0x002900bd00bc0027, 0x00ee004000090037, 0x00c800b7003b00d3},
118     {0x002600f500820092, 0x00b300b800b60050, 0x0065002700360059, 0x003d0057005500ce},
119     {0x009c006c005900d7, 0x00640072007c000f, 0x001100b900b400eb, 0x002000ac00960084},
120     {0x00a00013003d00dc, 0x005600ab009e00d9, 0x0085007f009f0020, 0x004a00d8005900e5},
121     {0x000f002700cf00dd, 0x007d0038007300ed, 0x00e4003e00650060, 0x002f000c002c0007},
122     {0x00e20014003a0045, 0x00cd001200310021, 0x00950015004300a0, 0x0022006900260090},
123     {0x007c00bc000c000b, 0x0025008300e00073, 0x007900fc009700fd, 0x006d00e100c10002},
124     {0x00a900df00c10001, 0x00b9002600240096, 0x002c00640055001a, 0x0091003b000f0060},
125     {0x007200bd00a10098, 0x006b009400830038, 0x0087008a00e3002e, 0x008d00aa001a00d2},
126     {0x00ff008500e7004e, 0x00d0006f0013008a, 0x00d4003600700072, 0x007a006200a900fe},
127     {0x006400290086000a, 0x00b8006b0025007d, 0x002f0075003d0096, 0x004000f2009100ed},
128     {0x00ef003f00ed0099, 0x00e400680069003a, 0x00af0046008e00a7, 0x009400fa0064009a},
129     {0x00eb003700a900d6, 0x0096002e00fd0060, 0x0033000f000300f4, 0x005e00b4002400ff},
130     {0x000100dd00920044, 0x00dd00920044000a, 0x00920044000a0001, 0x0044000a000100dd},
131     {0x00b4000900b30093, 0x003d00e300970065, 0x00310017003c0003, 0x00da00d3006000f3},
132     {0x006a00b00057004f, 0x00ad000e009a00b6, 0x00a200e400880005, 0x003f001f00b90080},
133     {0x00b9004000a60092, 0x0075008a00fc003e, 0x008b00c40017000f, 0x000700a800df0025},
134     {0x00fd0003002400d7, 0x00c100e900ae00a9, 0x0074005900720011, 0x00f400ff003b00be}
135 };
136 
137 /**
138  * @brief Encodes a message message of PARAM_K bits to a Reed-Solomon codeword codeword of PARAM_N1 bytes
139  *
140  * Following @cite lin1983error (Chapter 4 - Cyclic Codes),
141  * We perform a systematic encoding using a linear (PARAM_N1 - PARAM_K)-stage shift register
142  * with feedback connections based on the generator polynomial PARAM_RS_POLY of the Reed-Solomon code.
143  *
144  * @param[out] cdw Array of size VEC_N1_SIZE_64 receiving the encoded message
145  * @param[in] msg Array of size VEC_K_SIZE_64 storing the message
146  */
147 void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_encode(uint8_t *cdw, const uint8_t *msg) {
148     size_t i, k;
149     uint8_t gate_value = 0;
150     uint8_t prev, x;
151 
152     union {
153         uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
154         __m256i dummy;
155     } tmp = {0};
156 
157     union {
158         uint16_t arr16[16 * CEIL_DIVIDE(PARAM_G, 16)];
159         __m256i dummy;
160     } PARAM_RS_POLY = {{ RS_POLY_COEFS }};
161 
162     __m256i *tmp256 = (__m256i *)tmp.arr16;
163     __m256i *param256 = (__m256i *)PARAM_RS_POLY.arr16;
164 
165     for (i = 0; i < PARAM_K; ++i) {
166         gate_value = (uint8_t) (msg[PARAM_K - 1 - i] ^ cdw[PARAM_N1 - PARAM_K - 1]);
167         tmp256[0] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[0]);
168         tmp256[1] = PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(gate_value), param256[1]);
169 
170         for (size_t j = 32; j < PARAM_G; ++j) {
171             tmp.arr16[j] = PQCLEAN_HQCRMRS192_AVX2_gf_mul(gate_value, PARAM_RS_POLY.arr16[j]);
172         }
173 
174         prev = 0;
175         for (k = 0; k < PARAM_N1 - PARAM_K; k++) {
176             x = cdw[k];
177             cdw[k] = (uint8_t) (prev ^ tmp.arr16[k]);
178             prev = x;
179         }
180     }
181 
182     memcpy(cdw + PARAM_N1 - PARAM_K, msg, PARAM_K);
183 }
184 
185 
186 
187 /**
188  * @brief Computes 2 * PARAM_DELTA syndromes
189  *
190  * @param[out] syndromes Array of size 2 * PARAM_DELTA receiving the computed syndromes
191  * @param[in] cdw Array of size PARAM_N1 storing the received vector
192  */
193 void compute_syndromes(uint16_t *syndromes, uint8_t *cdw) {
194     __m256i *syndromes256 = (__m256i *) syndromes;
195     syndromes256[0] = _mm256_set1_epi16(cdw[0]);
196 
197     for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
198         syndromes256[0] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_1[i]);
199     }
200 
201     syndromes256[1] = _mm256_set1_epi16(cdw[0]);
202 
203     for (size_t i = 0; i < PARAM_N1 - 1; ++i) {
204         syndromes256[1] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul_vect(_mm256_set1_epi16(cdw[i + 1]), alpha_ij256_2[i]);
205     }
206 }
207 
208 
209 
210 /**
211  * @brief Computes the error locator polynomial (ELP) sigma
212  *
213  * This is a constant time implementation of Berlekamp's simplified algorithm (see @cite lin1983error (Chapter 6 - BCH Codes). <br>
214  * We use the letter p for rho which is initialized at -1. <br>
215  * The array X_sigma_p represents the polynomial X^(mu-rho)*sigma_p(X). <br>
216  * Instead of maintaining a list of sigmas, we update in place both sigma and X_sigma_p. <br>
217  * sigma_copy serves as a temporary save of sigma in case X_sigma_p needs to be updated. <br>
218  * We can properly correct only if the degree of sigma does not exceed PARAM_DELTA.
219  * This means only the first PARAM_DELTA + 1 coefficients of sigma are of value
220  * and we only need to save its first PARAM_DELTA - 1 coefficients.
221  *
222  * @returns the degree of the ELP sigma
223  * @param[out] sigma Array of size (at least) PARAM_DELTA receiving the ELP
224  * @param[in] syndromes Array of size (at least) 2*PARAM_DELTA storing the syndromes
225  */
226 static uint16_t compute_elp(uint16_t *sigma, const uint16_t *syndromes) {
227     uint16_t deg_sigma = 0;
228     uint16_t deg_sigma_p = 0;
229     uint16_t deg_sigma_copy = 0;
230     uint16_t sigma_copy[PARAM_DELTA + 1] = {0};
231     uint16_t X_sigma_p[PARAM_DELTA + 1] = {0, 1};
232     uint16_t pp = (uint16_t) -1; // 2*rho
233     uint16_t d_p = 1;
234     uint16_t d = syndromes[0];
235 
236     uint16_t mask1, mask2, mask12;
237     uint16_t deg_X, deg_X_sigma_p;
238     uint16_t dd;
239     uint16_t mu;
240 
241     uint16_t i;
242 
243     sigma[0] = 1;
244     for (mu = 0; (mu < (2 * PARAM_DELTA)); ++mu) {
245         // Save sigma in case we need it to update X_sigma_p
246         memcpy(sigma_copy, sigma, 2 * (PARAM_DELTA));
247         deg_sigma_copy = deg_sigma;
248 
249         dd = PQCLEAN_HQCRMRS192_AVX2_gf_mul(d, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(d_p));
250 
251         for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
252             sigma[i] ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(dd, X_sigma_p[i]);
253         }
254 
255         deg_X = mu - pp;
256         deg_X_sigma_p = deg_X + deg_sigma_p;
257 
258         // mask1 = 0xffff if(d != 0) and 0 otherwise
259         mask1 = -((uint16_t) - d >> 15);
260 
261         // mask2 = 0xffff if(deg_X_sigma_p > deg_sigma) and 0 otherwise
262         mask2 = -((uint16_t) (deg_sigma - deg_X_sigma_p) >> 15);
263 
264         // mask12 = 0xffff if the deg_sigma increased and 0 otherwise
265         mask12 = mask1 & mask2;
266         deg_sigma ^= mask12 & (deg_X_sigma_p ^ deg_sigma);
267 
268         if (mu == (2 * PARAM_DELTA - 1)) {
269             break;
270         }
271 
272         pp ^= mask12 & (mu ^ pp);
273         d_p ^= mask12 & (d ^ d_p);
274         for (i = PARAM_DELTA; i; --i) {
275             X_sigma_p[i] = (mask12 & sigma_copy[i - 1]) ^ (~mask12 & X_sigma_p[i - 1]);
276         }
277 
278         deg_sigma_p ^= mask12 & (deg_sigma_copy ^ deg_sigma_p);
279         d = syndromes[mu + 1];
280 
281         for (i = 1; (i <= mu + 1) && (i <= PARAM_DELTA); ++i) {
282             d ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[i], syndromes[mu + 1 - i]);
283         }
284     }
285 
286     return deg_sigma;
287 }
288 
289 
290 
291 /**
292  * @brief Computes the error polynomial error from the error locator polynomial sigma
293  *
294  * See function PQCLEAN_HQCRMRS192_AVX2_fft for more details.
295  *
296  * @param[out] error Array of 2^PARAM_M elements receiving the error polynomial
297  * @param[out] error_compact Array of PARAM_DELTA + PARAM_N1 elements receiving a compact representation of the vector error
298  * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
299  */
300 static void compute_roots(uint8_t *error, uint16_t *sigma) {
301     uint16_t w[1 << PARAM_M] = {0};
302 
303     PQCLEAN_HQCRMRS192_AVX2_fft(w, sigma, PARAM_DELTA + 1);
304     PQCLEAN_HQCRMRS192_AVX2_fft_retrieve_error_poly(error, w);
305 }
306 
307 
308 
309 /**
310  * @brief Computes the polynomial z(x)
311  *
312  * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
313  *
314  * @param[out] z Array of PARAM_DELTA + 1 elements receiving the polynomial z(x)
315  * @param[in] sigma Array of 2^PARAM_FFT elements storing the error locator polynomial
316  * @param[in] degree Integer that is the degree of polynomial sigma
317  * @param[in] syndromes Array of 2 * PARAM_DELTA storing the syndromes
318  */
319 static void compute_z_poly(uint16_t *z, const uint16_t *sigma, uint16_t degree, const uint16_t *syndromes) {
320     size_t i, j;
321     uint16_t mask;
322 
323     z[0] = 1;
324 
325     for (i = 1; i < PARAM_DELTA + 1; ++i) {
326         mask = -((uint16_t) (i - degree - 1) >> 15);
327         z[i] = mask & sigma[i];
328     }
329 
330     z[1] ^= syndromes[0];
331 
332     for (i = 2; i <= PARAM_DELTA; ++i) {
333         mask = -((uint16_t) (i - degree - 1) >> 15);
334         z[i] ^= mask & syndromes[i - 1];
335 
336         for (j = 1; j < i; ++j) {
337             z[i] ^= mask & PQCLEAN_HQCRMRS192_AVX2_gf_mul(sigma[j], syndromes[i - j - 1]);
338         }
339     }
340 }
341 
342 
343 
344 /**
345  * @brief Computes the error values
346  *
347  * See @cite lin1983error (Chapter 6 - BCH Codes) for more details.
348  *
349  * @param[out] error_values Array of PARAM_DELTA elements receiving the error values
350  * @param[in] z Array of PARAM_DELTA + 1 elements storing the polynomial z(x)
351  * @param[in] z_degree Integer that is the degree of polynomial z(x)
352  * @param[in] error_compact Array of PARAM_DELTA + PARAM_N1 storing compact representation of the error
353  */
354 static void compute_error_values(uint16_t *error_values, const uint16_t *z, const uint8_t *error) {
355     uint16_t beta_j[PARAM_DELTA] = {0};
356     uint16_t e_j[PARAM_DELTA] = {0};
357 
358     uint16_t delta_counter;
359     uint16_t delta_real_value;
360     uint16_t found;
361     uint16_t mask1;
362     uint16_t mask2;
363     uint16_t tmp1;
364     uint16_t tmp2;
365     uint16_t inverse;
366     uint16_t inverse_power_j;
367 
368     // Compute the beta_{j_i} page 31 of the documentation
369     delta_counter = 0;
370     for (size_t i = 0; i < PARAM_N1; i++) {
371         found = 0;
372         mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
373         for (size_t j = 0; j < PARAM_DELTA; j++) {
374             mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
375             beta_j[j] += mask1 & mask2 & gf_exp[i];
376             found += mask1 & mask2 & 1;
377         }
378         delta_counter += found;
379     }
380     delta_real_value = delta_counter;
381 
382     // Compute the e_{j_i} page 31 of the documentation
383     for (size_t i = 0; i < PARAM_DELTA; ++i) {
384         tmp1 = 1;
385         tmp2 = 1;
386         inverse = PQCLEAN_HQCRMRS192_AVX2_gf_inverse(beta_j[i]);
387         inverse_power_j = 1;
388 
389         for (size_t j = 1; j <= PARAM_DELTA; ++j) {
390             inverse_power_j = PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, inverse);
391             tmp1 ^= PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse_power_j, z[j]);
392         }
393         for (size_t k = 1; k < PARAM_DELTA; ++k) {
394             tmp2 = PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp2, (1 ^ PQCLEAN_HQCRMRS192_AVX2_gf_mul(inverse, beta_j[(i + k) % PARAM_DELTA])));
395         }
396         mask1 = (uint16_t) (((int16_t) i - delta_real_value) >> 15); // i < delta_real_value
397         e_j[i] = mask1 & PQCLEAN_HQCRMRS192_AVX2_gf_mul(tmp1, PQCLEAN_HQCRMRS192_AVX2_gf_inverse(tmp2));
398     }
399 
400     // Place the delta e_{j_i} values at the right coordinates of the output vector
401     delta_counter = 0;
402     for (size_t i = 0; i < PARAM_N1; ++i) {
403         found = 0;
404         mask1 = (uint16_t) (-((int32_t)error[i]) >> 31); // error[i] != 0
405         for (size_t j = 0; j < PARAM_DELTA; j++) {
406             mask2 = ~((uint16_t) (-((int32_t) j ^ delta_counter) >> 31)); // j == delta_counter
407             error_values[i] += mask1 & mask2 & e_j[j];
408             found += mask1 & mask2 & 1;
409         }
410         delta_counter += found;
411     }
412 }
413 
414 
415 
416 /**
417  * @brief Correct the errors
418  *
419  * @param[out] cdw Array of PARAM_N1 elements receiving the corrected vector
420  * @param[in] error Array of the error vector
421  * @param[in] error_values Array of PARAM_DELTA elements storing the error values
422  */
423 static void correct_errors(uint8_t *cdw, const uint16_t *error_values) {
424     for (size_t i = 0; i < PARAM_N1; ++i) {
425         cdw[i] ^= error_values[i];
426     }
427 }
428 
429 
430 
431 /**
432  * @brief Decodes the received word
433  *
434  * This function relies on six steps:
435  *    <ol>
436  *    <li> The first step, is the computation of the 2*PARAM_DELTA syndromes.
437  *    <li> The second step is the computation of the error-locator polynomial sigma.
438  *    <li> The third step, done by additive FFT, is finding the error-locator numbers by calculating the roots of the polynomial sigma and takings their inverses.
439  *    <li> The fourth step, is the polynomial z(x).
440  *    <li> The fifth step, is the computation of the error values.
441  *    <li> The sixth step is the correction of the errors in the received polynomial.
442  *    </ol>
443  * For a more complete picture on Reed-Solomon decoding, see Shu. Lin and Daniel J. Costello in Error Control Coding: Fundamentals and Applications @cite lin1983error
444  *
445  * @param[out] msg Array of size VEC_K_SIZE_64 receiving the decoded message
446  * @param[in] cdw Array of size VEC_N1_SIZE_64 storing the received word
447  */
448 void PQCLEAN_HQCRMRS192_AVX2_reed_solomon_decode(uint8_t *msg, uint8_t *cdw) {
449     union {
450         uint16_t arr16[16 * CEIL_DIVIDE(2 * PARAM_DELTA, 16)];
451         __m256i dummy;
452     } syndromes_aligned = {0};
453     uint16_t *syndromes = syndromes_aligned.arr16;
454 
455     uint16_t sigma[1 << PARAM_FFT] = {0};
456     uint8_t error[1 << PARAM_M] = {0};
457     uint16_t z[PARAM_N1] = {0};
458     uint16_t error_values[PARAM_N1] = {0};
459     uint16_t deg;
460 
461     // Calculate the 2*PARAM_DELTA syndromes
462     compute_syndromes(syndromes, cdw);
463 
464     // Compute the error locator polynomial sigma
465     // Sigma's degree is at most PARAM_DELTA but the FFT requires the extra room
466     deg = compute_elp(sigma, syndromes);
467 
468     // Compute the error polynomial error
469     compute_roots(error, sigma);
470 
471     // Compute the polynomial z(x)
472     compute_z_poly(z, sigma, deg, syndromes);
473 
474     // Compute the error values
475     compute_error_values(error_values, z, error);
476 
477     // Correct the errors
478     correct_errors(cdw, error_values);
479 
480     // Retrieve the message from the decoded codeword
481     memcpy(msg, cdw + (PARAM_G - 1), PARAM_K);
482 
483 }
484