1 /*
2 * Constant time implementation of the Haraka hash function.
3 *
 * The bit-sliced implementations of the AES round functions are
5 * based on the AES implementation in BearSSL written
6 * by Thomas Pornin <pornin@bolet.org>
7 */
8
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include "haraka.h"
15
/* Sponge rate in bytes: block size used when absorbing and squeezing. */
#define HARAKAS_RATE 32

/*
 * Default Haraka-512 round constants: ten rounds of eight 64-bit words
 * (640 bytes in total). tweak_constants copies this table verbatim into
 * state->tweaked512_rc64 before deriving the seed-dependent constants, and
 * the permutation XORs one row into the bitsliced state per AES round.
 */
static const uint64_t haraka512_rc64[10][8] = {
    /* round 0 */
    {
        0x24cf0ab9086f628b, 0xbdd6eeecc83b8382, 0xd96fb0306cdad0a7, 0xaace082ac8f95f89,
        0x449d8e8870d7041f, 0x49bb2f80b2b3e2f8, 0x0569ae98d93bb258, 0x23dc9691e7d6a4b1
    },
    /* round 1 */
    {
        0xd8ba10ede0fe5b6e, 0x7ecf7dbe424c7b8e, 0x6ea9949c6df62a31, 0xbf3f3c97ec9c313e,
        0x241d03a196a1861e, 0xead3a51116e5a2ea, 0x77d479fcad9574e3, 0x18657a1af894b7a0
    },
    /* round 2 */
    {
        0x10671e1a7f595522, 0xd9a00ff675d28c7b, 0x2f1edf0d2b9ba661, 0xb8ff58b8e3de45f9,
        0xee29261da9865c02, 0xd1532aa4b50bdf43, 0x8bf858159b231bb1, 0xdf17439d22d4f599
    },
    /* round 3 */
    {
        0xdd4b2f0870b918c0, 0x757a81f3b39b1bb6, 0x7a5c556898952e3f, 0x7dd70a16d915d87a,
        0x3ae61971982b8301, 0xc3ab319e030412be, 0x17c0033ac094a8cb, 0x5a0630fc1a8dc4ef
    },
    /* round 4 */
    {
        0x17708988c1632f73, 0xf92ddae090b44f4f, 0x11ac0285c43aa314, 0x509059941936b8ba,
        0xd03e152fa2ce9b69, 0x3fbcbcb63a32998b, 0x6204696d692254f7, 0x915542ed93ec59b4
    },
    /* round 5 */
    {
        0xf4ed94aa8879236e, 0xff6cb41cd38e03c0, 0x069b38602368aeab, 0x669495b820f0ddba,
        0xf42013b1b8bf9e3d, 0xcf935efe6439734d, 0xbc1dcf42ca29e3f8, 0x7e6d3ed29f78ad67
    },
    /* round 6 */
    {
        0xf3b0f6837ffcddaa, 0x3a76faef934ddf41, 0xcec7ae583a9c8e35, 0xe4dd18c68f0260af,
        0x2c0e5df1ad398eaa, 0x478df5236ae22e8c, 0xfb944c46fe865f39, 0xaa48f82f028132ba
    },
    /* round 7 */
    {
        0x231b9ae2b76aca77, 0x292a76a712db0b40, 0x5850625dc8134491, 0x73137dd469810fb5,
        0x8a12a6a202a474fd, 0xd36fd9daa78bdb80, 0xb34c5e733505706f, 0xbaf1cdca818d9d96
    },
    /* round 8 */
    {
        0x2e99781335e8c641, 0xbddfe5cce47d560e, 0xf74e9bf32e5e040c, 0x1d7a709d65996be9,
        0x670df36a9cf66cdd, 0xd05ef84a176a2875, 0x0f888e828cb1c44e, 0x1a79e9c9727b052c
    },
    /* round 9 */
    {
        0x83497348628d84de, 0x2e9387d51f22a754, 0xb000068da2f852d6, 0x378c9e1190fd6fe5,
        0x870027c316de7293, 0xe51a9d4462e047bb, 0x90ecf7f8c6251195, 0x655953bfbed90a9c
    },
};
30
/*
 * Read four bytes starting at src and return them as a 32-bit word,
 * least significant byte first (little-endian).
 */
static inline uint32_t br_dec32le(const unsigned char *src) {
    uint32_t word = 0;
    int k;

    /* Fold from the most significant byte down so src[0] ends up lowest. */
    for (k = 3; k >= 0; k--) {
        word = (word << 8) | (uint32_t)src[k];
    }
    return word;
}
37
/*
 * Decode num consecutive little-endian 32-bit words from src into v.
 * Reads 4 * num bytes and writes num words.
 */
static void br_range_dec32le(uint32_t *v, size_t num, const unsigned char *src) {
    size_t k;

    for (k = 0; k < num; k++) {
        v[k] = br_dec32le(src + 4 * k);
    }
}
44
/*
 * Store the 32-bit word x into dst[0..3], least significant byte first
 * (little-endian).
 */
static inline void br_enc32le(unsigned char *dst, uint32_t x) {
    unsigned int k;

    for (k = 0; k < 4; k++) {
        dst[k] = (unsigned char)(x >> (8 * k));
    }
}
51
52
/*
 * Encode num 32-bit words from v into dst as consecutive little-endian
 * 4-byte groups. Writes 4 * num bytes.
 */
static void br_range_enc32le(unsigned char *dst, const uint32_t *v, size_t num) {
    size_t k;

    for (k = 0; k < num; k++) {
        br_enc32le(dst + 4 * k, v[k]);
    }
}
59
/*
 * Bitsliced AES S-box over eight 64-bit bit planes, applied in place.
 *
 * q[i] carries bit i of every byte being processed, so each bit position
 * of the words is an independent S-box instance. Only XOR, AND and NOT
 * are used; there are no data-dependent branches or table lookups.
 *
 * The gate network is a direct translation of the circuit described by
 * Boyar and Peralta in "A new combinational logic minimization technique
 * with applications to cryptology" (https://eprint.iacr.org/2009/191.pdf).
 * The circuit numbers its inputs x* and outputs s* in "reverse" order:
 * x0/s0 are the high bit (q[7]) and x7/s7 the low bit (q[0]).
 */
static void br_aes_ct64_bitslice_Sbox(uint64_t *q) {
    /* Circuit inputs, high bit first. */
    const uint64_t x0 = q[7];
    const uint64_t x1 = q[6];
    const uint64_t x2 = q[5];
    const uint64_t x3 = q[4];
    const uint64_t x4 = q[3];
    const uint64_t x5 = q[2];
    const uint64_t x6 = q[1];
    const uint64_t x7 = q[0];

    /* Top linear transformation. */
    const uint64_t y14 = x3 ^ x5;
    const uint64_t y13 = x0 ^ x6;
    const uint64_t y9 = x0 ^ x3;
    const uint64_t y8 = x0 ^ x5;
    const uint64_t t0 = x1 ^ x2;
    const uint64_t y1 = t0 ^ x7;
    const uint64_t y4 = y1 ^ x3;
    const uint64_t y12 = y13 ^ y14;
    const uint64_t y2 = y1 ^ x0;
    const uint64_t y5 = y1 ^ x6;
    const uint64_t y3 = y5 ^ y8;
    const uint64_t t1 = x4 ^ y12;
    const uint64_t y15 = t1 ^ x5;
    const uint64_t y20 = t1 ^ x1;
    const uint64_t y6 = y15 ^ x7;
    const uint64_t y10 = y15 ^ t0;
    const uint64_t y11 = y20 ^ y9;
    const uint64_t y7 = x7 ^ y11;
    const uint64_t y17 = y10 ^ y11;
    const uint64_t y19 = y10 ^ y8;
    const uint64_t y16 = t0 ^ y11;
    const uint64_t y21 = y13 ^ y16;
    const uint64_t y18 = x0 ^ y16;

    /* Non-linear section. */
    const uint64_t t2 = y12 & y15;
    const uint64_t t3 = y3 & y6;
    const uint64_t t4 = t3 ^ t2;
    const uint64_t t5 = y4 & x7;
    const uint64_t t6 = t5 ^ t2;
    const uint64_t t7 = y13 & y16;
    const uint64_t t8 = y5 & y1;
    const uint64_t t9 = t8 ^ t7;
    const uint64_t t10 = y2 & y7;
    const uint64_t t11 = t10 ^ t7;
    const uint64_t t12 = y9 & y11;
    const uint64_t t13 = y14 & y17;
    const uint64_t t14 = t13 ^ t12;
    const uint64_t t15 = y8 & y10;
    const uint64_t t16 = t15 ^ t12;
    const uint64_t t17 = t4 ^ t14;
    const uint64_t t18 = t6 ^ t16;
    const uint64_t t19 = t9 ^ t14;
    const uint64_t t20 = t11 ^ t16;
    const uint64_t t21 = t17 ^ y20;
    const uint64_t t22 = t18 ^ y19;
    const uint64_t t23 = t19 ^ y21;
    const uint64_t t24 = t20 ^ y18;

    const uint64_t t25 = t21 ^ t22;
    const uint64_t t26 = t21 & t23;
    const uint64_t t27 = t24 ^ t26;
    const uint64_t t28 = t25 & t27;
    const uint64_t t29 = t28 ^ t22;
    const uint64_t t30 = t23 ^ t24;
    const uint64_t t31 = t22 ^ t26;
    const uint64_t t32 = t31 & t30;
    const uint64_t t33 = t32 ^ t24;
    const uint64_t t34 = t23 ^ t33;
    const uint64_t t35 = t27 ^ t33;
    const uint64_t t36 = t24 & t35;
    const uint64_t t37 = t36 ^ t34;
    const uint64_t t38 = t27 ^ t36;
    const uint64_t t39 = t29 & t38;
    const uint64_t t40 = t25 ^ t39;

    const uint64_t t41 = t40 ^ t37;
    const uint64_t t42 = t29 ^ t33;
    const uint64_t t43 = t29 ^ t40;
    const uint64_t t44 = t33 ^ t37;
    const uint64_t t45 = t42 ^ t41;
    const uint64_t z0 = t44 & y15;
    const uint64_t z1 = t37 & y6;
    const uint64_t z2 = t33 & x7;
    const uint64_t z3 = t43 & y16;
    const uint64_t z4 = t40 & y1;
    const uint64_t z5 = t29 & y7;
    const uint64_t z6 = t42 & y11;
    const uint64_t z7 = t45 & y17;
    const uint64_t z8 = t41 & y10;
    const uint64_t z9 = t44 & y12;
    const uint64_t z10 = t37 & y3;
    const uint64_t z11 = t33 & y4;
    const uint64_t z12 = t43 & y13;
    const uint64_t z13 = t40 & y5;
    const uint64_t z14 = t29 & y2;
    const uint64_t z15 = t42 & y9;
    const uint64_t z16 = t45 & y14;
    const uint64_t z17 = t41 & y8;

    /* Bottom linear transformation. */
    const uint64_t t46 = z15 ^ z16;
    const uint64_t t47 = z10 ^ z11;
    const uint64_t t48 = z5 ^ z13;
    const uint64_t t49 = z9 ^ z10;
    const uint64_t t50 = z2 ^ z12;
    const uint64_t t51 = z2 ^ z5;
    const uint64_t t52 = z7 ^ z8;
    const uint64_t t53 = z0 ^ z3;
    const uint64_t t54 = z6 ^ z7;
    const uint64_t t55 = z16 ^ z17;
    const uint64_t t56 = z12 ^ t48;
    const uint64_t t57 = t50 ^ t53;
    const uint64_t t58 = z4 ^ t46;
    const uint64_t t59 = z3 ^ t54;
    const uint64_t t60 = t46 ^ t57;
    const uint64_t t61 = z14 ^ t57;
    const uint64_t t62 = t52 ^ t58;
    const uint64_t t63 = t49 ^ t58;
    const uint64_t t64 = z4 ^ t59;
    const uint64_t t65 = t61 ^ t62;
    const uint64_t t66 = z1 ^ t63;
    const uint64_t s0 = t59 ^ t63;
    const uint64_t s6 = t56 ^ ~t62;
    const uint64_t s7 = t48 ^ ~t60;
    const uint64_t t67 = t64 ^ t65;
    const uint64_t s3 = t53 ^ t66;
    const uint64_t s4 = t51 ^ t66;
    const uint64_t s5 = t47 ^ t65;
    const uint64_t s1 = t64 ^ ~s3;
    const uint64_t s2 = t55 ^ ~t67;

    /* Circuit outputs, high bit first. */
    q[7] = s0;
    q[6] = s1;
    q[5] = s2;
    q[4] = s3;
    q[3] = s4;
    q[2] = s5;
    q[1] = s6;
    q[0] = s7;
}
233
/*
 * Bitsliced AES S-box over eight 32-bit bit planes, applied in place.
 *
 * q[i] carries bit i of every byte being processed, so each bit position
 * of the words is an independent S-box instance. Only XOR, AND and NOT
 * are used; there are no data-dependent branches or table lookups.
 *
 * The gate network is a direct translation of the circuit described by
 * Boyar and Peralta in "A new combinational logic minimization technique
 * with applications to cryptology" (https://eprint.iacr.org/2009/191.pdf).
 * The circuit numbers its inputs x* and outputs s* in "reverse" order:
 * x0/s0 are the high bit (q[7]) and x7/s7 the low bit (q[0]).
 */
static void br_aes_ct_bitslice_Sbox(uint32_t *q) {
    /* Circuit inputs, high bit first. */
    const uint32_t x0 = q[7];
    const uint32_t x1 = q[6];
    const uint32_t x2 = q[5];
    const uint32_t x3 = q[4];
    const uint32_t x4 = q[3];
    const uint32_t x5 = q[2];
    const uint32_t x6 = q[1];
    const uint32_t x7 = q[0];

    /* Top linear transformation. */
    const uint32_t y14 = x3 ^ x5;
    const uint32_t y13 = x0 ^ x6;
    const uint32_t y9 = x0 ^ x3;
    const uint32_t y8 = x0 ^ x5;
    const uint32_t t0 = x1 ^ x2;
    const uint32_t y1 = t0 ^ x7;
    const uint32_t y4 = y1 ^ x3;
    const uint32_t y12 = y13 ^ y14;
    const uint32_t y2 = y1 ^ x0;
    const uint32_t y5 = y1 ^ x6;
    const uint32_t y3 = y5 ^ y8;
    const uint32_t t1 = x4 ^ y12;
    const uint32_t y15 = t1 ^ x5;
    const uint32_t y20 = t1 ^ x1;
    const uint32_t y6 = y15 ^ x7;
    const uint32_t y10 = y15 ^ t0;
    const uint32_t y11 = y20 ^ y9;
    const uint32_t y7 = x7 ^ y11;
    const uint32_t y17 = y10 ^ y11;
    const uint32_t y19 = y10 ^ y8;
    const uint32_t y16 = t0 ^ y11;
    const uint32_t y21 = y13 ^ y16;
    const uint32_t y18 = x0 ^ y16;

    /* Non-linear section. */
    const uint32_t t2 = y12 & y15;
    const uint32_t t3 = y3 & y6;
    const uint32_t t4 = t3 ^ t2;
    const uint32_t t5 = y4 & x7;
    const uint32_t t6 = t5 ^ t2;
    const uint32_t t7 = y13 & y16;
    const uint32_t t8 = y5 & y1;
    const uint32_t t9 = t8 ^ t7;
    const uint32_t t10 = y2 & y7;
    const uint32_t t11 = t10 ^ t7;
    const uint32_t t12 = y9 & y11;
    const uint32_t t13 = y14 & y17;
    const uint32_t t14 = t13 ^ t12;
    const uint32_t t15 = y8 & y10;
    const uint32_t t16 = t15 ^ t12;
    const uint32_t t17 = t4 ^ t14;
    const uint32_t t18 = t6 ^ t16;
    const uint32_t t19 = t9 ^ t14;
    const uint32_t t20 = t11 ^ t16;
    const uint32_t t21 = t17 ^ y20;
    const uint32_t t22 = t18 ^ y19;
    const uint32_t t23 = t19 ^ y21;
    const uint32_t t24 = t20 ^ y18;

    const uint32_t t25 = t21 ^ t22;
    const uint32_t t26 = t21 & t23;
    const uint32_t t27 = t24 ^ t26;
    const uint32_t t28 = t25 & t27;
    const uint32_t t29 = t28 ^ t22;
    const uint32_t t30 = t23 ^ t24;
    const uint32_t t31 = t22 ^ t26;
    const uint32_t t32 = t31 & t30;
    const uint32_t t33 = t32 ^ t24;
    const uint32_t t34 = t23 ^ t33;
    const uint32_t t35 = t27 ^ t33;
    const uint32_t t36 = t24 & t35;
    const uint32_t t37 = t36 ^ t34;
    const uint32_t t38 = t27 ^ t36;
    const uint32_t t39 = t29 & t38;
    const uint32_t t40 = t25 ^ t39;

    const uint32_t t41 = t40 ^ t37;
    const uint32_t t42 = t29 ^ t33;
    const uint32_t t43 = t29 ^ t40;
    const uint32_t t44 = t33 ^ t37;
    const uint32_t t45 = t42 ^ t41;
    const uint32_t z0 = t44 & y15;
    const uint32_t z1 = t37 & y6;
    const uint32_t z2 = t33 & x7;
    const uint32_t z3 = t43 & y16;
    const uint32_t z4 = t40 & y1;
    const uint32_t z5 = t29 & y7;
    const uint32_t z6 = t42 & y11;
    const uint32_t z7 = t45 & y17;
    const uint32_t z8 = t41 & y10;
    const uint32_t z9 = t44 & y12;
    const uint32_t z10 = t37 & y3;
    const uint32_t z11 = t33 & y4;
    const uint32_t z12 = t43 & y13;
    const uint32_t z13 = t40 & y5;
    const uint32_t z14 = t29 & y2;
    const uint32_t z15 = t42 & y9;
    const uint32_t z16 = t45 & y14;
    const uint32_t z17 = t41 & y8;

    /* Bottom linear transformation. */
    const uint32_t t46 = z15 ^ z16;
    const uint32_t t47 = z10 ^ z11;
    const uint32_t t48 = z5 ^ z13;
    const uint32_t t49 = z9 ^ z10;
    const uint32_t t50 = z2 ^ z12;
    const uint32_t t51 = z2 ^ z5;
    const uint32_t t52 = z7 ^ z8;
    const uint32_t t53 = z0 ^ z3;
    const uint32_t t54 = z6 ^ z7;
    const uint32_t t55 = z16 ^ z17;
    const uint32_t t56 = z12 ^ t48;
    const uint32_t t57 = t50 ^ t53;
    const uint32_t t58 = z4 ^ t46;
    const uint32_t t59 = z3 ^ t54;
    const uint32_t t60 = t46 ^ t57;
    const uint32_t t61 = z14 ^ t57;
    const uint32_t t62 = t52 ^ t58;
    const uint32_t t63 = t49 ^ t58;
    const uint32_t t64 = z4 ^ t59;
    const uint32_t t65 = t61 ^ t62;
    const uint32_t t66 = z1 ^ t63;
    const uint32_t s0 = t59 ^ t63;
    const uint32_t s6 = t56 ^ ~t62;
    const uint32_t s7 = t48 ^ ~t60;
    const uint32_t t67 = t64 ^ t65;
    const uint32_t s3 = t53 ^ t66;
    const uint32_t s4 = t51 ^ t66;
    const uint32_t s5 = t47 ^ t65;
    const uint32_t s1 = t64 ^ ~s3;
    const uint32_t s2 = t55 ^ ~t67;

    /* Circuit outputs, high bit first. */
    q[7] = s0;
    q[6] = s1;
    q[5] = s2;
    q[4] = s3;
    q[3] = s4;
    q[2] = s5;
    q[1] = s6;
    q[0] = s7;
}
407
/*
 * In-place bit-matrix "orthogonalisation" of eight 32-bit words: within
 * every byte lane, bit j of word i is exchanged with bit i of word j.
 * It is performed as three swap stages (1-, 2- and 4-bit strides); the
 * pairs handled inside one stage are disjoint.
 */
static void br_aes_ct_ortho(uint32_t *q) {
#define SWAPN_32(cl, ch, s, x, y)   do { \
        uint32_t sw_first, sw_second; \
        sw_first = (x); \
        sw_second = (y); \
        (x) = (sw_first & (uint32_t)(cl)) | ((sw_second & (uint32_t)(cl)) << (s)); \
        (y) = ((sw_first & (uint32_t)(ch)) >> (s)) | (sw_second & (uint32_t)(ch)); \
    } while (0)

#define SWAP2_32(x, y)   SWAPN_32(0x55555555, 0xAAAAAAAA, 1, x, y)
#define SWAP4_32(x, y)   SWAPN_32(0x33333333, 0xCCCCCCCC, 2, x, y)
#define SWAP8_32(x, y)   SWAPN_32(0x0F0F0F0F, 0xF0F0F0F0, 4, x, y)

    int k;

    /* Stage 1: neighbouring words, single-bit stride. */
    for (k = 0; k < 8; k += 2) {
        SWAP2_32(q[k], q[k + 1]);
    }

    /* Stage 2: words two apart, two-bit stride. */
    for (k = 0; k < 8; k += 4) {
        SWAP4_32(q[k], q[k + 2]);
        SWAP4_32(q[k + 1], q[k + 3]);
    }

    /* Stage 3: words four apart, nibble stride. */
    for (k = 0; k < 4; k++) {
        SWAP8_32(q[k], q[k + 4]);
    }
}
436
/*
 * XOR the eight 32-bit round-key words sk[0..7] into the state q[0..7].
 */
static inline void add_round_key32(uint32_t *q, const uint32_t *sk) {
    int k;

    for (k = 0; k < 8; k++) {
        q[k] ^= sk[k];
    }
}
447
/*
 * ShiftRows on the 32-bit bitsliced state: in each of the eight words,
 * byte 0 is kept, byte 1 is rotated right by 2 bits, byte 2 by 4 bits
 * and byte 3 by 6 bits (each rotation stays inside its own byte).
 */
static inline void shift_rows32(uint32_t *q) {
    uint32_t *word;

    for (word = q; word != q + 8; word++) {
        const uint32_t v = *word;
        const uint32_t byte0 = v & 0x000000FF;
        const uint32_t byte1 = ((v & 0x0000FC00) >> 2) | ((v & 0x00000300) << 6);
        const uint32_t byte2 = ((v & 0x00F00000) >> 4) | ((v & 0x000F0000) << 4);
        const uint32_t byte3 = ((v & 0xC0000000) >> 6) | ((v & 0x3F000000) << 2);

        *word = byte0 | byte1 | byte2 | byte3;
    }
}
461
/* Rotate a 32-bit word by 16 bits, i.e. exchange its two halves. */
static inline uint32_t rotr16(uint32_t x) {
    const uint32_t high_to_low = x >> 16;
    const uint32_t low_to_high = x << 16;

    return low_to_high | high_to_low;
}
465
/*
 * MixColumns on the 32-bit bitsliced state, in place.
 *
 * For each bit plane k, rot[k] is q[k] rotated right by 8 bits and
 * mix[k] = q[k] ^ rot[k]. Output plane k combines the mix of the plane
 * below it (plane 7 wraps around into plane 0), rot[k] and mix[k] rotated
 * by 16 bits; planes 1, 3 and 4 additionally receive mix[7].
 */
static inline void mix_columns32(uint32_t *q) {
    uint32_t rot[8];
    uint32_t mix[8];
    int k;

    /* Snapshot everything derived from the input before overwriting q. */
    for (k = 0; k < 8; k++) {
        rot[k] = (q[k] >> 8) | (q[k] << 24);
        mix[k] = q[k] ^ rot[k];
    }

    q[0] = mix[7] ^ rot[0] ^ rotr16(mix[0]);
    q[1] = mix[0] ^ mix[7] ^ rot[1] ^ rotr16(mix[1]);
    q[2] = mix[1] ^ rot[2] ^ rotr16(mix[2]);
    q[3] = mix[2] ^ mix[7] ^ rot[3] ^ rotr16(mix[3]);
    q[4] = mix[3] ^ mix[7] ^ rot[4] ^ rotr16(mix[4]);
    q[5] = mix[4] ^ rot[5] ^ rotr16(mix[5]);
    q[6] = mix[5] ^ rot[6] ^ rotr16(mix[6]);
    q[7] = mix[6] ^ rot[7] ^ rotr16(mix[7]);
}
496
/*
 * In-place bit-matrix "orthogonalisation" of eight 64-bit words: within
 * every byte lane, bit j of word i is exchanged with bit i of word j.
 * It is performed as three swap stages (1-, 2- and 4-bit strides); the
 * pairs handled inside one stage are disjoint.
 */
static void br_aes_ct64_ortho(uint64_t *q) {
#define SWAPN(cl, ch, s, x, y)   do { \
        uint64_t sw_first, sw_second; \
        sw_first = (x); \
        sw_second = (y); \
        (x) = (sw_first & (uint64_t)(cl)) | ((sw_second & (uint64_t)(cl)) << (s)); \
        (y) = ((sw_first & (uint64_t)(ch)) >> (s)) | (sw_second & (uint64_t)(ch)); \
    } while (0)

#define SWAP2(x, y)    SWAPN(0x5555555555555555, 0xAAAAAAAAAAAAAAAA,  1, x, y)
#define SWAP4(x, y)    SWAPN(0x3333333333333333, 0xCCCCCCCCCCCCCCCC,  2, x, y)
#define SWAP8(x, y)    SWAPN(0x0F0F0F0F0F0F0F0F, 0xF0F0F0F0F0F0F0F0,  4, x, y)

    int k;

    /* Stage 1: neighbouring words, single-bit stride. */
    for (k = 0; k < 8; k += 2) {
        SWAP2(q[k], q[k + 1]);
    }

    /* Stage 2: words two apart, two-bit stride. */
    for (k = 0; k < 8; k += 4) {
        SWAP4(q[k], q[k + 2]);
        SWAP4(q[k + 1], q[k + 3]);
    }

    /* Stage 3: words four apart, nibble stride. */
    for (k = 0; k < 4; k++) {
        SWAP8(q[k], q[k + 4]);
    }
}
525
526
/*
 * Pack four 32-bit words into two 64-bit words with their bytes
 * interleaved. The four bytes of each input word are spread onto the
 * even byte positions of a 64-bit value; *q0 then holds w[0] on the even
 * bytes and w[2] on the odd bytes, and *q1 holds w[1] and w[3] likewise.
 */
static void br_aes_ct64_interleave_in(uint64_t *q0, uint64_t *q1, const uint32_t *w) {
    uint64_t spread[4];
    int k;

    for (k = 0; k < 4; k++) {
        uint64_t v = w[k];

        /* Move the upper 16 bits up to bits 32..47. */
        v |= (v << 16);
        v &= (uint64_t)0x0000FFFF0000FFFF;
        /* Then separate the two bytes of every 16-bit group. */
        v |= (v << 8);
        v &= (uint64_t)0x00FF00FF00FF00FF;
        spread[k] = v;
    }

    *q0 = spread[0] | (spread[2] << 8);
    *q1 = spread[1] | (spread[3] << 8);
}
553
554
/*
 * Unpack two byte-interleaved 64-bit words into four 32-bit words:
 * w[0] and w[1] are gathered from the even bytes of q0 and q1, w[2] and
 * w[3] from their odd bytes.
 */
static void br_aes_ct64_interleave_out(uint32_t *w, uint64_t q0, uint64_t q1) {
    /* Shifting by 8 brings the odd bytes onto the even positions. */
    const uint64_t lanes[4] = { q0, q1, q0 >> 8, q1 >> 8 };
    int k;

    for (k = 0; k < 4; k++) {
        uint64_t v = lanes[k] & (uint64_t)0x00FF00FF00FF00FF;

        /* Pair the bytes back up into 16-bit groups. */
        v |= (v >> 8);
        v &= (uint64_t)0x0000FFFF0000FFFF;
        /* Join the two 16-bit groups into one 32-bit word. */
        w[k] = (uint32_t)v | (uint32_t)(v >> 16);
    }
}
575
/*
 * XOR the eight 64-bit round-key words sk[0..7] into the state q[0..7].
 */
static inline void add_round_key(uint64_t *q, const uint64_t *sk) {
    int k;

    for (k = 0; k < 8; k++) {
        q[k] ^= sk[k];
    }
}
586
/*
 * ShiftRows on the 64-bit bitsliced state: in each of the eight words,
 * the lowest 16-bit group is kept, the next is rotated right by 4 bits,
 * the third by 8 bits and the top one by 12 bits (each rotation stays
 * inside its own 16-bit group).
 */
static inline void shift_rows(uint64_t *q) {
    uint64_t *word;

    for (word = q; word != q + 8; word++) {
        const uint64_t v = *word;
        const uint64_t grp0 = v & (uint64_t)0x000000000000FFFF;
        const uint64_t grp1 = ((v & (uint64_t)0x00000000FFF00000) >> 4)
                              | ((v & (uint64_t)0x00000000000F0000) << 12);
        const uint64_t grp2 = ((v & (uint64_t)0x0000FF0000000000) >> 8)
                              | ((v & (uint64_t)0x000000FF00000000) << 8);
        const uint64_t grp3 = ((v & (uint64_t)0xF000000000000000) >> 12)
                              | ((v & (uint64_t)0x0FFF000000000000) << 4);

        *word = grp0 | grp1 | grp2 | grp3;
    }
}
603
/* Rotate a 64-bit word by 32 bits, i.e. exchange its two halves. */
static inline uint64_t rotr32(uint64_t x) {
    const uint64_t high_to_low = x >> 32;
    const uint64_t low_to_high = x << 32;

    return low_to_high | high_to_low;
}
607
/*
 * MixColumns on the 64-bit bitsliced state, in place.
 *
 * For each bit plane k, rot[k] is q[k] rotated right by 16 bits and
 * mix[k] = q[k] ^ rot[k]. Output plane k combines the mix of the plane
 * below it (plane 7 wraps around into plane 0), rot[k] and mix[k] rotated
 * by 32 bits; planes 1, 3 and 4 additionally receive mix[7].
 */
static inline void mix_columns(uint64_t *q) {
    uint64_t rot[8];
    uint64_t mix[8];
    int k;

    /* Snapshot everything derived from the input before overwriting q. */
    for (k = 0; k < 8; k++) {
        rot[k] = (q[k] >> 16) | (q[k] << 48);
        mix[k] = q[k] ^ rot[k];
    }

    q[0] = mix[7] ^ rot[0] ^ rotr32(mix[0]);
    q[1] = mix[0] ^ mix[7] ^ rot[1] ^ rotr32(mix[1]);
    q[2] = mix[1] ^ rot[2] ^ rotr32(mix[2]);
    q[3] = mix[2] ^ mix[7] ^ rot[3] ^ rotr32(mix[3]);
    q[4] = mix[3] ^ mix[7] ^ rot[4] ^ rotr32(mix[4]);
    q[5] = mix[4] ^ rot[5] ^ rotr32(mix[5]);
    q[6] = mix[5] ^ rot[6] ^ rotr32(mix[6]);
    q[7] = mix[6] ^ rot[7] ^ rotr32(mix[7]);
}
638
/*
 * Convert 64 bytes of round-constant material at `in` into the eight
 * 64-bit bitsliced words out[0..7]: decode sixteen little-endian words,
 * interleave each group of four into out[k] / out[k + 4], then
 * orthogonalise the result.
 */
static void interleave_constant(uint64_t *out, const unsigned char *in) {
    uint32_t words[16];
    int lane;

    br_range_dec32le(words, 16, in);
    for (lane = 0; lane < 4; lane++) {
        br_aes_ct64_interleave_in(out + lane, out + lane + 4, &words[4 * lane]);
    }
    br_aes_ct64_ortho(out);
}
649
/*
 * Convert 32 bytes of round-constant material at `in` into the eight
 * 32-bit bitsliced words out[0..7]. Even outputs come from the first 16
 * input bytes, odd outputs from the second 16; the result is then
 * orthogonalised.
 */
static void interleave_constant32(uint32_t *out, const unsigned char *in) {
    int pair;

    for (pair = 0; pair < 4; pair++) {
        const unsigned char *lo_half = in + 4 * pair;

        out[2 * pair] = br_dec32le(lo_half);
        out[2 * pair + 1] = br_dec32le(lo_half + 16);
    }
    br_aes_ct_ortho(out);
}
658
/*
 * Derive the seed-dependent ("tweaked") round constants stored in *state.
 *
 * state        context to fill in
 * pk_seed      public seed; always processed
 * sk_seed      secret seed, or NULL to leave the sk-seed constants untouched
 * seed_length  length in bytes of each seed
 *
 * The default constants are installed first so that haraka_S can run; it
 * then expands each seed to 640 bytes. The sk_seed expansion (first 320
 * bytes) fills tweaked256_rc32_sseed. The pk_seed expansion fills
 * tweaked256_rc32 (first 320 bytes) and replaces tweaked512_rc64 (all 640
 * bytes).
 */
void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_tweak_constants(
    harakactx *state,
    const unsigned char *pk_seed, const unsigned char *sk_seed,
    unsigned long long seed_length) {
    unsigned char expanded[40 * 16];
    int round;

    /* Start from the standard constants; they key both haraka_S calls below. */
    memcpy((uint8_t *)state->tweaked512_rc64, (uint8_t *)haraka512_rc64, sizeof expanded);

    /* Constants for sk.seed (optional). */
    if (sk_seed != NULL) {
        PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S(
            expanded, sizeof expanded, sk_seed, seed_length, state);

        for (round = 0; round < 10; round++) {
            interleave_constant32(state->tweaked256_rc32_sseed[round], expanded + 32 * round);
        }
    }

    /* Constants for pk.seed. */
    PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S(
        expanded, sizeof expanded, pk_seed, seed_length, state);

    for (round = 0; round < 10; round++) {
        interleave_constant32(state->tweaked256_rc32[round], expanded + 32 * round);
        interleave_constant(state->tweaked512_rc64[round], expanded + 64 * round);
    }
}
688
haraka_S_absorb(unsigned char * s,const unsigned char * m,unsigned long long mlen,unsigned char p,const harakactx * state)689 static void haraka_S_absorb(unsigned char *s,
690 const unsigned char *m, unsigned long long mlen,
691 unsigned char p, const harakactx *state) {
692 unsigned long long i;
693 unsigned char t[HARAKAS_RATE];
694
695 while (mlen >= HARAKAS_RATE) {
696 /* XOR block to state */
697 for (i = 0; i < HARAKAS_RATE; ++i) {
698 s[i] ^= m[i];
699 }
700 PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(s, s, state);
701 mlen -= HARAKAS_RATE;
702 m += HARAKAS_RATE;
703 }
704
705 for (i = 0; i < HARAKAS_RATE; ++i) {
706 t[i] = 0;
707 }
708 for (i = 0; i < mlen; ++i) {
709 t[i] = m[i];
710 }
711 t[i] = p;
712 t[HARAKAS_RATE - 1] |= 128;
713 for (i = 0; i < HARAKAS_RATE; ++i) {
714 s[i] ^= t[i];
715 }
716 }
717
haraka_S_squeezeblocks(unsigned char * h,unsigned long long nblocks,unsigned char * s,const harakactx * state)718 static void haraka_S_squeezeblocks(unsigned char *h, unsigned long long nblocks,
719 unsigned char *s, const harakactx *state) {
720 while (nblocks > 0) {
721 PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(s, s, state);
722 memcpy(h, s, HARAKAS_RATE);
723 h += HARAKAS_RATE;
724 nblocks--;
725 }
726 }
727
/*
 * Reset an incremental sponge context. s_inc is 65 bytes: the 64-byte
 * state followed by one bookkeeping byte at index 64, all set to zero.
 */
void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_init(uint8_t *s_inc) {
    memset(s_inc, 0, 64 + 1);
}
736
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_absorb(uint8_t * s_inc,const uint8_t * m,size_t mlen,const harakactx * state)737 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_absorb(uint8_t *s_inc, const uint8_t *m, size_t mlen, const harakactx *state) {
738 size_t i;
739
740 /* Recall that s_inc[64] is the non-absorbed bytes xored into the state */
741 while (mlen + s_inc[64] >= HARAKAS_RATE) {
742 for (i = 0; i < (size_t)(HARAKAS_RATE - s_inc[64]); i++) {
743 /* Take the i'th byte from message
744 xor with the s_inc[64] + i'th byte of the state */
745 s_inc[s_inc[64] + i] ^= m[i];
746 }
747 mlen -= (size_t)(HARAKAS_RATE - s_inc[64]);
748 m += HARAKAS_RATE - s_inc[64];
749 s_inc[64] = 0;
750
751 PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(s_inc, s_inc, state);
752 }
753
754 for (i = 0; i < mlen; i++) {
755 s_inc[s_inc[64] + i] ^= m[i];
756 }
757 s_inc[64] = (uint8_t)(mlen + s_inc[64]);
758 }
759
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_finalize(uint8_t * s_inc)760 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_finalize(uint8_t *s_inc) {
761 /* After haraka_S_inc_absorb, we are guaranteed that s_inc[64] < HARAKAS_RATE,
762 so we can always use one more byte for p in the current state. */
763 s_inc[s_inc[64]] ^= 0x1F;
764 s_inc[HARAKAS_RATE - 1] ^= 128;
765 s_inc[64] = 0;
766 }
767
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_squeeze(uint8_t * out,size_t outlen,uint8_t * s_inc,const harakactx * state)768 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S_inc_squeeze(uint8_t *out, size_t outlen, uint8_t *s_inc, const harakactx *state) {
769 uint8_t i;
770
771 /* First consume any bytes we still have sitting around */
772 for (i = 0; i < outlen && i < s_inc[64]; i++) {
773 /* There are s_inc[64] bytes left, so r - s_inc[64] is the first
774 available byte. We consume from there, i.e., up to r. */
775 out[i] = s_inc[(HARAKAS_RATE - s_inc[64] + i)];
776 }
777 out += i;
778 outlen -= i;
779 s_inc[64] = (uint8_t)(s_inc[64] - i);
780
781 /* Then squeeze the remaining necessary blocks */
782 while (outlen > 0) {
783 PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(s_inc, s_inc, state);
784
785 for (i = 0; i < outlen && i < HARAKAS_RATE; i++) {
786 out[i] = s_inc[i];
787 }
788 out += i;
789 outlen -= i;
790 s_inc[64] = (uint8_t)(HARAKAS_RATE - i);
791 }
792 }
793
/*
 * One-shot sponge: absorb inlen bytes from `in` (domain byte 0x1F) into a
 * fresh all-zero 64-byte state and write outlen output bytes to `out`.
 * Whole 32-byte blocks are squeezed straight into `out`; a trailing
 * partial block goes through a temporary buffer.
 */
void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka_S(unsigned char *out, unsigned long long outlen, const unsigned char *in, unsigned long long inlen, const harakactx *state) {
    unsigned char s[64] = {0};
    unsigned char tail[32];
    const unsigned long long whole = outlen / 32;
    const unsigned long long rest = outlen % 32;
    unsigned long long k;

    haraka_S_absorb(s, in, inlen, 0x1F, state);

    haraka_S_squeezeblocks(out, whole, s, state);
    out += whole * 32;

    if (rest) {
        haraka_S_squeezeblocks(tail, 1, s, state);
        for (k = 0; k < rest; k++) {
            out[k] = tail[k];
        }
    }
}
814
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(unsigned char * out,const unsigned char * in,const harakactx * state)815 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(unsigned char *out, const unsigned char *in, const harakactx *state) {
816 uint32_t w[16];
817 uint64_t q[8], tmp_q;
818 unsigned int i, j;
819
820 br_range_dec32le(w, 16, in);
821 for (i = 0; i < 4; i++) {
822 br_aes_ct64_interleave_in(&q[i], &q[i + 4], w + (i << 2));
823 }
824 br_aes_ct64_ortho(q);
825
826 /* AES rounds */
827 for (i = 0; i < 5; i++) {
828 for (j = 0; j < 2; j++) {
829 br_aes_ct64_bitslice_Sbox(q);
830 shift_rows(q);
831 mix_columns(q);
832 add_round_key(q, state->tweaked512_rc64[2 * i + j]);
833 }
834 /* Mix states */
835 for (j = 0; j < 8; j++) {
836 tmp_q = q[j];
837 q[j] = (tmp_q & 0x0001000100010001) << 5 |
838 (tmp_q & 0x0002000200020002) << 12 |
839 (tmp_q & 0x0004000400040004) >> 1 |
840 (tmp_q & 0x0008000800080008) << 6 |
841 (tmp_q & 0x0020002000200020) << 9 |
842 (tmp_q & 0x0040004000400040) >> 4 |
843 (tmp_q & 0x0080008000800080) << 3 |
844 (tmp_q & 0x2100210021002100) >> 5 |
845 (tmp_q & 0x0210021002100210) << 2 |
846 (tmp_q & 0x0800080008000800) << 4 |
847 (tmp_q & 0x1000100010001000) >> 12 |
848 (tmp_q & 0x4000400040004000) >> 10 |
849 (tmp_q & 0x8400840084008400) >> 3;
850 }
851 }
852
853 br_aes_ct64_ortho(q);
854 for (i = 0; i < 4; i ++) {
855 br_aes_ct64_interleave_out(w + (i << 2), q[i], q[i + 4]);
856 }
857 br_range_enc32le(out, w, 16);
858 }
859
/*
 * Haraka-512 compression: 64 input bytes to 32 output bytes.
 *
 * Runs the permutation into a scratch buffer, XORs the input back in
 * (feed-forward) and keeps the four 8-byte slices at offsets 8, 24, 32
 * and 48. `out` is only written after the feed-forward is complete.
 */
void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512(unsigned char *out, const unsigned char *in, const harakactx *state) {
    static const unsigned char kept_offset[4] = {8, 24, 32, 48};
    unsigned char buf[64];
    int k;

    PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka512_perm(buf, in, state);

    /* Feed-forward */
    for (k = 0; k < 64; k++) {
        buf[k] ^= in[k];
    }

    /* Truncated */
    for (k = 0; k < 4; k++) {
        memcpy(out + 8 * k, buf + kept_offset[k], 8);
    }
}
877
878
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka256(unsigned char * out,const unsigned char * in,const harakactx * state)879 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka256(unsigned char *out, const unsigned char *in, const harakactx *state) {
880 uint32_t q[8], tmp_q;
881 int i, j;
882
883 for (i = 0; i < 4; i++) {
884 q[2 * i] = br_dec32le(in + 4 * i);
885 q[2 * i + 1] = br_dec32le(in + 4 * i + 16);
886 }
887 br_aes_ct_ortho(q);
888
889 /* AES rounds */
890 for (i = 0; i < 5; i++) {
891 for (j = 0; j < 2; j++) {
892 br_aes_ct_bitslice_Sbox(q);
893 shift_rows32(q);
894 mix_columns32(q);
895 add_round_key32(q, state->tweaked256_rc32[2 * i + j]);
896 }
897
898 /* Mix states */
899 for (j = 0; j < 8; j++) {
900 tmp_q = q[j];
901 q[j] = (tmp_q & 0x81818181) |
902 (tmp_q & 0x02020202) << 1 |
903 (tmp_q & 0x04040404) << 2 |
904 (tmp_q & 0x08080808) << 3 |
905 (tmp_q & 0x10101010) >> 3 |
906 (tmp_q & 0x20202020) >> 2 |
907 (tmp_q & 0x40404040) >> 1;
908 }
909 }
910
911 br_aes_ct_ortho(q);
912 for (i = 0; i < 4; i++) {
913 br_enc32le(out + 4 * i, q[2 * i]);
914 br_enc32le(out + 4 * i + 16, q[2 * i + 1]);
915 }
916
917 for (i = 0; i < 32; i++) {
918 out[i] ^= in[i];
919 }
920 }
921
PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka256_sk(unsigned char * out,const unsigned char * in,const harakactx * state)922 void PQCLEAN_SPHINCSHARAKA192FSIMPLE_CLEAN_haraka256_sk(unsigned char *out, const unsigned char *in, const harakactx *state) {
923 uint32_t q[8], tmp_q;
924 int i, j;
925
926 for (i = 0; i < 4; i++) {
927 q[2 * i] = br_dec32le(in + 4 * i);
928 q[2 * i + 1] = br_dec32le(in + 4 * i + 16);
929 }
930 br_aes_ct_ortho(q);
931
932 /* AES rounds */
933 for (i = 0; i < 5; i++) {
934 for (j = 0; j < 2; j++) {
935 br_aes_ct_bitslice_Sbox(q);
936 shift_rows32(q);
937 mix_columns32(q);
938 add_round_key32(q, state->tweaked256_rc32_sseed[2 * i + j]);
939 }
940
941 /* Mix states */
942 for (j = 0; j < 8; j++) {
943 tmp_q = q[j];
944 q[j] = (tmp_q & 0x81818181) |
945 (tmp_q & 0x02020202) << 1 |
946 (tmp_q & 0x04040404) << 2 |
947 (tmp_q & 0x08080808) << 3 |
948 (tmp_q & 0x10101010) >> 3 |
949 (tmp_q & 0x20202020) >> 2 |
950 (tmp_q & 0x40404040) >> 1;
951 }
952 }
953
954 br_aes_ct_ortho(q);
955 for (i = 0; i < 4; i++) {
956 br_enc32le(out + 4 * i, q[2 * i]);
957 br_enc32le(out + 4 * i + 16, q[2 * i + 1]);
958 }
959
960 for (i = 0; i < 32; i++) {
961 out[i] ^= in[i];
962 }
963 }
964