1 #include <stdint.h>
2 #include <string.h>
3
4 #include "address.h"
5 #include "params.h"
6 #include "sha256.h"
7 #include "sha256avx.h"
8 #include "sha256x8.h"
9 #include "thashx8.h"
10 #include "utils.h"
11
12 /**
13 * 8-way parallel version of thash; takes 8x as much input and output
14 */
thashx8(uint8_t * out0,uint8_t * out1,uint8_t * out2,uint8_t * out3,uint8_t * out4,uint8_t * out5,uint8_t * out6,uint8_t * out7,const uint8_t * in0,const uint8_t * in1,const uint8_t * in2,const uint8_t * in3,const uint8_t * in4,const uint8_t * in5,const uint8_t * in6,const uint8_t * in7,unsigned int inblocks,const uint8_t * pub_seed,uint32_t addrx8[8* 8],uint8_t * bufx8,uint8_t * bitmaskx8,const hash_state * state_seeded)15 static void thashx8(uint8_t *out0,
16 uint8_t *out1,
17 uint8_t *out2,
18 uint8_t *out3,
19 uint8_t *out4,
20 uint8_t *out5,
21 uint8_t *out6,
22 uint8_t *out7,
23 const uint8_t *in0,
24 const uint8_t *in1,
25 const uint8_t *in2,
26 const uint8_t *in3,
27 const uint8_t *in4,
28 const uint8_t *in5,
29 const uint8_t *in6,
30 const uint8_t *in7,
31 unsigned int inblocks,
32 const uint8_t *pub_seed,
33 uint32_t addrx8[8 * 8],
34 uint8_t *bufx8,
35 uint8_t *bitmaskx8,
36 const hash_state *state_seeded) {
37 unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES];
38 unsigned int i;
39 sha256ctxx8 ctx;
40
41 (void)pub_seed; /* Suppress an 'unused parameter' warning. */
42
43 for (i = 0; i < 8; i++) {
44 memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
45 pub_seed, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
46 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N +
47 i * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
48 addrx8 + i * 8);
49 }
50
51 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N,
52 bufx8 + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
53 bufx8 + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
54 bufx8 + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
55 bufx8 + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
56 bufx8 + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
57 bufx8 + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
58 bufx8 + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
59 bufx8 + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
60 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES
61 );
62
63 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8);
64
65 for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N; i++) {
66 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
67 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
68 in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
69 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
70 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
71 in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
72 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
73 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
74 in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
75 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
76 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
77 in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
78 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
79 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
80 in4[i] ^ bitmaskx8[i + 4 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
81 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
82 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
83 in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
84 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
85 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
86 in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
87 bufx8[PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + i +
88 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)] =
89 in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)];
90 }
91
92 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_update8x(&ctx,
93 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
94 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
95 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
96 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
97 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
98 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
99 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
100 bufx8 + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N),
101 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
102
103 PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_sha256_final8x(&ctx,
104 outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
105 outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
106 outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
107 outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
108 outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
109 outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
110 outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES,
111 outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES);
112
113 memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
114 memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
115 memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
116 memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
117 memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
118 memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
119 memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
120 memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N);
121 }
122
123 #define thash_size_variant(name, size) \
124 void PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_thashx8_##name(unsigned char *out0, \
125 unsigned char *out1, \
126 unsigned char *out2, \
127 unsigned char *out3, \
128 unsigned char *out4, \
129 unsigned char *out5, \
130 unsigned char *out6, \
131 unsigned char *out7, \
132 const unsigned char *in0, \
133 const unsigned char *in1, \
134 const unsigned char *in2, \
135 const unsigned char *in3, \
136 const unsigned char *in4, \
137 const unsigned char *in5, \
138 const unsigned char *in6, \
139 const unsigned char *in7, \
140 const unsigned char *pub_seed, \
141 uint32_t addrx8[8*8], \
142 const hash_state *state_seeded) { \
143 const unsigned int inblocks = (size); \
144 uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \
145 uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_N)]; \
146 thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \
147 in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \
148 pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \
149 }
150
151 thash_size_variant(1, 1)
152 thash_size_variant(2, 2)
153 thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_WOTS_LEN)
154 thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256128SROBUST_AVX2_FORS_TREES)
155
156 #undef thash_size_variant
157