1 #include <stdint.h>
2 #include <string.h>
3 
4 #include "address.h"
5 #include "params.h"
6 #include "sha256.h"
7 #include "sha256avx.h"
8 #include "sha256x8.h"
9 #include "thashx8.h"
10 #include "utils.h"
11 
12 /**
13  * 8-way parallel version of thash; takes 8x as much input and output
thash(unsigned char * out,unsigned char * buf,const unsigned char * in,unsigned int inblocks,const unsigned char * pub_seed,uint32_t addr[8],const hash_state * hash_state_seeded)14  */
15 static void thashx8(uint8_t *out0,
16                     uint8_t *out1,
17                     uint8_t *out2,
18                     uint8_t *out3,
19                     uint8_t *out4,
20                     uint8_t *out5,
21                     uint8_t *out6,
22                     uint8_t *out7,
23                     const uint8_t *in0,
24                     const uint8_t *in1,
25                     const uint8_t *in2,
26                     const uint8_t *in3,
27                     const uint8_t *in4,
28                     const uint8_t *in5,
29                     const uint8_t *in6,
30                     const uint8_t *in7,
31                     unsigned int inblocks,
32                     const uint8_t *pub_seed,
33                     uint32_t addrx8[8 * 8],
34                     uint8_t *bufx8,
35                     uint8_t *bitmaskx8,
36                     const hash_state *state_seeded) {
37     unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES];
38     unsigned int i;
39     sha256ctxx8 ctx;
40 
41     (void)pub_seed; /* Suppress an 'unused parameter' warning. */
42 
43     for (i = 0; i < 8; i++) {
44         memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
45                pub_seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
46         PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N +
47                 i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
48                 addrx8 + i * 8);
49     }
50 
51     PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N,
52             bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
53             bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
54             bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
55             bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
56             bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
57             bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
58             bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
59             bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
60             PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES
61                                                );
62 
63     PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8);
64 
65     for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) {
66         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
67                                                      0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
68                   in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
69         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
70                                                      1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
71                   in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
72         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
73                                                      2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
74                   in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
75         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
76                                                      3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
77                   in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
78         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
79                                                      4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
80                   in4[i] ^ bitmaskx8[i + 4 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
81         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
82                                                      5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
83                   in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
84         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
85                                                      6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
86                   in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
87         bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i +
88                                                      7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] =
89                   in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];
90     }
91 
92     PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(&ctx,
93             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
94             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
95             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
96             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
97             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
98             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
99             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
100             bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N),
101             PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
102 
103     PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(&ctx,
104             outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
105             outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
106             outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
107             outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
108             outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
109             outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
110             outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES,
111             outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES);
112 
113     memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
114     memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
115     memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
116     memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
117     memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
118     memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
119     memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
120     memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N);
121 }
122 
123 #define thash_size_variant(name, size) \
124     void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_##name(unsigned char *out0,                                 \
125             unsigned char *out1,                                 \
126             unsigned char *out2,                                 \
127             unsigned char *out3,                                 \
128             unsigned char *out4,                                 \
129             unsigned char *out5,                                 \
130             unsigned char *out6,                                 \
131             unsigned char *out7,                                 \
132             const unsigned char *in0,                            \
133             const unsigned char *in1,                            \
134             const unsigned char *in2,                            \
135             const unsigned char *in3,                            \
136             const unsigned char *in4,                            \
137             const unsigned char *in5,                            \
138             const unsigned char *in6,                            \
139             const unsigned char *in7,                            \
140             const unsigned char *pub_seed,                       \
141             uint32_t addrx8[8*8],                                \
142             const hash_state *state_seeded) {                    \
143         const unsigned int inblocks = (size);                                    \
144         uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];         \
145         uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)];                                   \
146         thashx8(out0, out1, out2, out3, out4, out5, out6, out7,                  \
147                 in0, in1, in2, in3, in4, in5, in6, in7, inblocks,                \
148                 pub_seed, addrx8, bufx8, bitmaskx8, state_seeded);               \
149     }
150 
151 thash_size_variant(1, 1)
152 thash_size_variant(2, 2)
153 thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN)
154 thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES)
155 
156 #undef thash_size_variant
157