1 #include <stdint.h> 2 #include <string.h> 3 4 #include "address.h" 5 #include "params.h" 6 #include "sha256.h" 7 #include "sha256avx.h" 8 #include "sha256x8.h" 9 #include "thashx8.h" 10 #include "utils.h" 11 12 /** 13 * 8-way parallel version of thash; takes 8x as much input and output thash(unsigned char * out,unsigned char * buf,const unsigned char * in,unsigned int inblocks,const unsigned char * pub_seed,uint32_t addr[8],const hash_state * hash_state_seeded)14 */ 15 static void thashx8(uint8_t *out0, 16 uint8_t *out1, 17 uint8_t *out2, 18 uint8_t *out3, 19 uint8_t *out4, 20 uint8_t *out5, 21 uint8_t *out6, 22 uint8_t *out7, 23 const uint8_t *in0, 24 const uint8_t *in1, 25 const uint8_t *in2, 26 const uint8_t *in3, 27 const uint8_t *in4, 28 const uint8_t *in5, 29 const uint8_t *in6, 30 const uint8_t *in7, 31 unsigned int inblocks, 32 const uint8_t *pub_seed, 33 uint32_t addrx8[8 * 8], 34 uint8_t *bufx8, 35 uint8_t *bitmaskx8, 36 const hash_state *state_seeded) { 37 unsigned char outbufx8[8 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES]; 38 unsigned int i; 39 sha256ctxx8 ctx; 40 41 (void)pub_seed; /* Suppress an 'unused parameter' warning. */ 42 43 for (i = 0; i < 8; i++) { 44 memcpy(bufx8 + i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 45 pub_seed, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 46 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_compress_address(bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 47 i * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 48 addrx8 + i * 8); 49 } 50 51 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_mgf1x8(bitmaskx8, inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N, 52 bufx8 + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 53 bufx8 + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 54 bufx8 + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 55 bufx8 + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 56 bufx8 + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 57 bufx8 + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 58 bufx8 + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 59 bufx8 + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 60 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES 61 ); 62 63 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_clone_statex8(&ctx, &state_seeded->x8); 64 65 for (i = 0; i < inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N; i++) { 66 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 67 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 68 in0[i] ^ bitmaskx8[i + 0 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 69 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 70 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 71 in1[i] ^ bitmaskx8[i + 1 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 72 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 73 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 74 in2[i] ^ bitmaskx8[i + 2 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 75 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 76 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 77 in3[i] ^ bitmaskx8[i + 3 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 78 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 79 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 80 in4[i] ^ bitmaskx8[i + 4 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 81 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 82 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 83 in5[i] ^ bitmaskx8[i + 5 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 84 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 85 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 86 in6[i] ^ bitmaskx8[i + 6 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 87 bufx8[PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + i + 88 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)] = 89 in7[i] ^ bitmaskx8[i + 7 * (inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; 90 } 91 92 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_update8x(&ctx, 93 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 0 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 94 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 1 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 95 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 2 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 96 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 3 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 97 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 4 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 98 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 5 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 99 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 6 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 100 bufx8 + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + 7 * (PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N), 101 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + inblocks * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 102 103 PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_sha256_final8x(&ctx, 104 outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 105 outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 106 outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 107 outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 108 outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 109 outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 110 outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, 111 outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES); 112 113 memcpy(out0, outbufx8 + 0 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 114 memcpy(out1, outbufx8 + 1 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 115 memcpy(out2, outbufx8 + 2 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 116 memcpy(out3, outbufx8 + 3 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 117 memcpy(out4, outbufx8 + 4 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 118 memcpy(out5, outbufx8 + 5 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 119 memcpy(out6, outbufx8 + 6 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 120 memcpy(out7, outbufx8 + 7 * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_OUTPUT_BYTES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N); 121 } 122 123 #define thash_size_variant(name, size) \ 124 void PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_thashx8_##name(unsigned char *out0, \ 125 unsigned char *out1, \ 126 unsigned char *out2, \ 127 unsigned char *out3, \ 128 unsigned char *out4, \ 129 unsigned char *out5, \ 130 unsigned char *out6, \ 131 unsigned char *out7, \ 132 const unsigned char *in0, \ 133 const unsigned char *in1, \ 134 const unsigned char *in2, \ 135 const unsigned char *in3, \ 136 const unsigned char *in4, \ 137 const unsigned char *in5, \ 138 const unsigned char *in6, \ 139 const unsigned char *in7, \ 140 const unsigned char *pub_seed, \ 141 uint32_t addrx8[8*8], \ 142 const hash_state *state_seeded) { \ 143 const unsigned int inblocks = (size); \ 144 uint8_t bufx8[8*(PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N + PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_SHA256_ADDR_BYTES + (size)*PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; \ 145 uint8_t bitmaskx8[8*((size) * PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_N)]; \ 146 thashx8(out0, out1, out2, out3, out4, out5, out6, out7, \ 147 in0, in1, in2, in3, in4, in5, in6, in7, inblocks, \ 148 pub_seed, addrx8, bufx8, bitmaskx8, state_seeded); \ 149 } 150 151 thash_size_variant(1, 1) 152 thash_size_variant(2, 2) 153 thash_size_variant(WOTS_LEN, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_WOTS_LEN) 154 thash_size_variant(FORS_TREES, PQCLEAN_SPHINCSSHA256192SROBUST_AVX2_FORS_TREES) 155 156 #undef thash_size_variant 157