1 #include "blake3_impl.h"
2 #include <string.h>
3
// Writes the 32-bit word `w` into the four bytes starting at `dst`, least
// significant byte first. The value is emitted one byte at a time, so the
// resulting byte layout does not depend on the host's byte order or on the
// alignment of `dst`.
INLINE void store32(void *dst, uint32_t w) {
  uint8_t *bytes = (uint8_t *)dst;
  for (size_t i = 0; i < 4; i++) {
    // Byte i holds bits [8*i, 8*i + 7] of w.
    bytes[i] = (uint8_t)(w >> (8 * i));
  }
}
11
// Rotates the 32-bit word `w` right by `c` bit positions and returns the
// result.
//
// Both shift counts are reduced modulo 32. Without that, `c == 0` would
// evaluate `w << 32`, and shifting a 32-bit value by its full width is
// undefined behaviour in C. With the masks, a count of 0 (or any multiple of
// 32) returns `w` unchanged, and for 1 <= c <= 31 the result is identical to
// the unmasked `(w >> c) | (w << (32 - c))` form.
INLINE uint32_t rotr32(uint32_t w, uint32_t c) {
  return (w >> (c & 31)) | (w << ((32 - c) & 31));
}
15
g(uint32_t * state,size_t a,size_t b,size_t c,size_t d,uint32_t x,uint32_t y)16 INLINE void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
17 uint32_t x, uint32_t y) {
18 state[a] = state[a] + state[b] + x;
19 state[d] = rotr32(state[d] ^ state[a], 16);
20 state[c] = state[c] + state[d];
21 state[b] = rotr32(state[b] ^ state[c], 12);
22 state[a] = state[a] + state[b] + y;
23 state[d] = rotr32(state[d] ^ state[a], 8);
24 state[c] = state[c] + state[d];
25 state[b] = rotr32(state[b] ^ state[c], 7);
26 }
27
round_fn(uint32_t state[16],const uint32_t * msg,size_t round)28 INLINE void round_fn(uint32_t state[16], const uint32_t *msg, size_t round) {
29 // Select the message schedule based on the round.
30 const uint8_t *schedule = MSG_SCHEDULE[round];
31
32 // Mix the columns.
33 g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
34 g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
35 g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
36 g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
37
38 // Mix the rows.
39 g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
40 g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
41 g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
42 g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
43 }
44
compress_pre(uint32_t state[16],const uint32_t cv[8],const uint8_t block[BLAKE3_BLOCK_LEN],uint8_t block_len,uint64_t counter,uint8_t flags)45 INLINE void compress_pre(uint32_t state[16], const uint32_t cv[8],
46 const uint8_t block[BLAKE3_BLOCK_LEN],
47 uint8_t block_len, uint64_t counter, uint8_t flags) {
48 uint32_t block_words[16];
49 block_words[0] = load32(block + 4 * 0);
50 block_words[1] = load32(block + 4 * 1);
51 block_words[2] = load32(block + 4 * 2);
52 block_words[3] = load32(block + 4 * 3);
53 block_words[4] = load32(block + 4 * 4);
54 block_words[5] = load32(block + 4 * 5);
55 block_words[6] = load32(block + 4 * 6);
56 block_words[7] = load32(block + 4 * 7);
57 block_words[8] = load32(block + 4 * 8);
58 block_words[9] = load32(block + 4 * 9);
59 block_words[10] = load32(block + 4 * 10);
60 block_words[11] = load32(block + 4 * 11);
61 block_words[12] = load32(block + 4 * 12);
62 block_words[13] = load32(block + 4 * 13);
63 block_words[14] = load32(block + 4 * 14);
64 block_words[15] = load32(block + 4 * 15);
65
66 state[0] = cv[0];
67 state[1] = cv[1];
68 state[2] = cv[2];
69 state[3] = cv[3];
70 state[4] = cv[4];
71 state[5] = cv[5];
72 state[6] = cv[6];
73 state[7] = cv[7];
74 state[8] = IV[0];
75 state[9] = IV[1];
76 state[10] = IV[2];
77 state[11] = IV[3];
78 state[12] = counter_low(counter);
79 state[13] = counter_high(counter);
80 state[14] = (uint32_t)block_len;
81 state[15] = (uint32_t)flags;
82
83 round_fn(state, &block_words[0], 0);
84 round_fn(state, &block_words[0], 1);
85 round_fn(state, &block_words[0], 2);
86 round_fn(state, &block_words[0], 3);
87 round_fn(state, &block_words[0], 4);
88 round_fn(state, &block_words[0], 5);
89 round_fn(state, &block_words[0], 6);
90 }
91
// Compresses one block and overwrites `cv` with the new 8-word chaining
// value.
//
// After compress_pre() has filled the 16-word state, output word i is the XOR
// of state word i with state word i + 8, i.e. the lower half of the state
// folded with the upper half.
void blake3_compress_in_place_portable(uint32_t cv[8],
                                       const uint8_t block[BLAKE3_BLOCK_LEN],
                                       uint8_t block_len, uint64_t counter,
                                       uint8_t flags) {
  uint32_t state[16];
  compress_pre(state, cv, block, block_len, counter, flags);

  for (size_t i = 0; i < 8; i++) {
    cv[i] = state[i] ^ state[i + 8];
  }
}
107
// Compresses one block and writes the full 64-byte result to `out`, leaving
// `cv` untouched.
//
// The output is sixteen 32-bit words, each serialized with store32():
//   words 0..7  = state[i] ^ state[i + 8]
//   words 8..15 = state[i + 8] ^ cv[i]
// where `state` is the 16-word state produced by compress_pre().
void blake3_compress_xof_portable(const uint32_t cv[8],
                                  const uint8_t block[BLAKE3_BLOCK_LEN],
                                  uint8_t block_len, uint64_t counter,
                                  uint8_t flags, uint8_t out[64]) {
  uint32_t state[16];
  compress_pre(state, cv, block, block_len, counter, flags);

  // First 32 bytes: lower half of the state folded with the upper half.
  for (size_t i = 0; i < 8; i++) {
    store32(&out[i * 4], state[i] ^ state[i + 8]);
  }
  // Last 32 bytes: upper half of the state folded with the input cv.
  for (size_t i = 0; i < 8; i++) {
    store32(&out[(i + 8) * 4], state[i + 8] ^ cv[i]);
  }
}
132
hash_one_portable(const uint8_t * input,size_t blocks,const uint32_t key[8],uint64_t counter,uint8_t flags,uint8_t flags_start,uint8_t flags_end,uint8_t out[BLAKE3_OUT_LEN])133 INLINE void hash_one_portable(const uint8_t *input, size_t blocks,
134 const uint32_t key[8], uint64_t counter,
135 uint8_t flags, uint8_t flags_start,
136 uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) {
137 uint32_t cv[8];
138 memcpy(cv, key, BLAKE3_KEY_LEN);
139 uint8_t block_flags = flags | flags_start;
140 while (blocks > 0) {
141 if (blocks == 1) {
142 block_flags |= flags_end;
143 }
144 blake3_compress_in_place_portable(cv, input, BLAKE3_BLOCK_LEN, counter,
145 block_flags);
146 input = &input[BLAKE3_BLOCK_LEN];
147 blocks -= 1;
148 block_flags = flags;
149 }
150 memcpy(out, cv, 32);
151 }
152
blake3_hash_many_portable(const uint8_t * const * inputs,size_t num_inputs,size_t blocks,const uint32_t key[8],uint64_t counter,bool increment_counter,uint8_t flags,uint8_t flags_start,uint8_t flags_end,uint8_t * out)153 void blake3_hash_many_portable(const uint8_t *const *inputs, size_t num_inputs,
154 size_t blocks, const uint32_t key[8],
155 uint64_t counter, bool increment_counter,
156 uint8_t flags, uint8_t flags_start,
157 uint8_t flags_end, uint8_t *out) {
158 while (num_inputs > 0) {
159 hash_one_portable(inputs[0], blocks, key, counter, flags, flags_start,
160 flags_end, out);
161 if (increment_counter) {
162 counter += 1;
163 }
164 inputs += 1;
165 num_inputs -= 1;
166 out = &out[BLAKE3_OUT_LEN];
167 }
168 }
169