1 //! BLAKE2bp, a variant of BLAKE2b that uses SIMD more efficiently.
2 //!
//! The AVX2 implementation of BLAKE2bp is about twice as fast as that of BLAKE2b.
4 //! However, note that it's a different hash function, and it gives a different
5 //! hash from BLAKE2b for the same input.
6 //!
7 //! # Example
8 //!
9 //! ```
10 //! use blake2b_simd::blake2bp;
11 //!
12 //! let hash = blake2bp::Params::new()
13 //! .hash_length(16)
14 //! .key(b"The Magic Words are Squeamish Ossifrage")
15 //! .to_state()
16 //! .update(b"foo")
17 //! .update(b"bar")
18 //! .update(b"baz")
19 //! .finalize();
20 //! assert_eq!("e69c7d2c42a5ac14948772231c68c552", &hash.to_hex());
21 //! ```
22
23 use crate::guts::{Finalize, Implementation, Job, LastNode, Stride};
24 use crate::many;
25 use crate::Count;
26 use crate::Hash;
27 use crate::Word;
28 use crate::BLOCKBYTES;
29 use crate::KEYBYTES;
30 use crate::OUTBYTES;
31 use core::cmp;
32 use core::fmt;
33 use core::mem::size_of;
34
35 #[cfg(feature = "std")]
36 use std;
37
38 pub(crate) const DEGREE: usize = 4;
39
40 /// Compute the BLAKE2bp hash of a slice of bytes all at once, using default
41 /// parameters.
42 ///
43 /// # Example
44 ///
45 /// ```
46 /// # use blake2b_simd::blake2bp::blake2bp;
47 /// let expected = "8ca9ccee7946afcb686fe7556628b5ba1bf9a691da37ca58cd049354d99f3704\
48 /// 2c007427e5f219b9ab5063707ec6823872dee413ee014b4d02f2ebb6abb5f643";
49 /// let hash = blake2bp(b"foo");
50 /// assert_eq!(expected, &hash.to_hex());
51 /// ```
blake2bp(input: &[u8]) -> Hash52 pub fn blake2bp(input: &[u8]) -> Hash {
53 Params::new().hash(input)
54 }
55
56 /// A parameter builder for BLAKE2bp, just like the [`Params`](../struct.Params.html) type for
57 /// BLAKE2b.
58 ///
59 /// This builder only supports configuring the hash length and a secret key. This matches the
60 /// options provided by the [reference
61 /// implementation](https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2.h#L162-L165).
62 ///
63 /// # Example
64 ///
65 /// ```
66 /// use blake2b_simd::blake2bp;
67 /// let mut state = blake2bp::Params::new().hash_length(32).to_state();
68 /// ```
#[derive(Clone)]
pub struct Params {
    // Length of the final hash in bytes, 1..=OUTBYTES.
    hash_length: u8,
    // Number of meaningful bytes in `key`; 0 means unkeyed.
    key_length: u8,
    // Key bytes, zero-padded out to KEYBYTES.
    key: [u8; KEYBYTES],
    // The compression backend chosen at runtime (or forced portable in tests).
    implementation: Implementation,
}
76
impl Params {
    /// Equivalent to `Params::default()`.
    pub fn new() -> Self {
        Self {
            hash_length: OUTBYTES as u8,
            key_length: 0,
            key: [0; KEYBYTES],
            implementation: Implementation::detect(),
        }
    }

    // Compute the initial state words for the four leaf nodes and for the
    // root node, by encoding these parameters as BLAKE2 tree-hashing
    // parameters on a base `crate::Params`.
    fn to_words(&self) -> ([[Word; 8]; DEGREE], [Word; 8]) {
        let mut base_params = crate::Params::new();
        base_params
            .hash_length(self.hash_length as usize)
            .key(&self.key[..self.key_length as usize])
            .fanout(DEGREE as u8)
            .max_depth(2)
            .max_leaf_length(0)
            // Note that inner_hash_length is always OUTBYTES, regardless of the hash_length
            // parameter. This isn't documented in the spec, but it matches the behavior of the
            // reference implementation: https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2bp-ref.c#L55
            .inner_hash_length(OUTBYTES);
        // Each leaf differs only in its node_offset.
        let leaf_words = |worker_index| {
            base_params
                .clone()
                .node_offset(worker_index)
                .node_depth(0)
                // Note that setting the last_node flag here has no effect,
                // because it isn't included in the state words.
                .to_words()
        };
        let leaf_words = [leaf_words(0), leaf_words(1), leaf_words(2), leaf_words(3)];
        let root_words = base_params
            .clone()
            .node_offset(0)
            .node_depth(1)
            // Note that setting the last_node flag here has no effect, because
            // it isn't included in the state words. Also note that because
            // we're only preserving its state words, the root node won't hash
            // any key bytes.
            .to_words();
        (leaf_words, root_words)
    }

    /// Hash an input all at once with these parameters.
    pub fn hash(&self, input: &[u8]) -> Hash {
        // If there's a key, just fall back to using the State.
        if self.key_length > 0 {
            return self.to_state().update(input).finalize();
        }
        let (mut leaf_words, mut root_words) = self.to_words();
        // Hash each leaf in parallel. With Stride::Parallel, leaf i starts at
        // block i and consumes every DEGREE-th block thereafter.
        let jobs = leaf_words.iter_mut().enumerate().map(|(i, words)| {
            // Clamp the start offset so short inputs give the trailing
            // leaves an empty slice instead of panicking.
            let input_start = cmp::min(input.len(), i * BLOCKBYTES);
            Job {
                input: &input[input_start..],
                words,
                count: 0,
                last_node: if i == DEGREE - 1 {
                    LastNode::Yes
                } else {
                    LastNode::No
                },
            }
        });
        many::compress_many(jobs, self.implementation, Finalize::Yes, Stride::Parallel);
        // Hash each leaf into the root.
        finalize_root_words(
            &leaf_words,
            &mut root_words,
            self.hash_length,
            self.implementation,
        )
    }

    /// Construct a BLAKE2bp `State` object based on these parameters.
    pub fn to_state(&self) -> State {
        State::with_params(self)
    }

    /// Set the length of the final hash, from 1 to `OUTBYTES` (64). Apart from controlling the
    /// length of the final `Hash`, this is also associated data, and changing it will result in a
    /// totally different hash.
    pub fn hash_length(&mut self, length: usize) -> &mut Self {
        assert!(
            1 <= length && length <= OUTBYTES,
            "Bad hash length: {}",
            length
        );
        self.hash_length = length as u8;
        self
    }

    /// Use a secret key, so that BLAKE2bp acts as a MAC. The maximum key length is `KEYBYTES`
    /// (64). An empty key is equivalent to having no key at all.
    pub fn key(&mut self, key: &[u8]) -> &mut Self {
        assert!(key.len() <= KEYBYTES, "Bad key length: {}", key.len());
        self.key_length = key.len() as u8;
        // Zero the whole buffer first, so bytes from a previously set longer
        // key can't linger past the new key's length.
        self.key = [0; KEYBYTES];
        self.key[..key.len()].copy_from_slice(key);
        self
    }
}
181
182 impl Default for Params {
default() -> Self183 fn default() -> Self {
184 Self::new()
185 }
186 }
187
188 impl fmt::Debug for Params {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result189 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
190 write!(
191 f,
192 "Params {{ hash_length: {}, key_length: {} }}",
193 self.hash_length,
194 // NB: Don't print the key itself. Debug shouldn't leak secrets.
195 self.key_length,
196 )
197 }
198 }
199
200 /// An incremental hasher for BLAKE2bp, just like the [`State`](../struct.State.html) type for
201 /// BLAKE2b.
202 ///
203 /// # Example
204 ///
205 /// ```
206 /// use blake2b_simd::blake2bp;
207 ///
208 /// let mut state = blake2bp::State::new();
209 /// state.update(b"foo");
210 /// state.update(b"bar");
211 /// let hash = state.finalize();
212 ///
213 /// let expected = "e654427b6ef02949471712263e59071abbb6aa94855674c1daeed6cfaf127c33\
214 /// dfa3205f7f7f71e4f0673d25fa82a368488911f446bccd323af3ab03f53e56e5";
215 /// assert_eq!(expected, &hash.to_hex());
216 /// ```
#[derive(Clone)]
pub struct State {
    // The in-progress state words of each of the four leaf nodes.
    leaf_words: [[Word; 8]; DEGREE],
    // The initial state words of the root node; leaf outputs get hashed into
    // a copy of these at finalize time.
    root_words: [Word; 8],
    // Note that this buffer is twice as large as what compress4 needs. That guarantees that we
    // have enough input when we compress to know we don't need to finalize any of the leaves.
    buf: [u8; 2 * DEGREE * BLOCKBYTES],
    // Number of valid bytes currently held in `buf`.
    buf_len: u16,
    // Note that this is the *per-leaf* count.
    count: Count,
    // Requested length of the final hash, in bytes.
    hash_length: u8,
    // The compression backend in use.
    implementation: Implementation,
    // Whether a non-empty key was supplied; used by count() to exclude the
    // key blocks from the reported total.
    is_keyed: bool,
}
231
impl State {
    /// Equivalent to `State::default()` or `Params::default().to_state()`.
    pub fn new() -> Self {
        Self::with_params(&Params::default())
    }

    fn with_params(params: &Params) -> Self {
        let (leaf_words, root_words) = params.to_words();

        // If a key is set, initialize the buffer to contain the key bytes.
        // Note that only the leaves hash key bytes. The root doesn't, even
        // though the key length is still set in its parameters. Again this
        // isn't documented in the spec, but it matches the behavior of the
        // reference implementation:
        // https://github.com/BLAKE2/BLAKE2/blob/320c325437539ae91091ce62efec1913cd8093c2/ref/blake2bp-ref.c#L128
        // This particular behavior (though not the inner hash length behavior
        // above) is also corroborated by the official test vectors; see
        // tests/vector_tests.rs.
        let mut buf = [0; 2 * DEGREE * BLOCKBYTES];
        let mut buf_len = 0;
        if params.key_length > 0 {
            // Each leaf gets one full block containing the zero-padded key.
            for i in 0..DEGREE {
                let keybytes = &params.key[..params.key_length as usize];
                buf[i * BLOCKBYTES..][..keybytes.len()].copy_from_slice(keybytes);
                buf_len = BLOCKBYTES * DEGREE;
            }
        }

        Self {
            leaf_words,
            root_words,
            buf,
            buf_len: buf_len as u16,
            count: 0, // count gets updated in self.compress()
            hash_length: params.hash_length,
            implementation: params.implementation,
            is_keyed: params.key_length > 0,
        }
    }

    // Copy as much of `input` as fits into the internal buffer, and advance
    // `input` past the bytes that were consumed.
    fn fill_buf(&mut self, input: &mut &[u8]) {
        let take = cmp::min(self.buf.len() - self.buf_len as usize, input.len());
        self.buf[self.buf_len as usize..][..take].copy_from_slice(&input[..take]);
        self.buf_len += take as u16;
        *input = &input[take..];
    }

    // Compress `input` into the leaves without finalizing any of them, and
    // advance the per-leaf `count`.
    fn compress_to_leaves(
        leaves: &mut [[Word; 8]; DEGREE],
        input: &[u8],
        count: &mut Count,
        implementation: Implementation,
    ) {
        // Input is assumed to be an even number of blocks for each leaf. Since
        // we're not finalizing, debug asserts will fire otherwise.
        let jobs = leaves.iter_mut().enumerate().map(|(i, words)| {
            Job {
                input: &input[i * BLOCKBYTES..],
                words,
                count: *count,
                last_node: LastNode::No, // irrelevant when not finalizing
            }
        });
        many::compress_many(jobs, implementation, Finalize::No, Stride::Parallel);
        // Note that count is the bytes input *per-leaf*.
        *count = count.wrapping_add((input.len() / DEGREE) as Count);
    }

    /// Add input to the hash. You can call `update` any number of times.
    pub fn update(&mut self, mut input: &[u8]) -> &mut Self {
        // If we have a partial buffer, try to complete it. If we complete it and there's more
        // input waiting, we need to compress to make more room. However, because we need to be
        // sure that *none* of the leaves would need to be finalized as part of this round of
        // compression, we need to buffer more than we would for BLAKE2b.
        if self.buf_len > 0 {
            self.fill_buf(&mut input);
            // The buffer is large enough for two compressions. If we've filled
            // the buffer and there's still more input coming, then we have to
            // do at least one compression. If there's enough input still
            // coming that all the leaves are guaranteed to get more, do both
            // compressions in the buffer. Otherwise, do just one and shift the
            // back half of the buffer to the front.
            if !input.is_empty() {
                if input.len() > (DEGREE - 1) * BLOCKBYTES {
                    // Enough input coming to do both compressions.
                    Self::compress_to_leaves(
                        &mut self.leaf_words,
                        &self.buf,
                        &mut self.count,
                        self.implementation,
                    );
                    self.buf_len = 0;
                } else {
                    // Only enough input coming for one compression.
                    Self::compress_to_leaves(
                        &mut self.leaf_words,
                        &self.buf[..DEGREE * BLOCKBYTES],
                        &mut self.count,
                        self.implementation,
                    );
                    self.buf_len = (DEGREE * BLOCKBYTES) as u16;
                    let (buf_front, buf_back) = self.buf.split_at_mut(DEGREE * BLOCKBYTES);
                    buf_front.copy_from_slice(buf_back);
                }
            }
        }

        // Now we directly compress as much input as possible, without copying
        // it into the buffer. We need to make sure we buffer at least one byte
        // for each of the leaves, so that we know we don't need to finalize
        // them.
        let needed_tail = (DEGREE - 1) * BLOCKBYTES + 1;
        let mut bulk_bytes = input.len().saturating_sub(needed_tail);
        // Round down to a whole number of blocks per leaf, which
        // compress_to_leaves requires.
        bulk_bytes -= bulk_bytes % (DEGREE * BLOCKBYTES);
        if bulk_bytes > 0 {
            Self::compress_to_leaves(
                &mut self.leaf_words,
                &input[..bulk_bytes],
                &mut self.count,
                self.implementation,
            );
            input = &input[bulk_bytes..];
        }

        // Buffer any remaining input, to be either compressed or finalized in
        // a subsequent call.
        self.fill_buf(&mut input);
        debug_assert_eq!(0, input.len());
        self
    }

    /// Finalize the state and return a `Hash`. This method is idempotent, and calling it multiple
    /// times will give the same result. It's also possible to `update` with more input in between.
    pub fn finalize(&self) -> Hash {
        // Hash whatever's remaining in the buffer and finalize the leaves.
        // Everything is done on copies of the state words, which is what
        // makes this method idempotent.
        let buf_len = self.buf_len as usize;
        let mut leaves_copy = self.leaf_words;
        let jobs = leaves_copy
            .iter_mut()
            .enumerate()
            .map(|(leaf_index, leaf_words)| {
                // Clamp the start offset so leaves with no buffered bytes get
                // an empty slice rather than panicking.
                let input = &self.buf[cmp::min(leaf_index * BLOCKBYTES, buf_len)..buf_len];
                Job {
                    input,
                    words: leaf_words,
                    count: self.count,
                    last_node: if leaf_index == DEGREE - 1 {
                        LastNode::Yes
                    } else {
                        LastNode::No
                    },
                }
            });
        many::compress_many(jobs, self.implementation, Finalize::Yes, Stride::Parallel);

        // Concatenate each leaf into the root and hash that.
        let mut root_words_copy = self.root_words;
        finalize_root_words(
            &leaves_copy,
            &mut root_words_copy,
            self.hash_length,
            self.implementation,
        )
    }

    /// Return the total number of bytes input so far.
    ///
    /// Note that `count` doesn't include the bytes of the key block, if any.
    /// It's exactly the total number of input bytes fed to `update`.
    pub fn count(&self) -> Count {
        // Remember that self.count is *per-leaf*.
        let mut ret = self
            .count
            .wrapping_mul(DEGREE as Count)
            .wrapping_add(self.buf_len as Count);
        if self.is_keyed {
            // Subtract the key blocks that with_params pre-loaded into the
            // buffer, so callers only see their own input bytes.
            ret -= (DEGREE * BLOCKBYTES) as Count;
        }
        ret
    }
}
413
#[cfg(feature = "std")]
impl std::io::Write for State {
    /// Feed `buf` into the hash. Never fails, and always consumes the whole
    /// buffer.
    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
        let len = buf.len();
        self.update(buf);
        Ok(len)
    }

    /// A no-op: all input is absorbed immediately by `write`.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
425
426 impl fmt::Debug for State {
fmt(&self, f: &mut fmt::Formatter) -> fmt::Result427 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
428 write!(
429 f,
430 "State {{ count: {}, hash_length: {} }}",
431 self.count(),
432 self.hash_length,
433 )
434 }
435 }
436
437 impl Default for State {
default() -> Self438 fn default() -> Self {
439 Self::with_params(&Params::default())
440 }
441 }
442
443 // Compress each of the four finalized hashes into the root words as input,
444 // using two compressions. Note that even if a future version of this
445 // implementation supports the hash_length parameter and sets it as associated
446 // data for all nodes, this step must still use the untruncated output of each
447 // leaf. Note also that, as mentioned above, the root node doesn't hash any key
448 // bytes.
finalize_root_words( leaf_words: &[[Word; 8]; DEGREE], root_words: &mut [Word; 8], hash_length: u8, imp: Implementation, ) -> Hash449 fn finalize_root_words(
450 leaf_words: &[[Word; 8]; DEGREE],
451 root_words: &mut [Word; 8],
452 hash_length: u8,
453 imp: Implementation,
454 ) -> Hash {
455 debug_assert_eq!(OUTBYTES, 8 * size_of::<Word>());
456 let mut block = [0; DEGREE * OUTBYTES];
457 for (word, chunk) in leaf_words
458 .iter()
459 .flat_map(|words| words.iter())
460 .zip(block.chunks_exact_mut(size_of::<Word>()))
461 {
462 chunk.copy_from_slice(&word.to_le_bytes());
463 }
464 imp.compress1_loop(
465 &block,
466 root_words,
467 0,
468 LastNode::Yes,
469 Finalize::Yes,
470 Stride::Serial,
471 );
472 Hash {
473 bytes: crate::state_words_to_bytes(&root_words),
474 len: hash_length,
475 }
476 }
477
// Test helper: force the portable (non-SIMD) implementation, so tests can
// exercise that code path regardless of the CPU's detected capabilities.
pub(crate) fn force_portable(params: &mut Params) {
    params.implementation = Implementation::portable();
}
481
#[cfg(test)]
pub(crate) mod test {
    use super::*;
    use crate::paint_test_input;

    // This is a simple reference implementation without the complicated buffering or parameter
    // support of the real implementation. We need this because the official test vectors don't
    // include any inputs large enough to exercise all the branches in the buffering logic.
    fn blake2bp_reference(input: &[u8]) -> Hash {
        let mut leaves = arrayvec::ArrayVec::<[_; DEGREE]>::new();
        for leaf_index in 0..DEGREE {
            leaves.push(
                crate::Params::new()
                    .fanout(DEGREE as u8)
                    .max_depth(2)
                    .node_offset(leaf_index as u64)
                    .inner_hash_length(OUTBYTES)
                    .to_state(),
            );
        }
        leaves[DEGREE - 1].set_last_node(true);
        // Deal the input blocks out to the leaves round-robin.
        for (i, chunk) in input.chunks(BLOCKBYTES).enumerate() {
            leaves[i % DEGREE].update(chunk);
        }
        let mut root = crate::Params::new()
            .fanout(DEGREE as u8)
            .max_depth(2)
            .node_depth(1)
            .inner_hash_length(OUTBYTES)
            .last_node(true)
            .to_state();
        for leaf in &mut leaves {
            root.update(leaf.finalize().as_bytes());
        }
        root.finalize()
    }

    #[test]
    fn test_against_reference() {
        let mut buf = [0; 21 * BLOCKBYTES];
        paint_test_input(&mut buf);
        // - 8 blocks is just enough to fill the double buffer.
        // - 9 blocks triggers the "perform one compression on the double buffer" case.
        // - 11 blocks is the largest input where only one compression may be performed, on the
        //   first half of the buffer, because there's not enough input to avoid needing to
        //   finalize the second half.
        // - 12 blocks triggers the "perform both compressions in the double buffer" case.
        // - 15 blocks is the largest input where, after compressing 8 blocks from the buffer,
        //   there's not enough input to hash directly from memory.
        // - 16 blocks triggers "after emptying the buffer, hash directly from memory".
        for num_blocks in 0..=20 {
            for &extra in &[0, 1, BLOCKBYTES - 1] {
                for &portable in &[false, true] {
                    // eprintln!("\ncase -----");
                    // dbg!(num_blocks);
                    // dbg!(extra);
                    // dbg!(portable);

                    // First hash the input all at once, as a sanity check.
                    let mut params = Params::new();
                    if portable {
                        force_portable(&mut params);
                    }
                    let input = &buf[..num_blocks * BLOCKBYTES + extra];
                    let expected = blake2bp_reference(&input);
                    let mut state = params.to_state();
                    let found = state.update(input).finalize();
                    assert_eq!(expected, found);

                    // Then, do it again, but buffer 1 byte of input first. That causes the buffering
                    // branch to trigger.
                    let mut state = params.to_state();
                    let maybe_one = cmp::min(1, input.len());
                    state.update(&input[..maybe_one]);
                    assert_eq!(maybe_one as Count, state.count());
                    // Do a throwaway finalize here to check for idempotency.
                    state.finalize();
                    state.update(&input[maybe_one..]);
                    assert_eq!(input.len() as Count, state.count());
                    let found = state.finalize();
                    assert_eq!(expected, found);

                    // Finally, do it again with the all-at-once interface.
                    assert_eq!(expected, blake2bp(input));
                }
            }
        }
    }
}
571