1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! An implementation of SipHash.
12
13 use core::cmp;
14 use core::hash;
15 use core::marker::PhantomData;
16 use core::mem;
17 use core::ptr;
18 use core::u64;
19
20 /// An implementation of SipHash 1-3.
21 ///
22 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
23 #[derive(Debug, Clone, Copy, Default)]
24 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
25 pub struct SipHasher13 {
26 hasher: Hasher<Sip13Rounds>,
27 }
28
29 /// An implementation of SipHash 2-4.
30 ///
31 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
32 #[derive(Debug, Clone, Copy, Default)]
33 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34 pub struct SipHasher24 {
35 hasher: Hasher<Sip24Rounds>,
36 }
37
38 /// An implementation of SipHash 2-4.
39 ///
40 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
41 ///
42 /// SipHash is a general-purpose hashing function: it runs at a good
43 /// speed (competitive with Spooky and City) and permits strong _keyed_
44 /// hashing. This lets you key your hashtables from a strong RNG, such as
45 /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
46 ///
47 /// Although the SipHash algorithm is considered to be generally strong,
48 /// it is not intended for cryptographic purposes. As such, all
49 /// cryptographic uses of this implementation are _strongly discouraged_.
50 #[derive(Debug, Clone, Copy, Default)]
51 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52 pub struct SipHasher(SipHasher24);
53
54 #[derive(Debug, Clone, Copy)]
55 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56 struct Hasher<S: Sip> {
57 k0: u64,
58 k1: u64,
59 length: usize, // how many bytes we've processed
60 state: State, // hash State
61 tail: u64, // unprocessed bytes le
62 ntail: usize, // how many bytes in tail are valid
63 _marker: PhantomData<S>,
64 }
65
66 #[derive(Debug, Clone, Copy)]
67 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68 struct State {
69 // v0, v2 and v1, v3 show up in pairs in the algorithm,
70 // and simd implementations of SipHash will use vectors
71 // of v02 and v13. By placing them in this order in the struct,
72 // the compiler can pick up on just a few simd optimizations by itself.
73 v0: u64,
74 v2: u64,
75 v1: u64,
76 v3: u64,
77 }
78
79 macro_rules! compress {
80 ($state:expr) => {{
81 compress!($state.v0, $state.v1, $state.v2, $state.v3)
82 }};
83 ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
84 $v0 = $v0.wrapping_add($v1);
85 $v1 = $v1.rotate_left(13);
86 $v1 ^= $v0;
87 $v0 = $v0.rotate_left(32);
88 $v2 = $v2.wrapping_add($v3);
89 $v3 = $v3.rotate_left(16);
90 $v3 ^= $v2;
91 $v0 = $v0.wrapping_add($v3);
92 $v3 = $v3.rotate_left(21);
93 $v3 ^= $v0;
94 $v2 = $v2.wrapping_add($v1);
95 $v1 = $v1.rotate_left(17);
96 $v1 ^= $v2;
97 $v2 = $v2.rotate_left(32);
98 }};
99 }
100
101 /// Loads an integer of the desired type from a byte stream, in LE order. Uses
102 /// `copy_nonoverlapping` to let the compiler generate the most efficient way
103 /// to load it from a possibly unaligned address.
104 ///
105 /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
106 macro_rules! load_int_le {
107 ($buf:expr, $i:expr, $int_ty:ident) => {{
108 debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
109 let mut data = 0 as $int_ty;
110 ptr::copy_nonoverlapping(
111 $buf.get_unchecked($i),
112 &mut data as *mut _ as *mut u8,
113 mem::size_of::<$int_ty>(),
114 );
115 data.to_le()
116 }};
117 }
118
119 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
120 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
121 /// sizes and avoid calling `memcpy`, which is good for speed.
122 ///
123 /// Unsafe because: unchecked indexing at start..start+len
124 #[inline]
u8to64_le(buf: &[u8], start: usize, len: usize) -> u64125 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
126 debug_assert!(len < 8);
127 let mut i = 0; // current byte index (from LSB) in the output u64
128 let mut out = 0;
129 if i + 3 < len {
130 out = load_int_le!(buf, start + i, u32) as u64;
131 i += 4;
132 }
133 if i + 1 < len {
134 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
135 i += 2
136 }
137 if i < len {
138 out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
139 i += 1;
140 }
141 debug_assert_eq!(i, len);
142 out
143 }
144
145 impl SipHasher {
146 /// Creates a new `SipHasher` with the two initial keys set to 0.
147 #[inline]
new() -> SipHasher148 pub fn new() -> SipHasher {
149 SipHasher::new_with_keys(0, 0)
150 }
151
152 /// Creates a `SipHasher` that is keyed off the provided keys.
153 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher154 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
155 SipHasher(SipHasher24::new_with_keys(key0, key1))
156 }
157
158 /// Creates a `SipHasher` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher159 pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
160 let mut b0 = [0u8; 8];
161 let mut b1 = [0u8; 8];
162 b0.copy_from_slice(&key[0..8]);
163 b1.copy_from_slice(&key[0..8]);
164 let key0 = u64::from_le_bytes(b0);
165 let key1 = u64::from_le_bytes(b1);
166 Self::new_with_keys(key0, key1)
167 }
168
169 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)170 pub fn keys(&self) -> (u64, u64) {
171 (self.0.hasher.k0, self.0.hasher.k1)
172 }
173
174 /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]175 pub fn key(&self) -> [u8; 16] {
176 let mut bytes = [0u8; 16];
177 bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
178 bytes[0..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
179 bytes
180 }
181 }
182
183 impl SipHasher13 {
184 /// Creates a new `SipHasher13` with the two initial keys set to 0.
185 #[inline]
new() -> SipHasher13186 pub fn new() -> SipHasher13 {
187 SipHasher13::new_with_keys(0, 0)
188 }
189
190 /// Creates a `SipHasher13` that is keyed off the provided keys.
191 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher13192 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
193 SipHasher13 {
194 hasher: Hasher::new_with_keys(key0, key1),
195 }
196 }
197
198 /// Creates a `SipHasher13` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher13199 pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
200 let mut b0 = [0u8; 8];
201 let mut b1 = [0u8; 8];
202 b0.copy_from_slice(&key[0..8]);
203 b1.copy_from_slice(&key[0..8]);
204 let key0 = u64::from_le_bytes(b0);
205 let key1 = u64::from_le_bytes(b1);
206 Self::new_with_keys(key0, key1)
207 }
208
209 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)210 pub fn keys(&self) -> (u64, u64) {
211 (self.hasher.k0, self.hasher.k1)
212 }
213
214 /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]215 pub fn key(&self) -> [u8; 16] {
216 let mut bytes = [0u8; 16];
217 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
218 bytes[0..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
219 bytes
220 }
221 }
222
223 impl SipHasher24 {
224 /// Creates a new `SipHasher24` with the two initial keys set to 0.
225 #[inline]
new() -> SipHasher24226 pub fn new() -> SipHasher24 {
227 SipHasher24::new_with_keys(0, 0)
228 }
229
230 /// Creates a `SipHasher24` that is keyed off the provided keys.
231 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher24232 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
233 SipHasher24 {
234 hasher: Hasher::new_with_keys(key0, key1),
235 }
236 }
237
238 /// Creates a `SipHasher24` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher24239 pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
240 let mut b0 = [0u8; 8];
241 let mut b1 = [0u8; 8];
242 b0.copy_from_slice(&key[0..8]);
243 b1.copy_from_slice(&key[0..8]);
244 let key0 = u64::from_le_bytes(b0);
245 let key1 = u64::from_le_bytes(b1);
246 Self::new_with_keys(key0, key1)
247 }
248
249 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)250 pub fn keys(&self) -> (u64, u64) {
251 (self.hasher.k0, self.hasher.k1)
252 }
253
254 /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]255 pub fn key(&self) -> [u8; 16] {
256 let mut bytes = [0u8; 16];
257 bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
258 bytes[0..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
259 bytes
260 }
261 }
262
263 impl<S: Sip> Hasher<S> {
264 #[inline]
new_with_keys(key0: u64, key1: u64) -> Hasher<S>265 fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
266 let mut state = Hasher {
267 k0: key0,
268 k1: key1,
269 length: 0,
270 state: State {
271 v0: 0,
272 v1: 0,
273 v2: 0,
274 v3: 0,
275 },
276 tail: 0,
277 ntail: 0,
278 _marker: PhantomData,
279 };
280 state.reset();
281 state
282 }
283
284 #[inline]
reset(&mut self)285 fn reset(&mut self) {
286 self.length = 0;
287 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
288 self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
289 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
290 self.state.v3 = self.k1 ^ 0x7465646279746573;
291 self.ntail = 0;
292 }
293
294 // A specialized write function for values with size <= 8.
295 //
296 // The hashing of multi-byte integers depends on endianness. E.g.:
297 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
298 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
299 //
300 // This function does the right thing for little-endian hardware. On
301 // big-endian hardware `x` must be byte-swapped first to give the right
302 // behaviour. After any byte-swapping, the input must be zero-extended to
303 // 64-bits. The caller is responsible for the byte-swapping and
304 // zero-extension.
305 #[inline]
short_write<T>(&mut self, _x: T, x: u64)306 fn short_write<T>(&mut self, _x: T, x: u64) {
307 let size = mem::size_of::<T>();
308 self.length += size;
309
310 // The original number must be zero-extended, not sign-extended.
311 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
312
313 // The number of bytes needed to fill `self.tail`.
314 let needed = 8 - self.ntail;
315
316 self.tail |= x << (8 * self.ntail);
317 if size < needed {
318 self.ntail += size;
319 return;
320 }
321
322 // `self.tail` is full, process it.
323 self.state.v3 ^= self.tail;
324 S::c_rounds(&mut self.state);
325 self.state.v0 ^= self.tail;
326
327 self.ntail = size - needed;
328 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
329 }
330 }
331
332 impl hash::Hasher for SipHasher {
333 #[inline]
write(&mut self, msg: &[u8])334 fn write(&mut self, msg: &[u8]) {
335 self.0.write(msg)
336 }
337
338 #[inline]
finish(&self) -> u64339 fn finish(&self) -> u64 {
340 self.0.finish()
341 }
342
343 #[inline]
write_usize(&mut self, i: usize)344 fn write_usize(&mut self, i: usize) {
345 self.0.write_usize(i);
346 }
347
348 #[inline]
write_u8(&mut self, i: u8)349 fn write_u8(&mut self, i: u8) {
350 self.0.write_u8(i);
351 }
352
353 #[inline]
write_u16(&mut self, i: u16)354 fn write_u16(&mut self, i: u16) {
355 self.0.write_u16(i);
356 }
357
358 #[inline]
write_u32(&mut self, i: u32)359 fn write_u32(&mut self, i: u32) {
360 self.0.write_u32(i);
361 }
362
363 #[inline]
write_u64(&mut self, i: u64)364 fn write_u64(&mut self, i: u64) {
365 self.0.write_u64(i);
366 }
367 }
368
369 impl hash::Hasher for SipHasher13 {
370 #[inline]
write(&mut self, msg: &[u8])371 fn write(&mut self, msg: &[u8]) {
372 self.hasher.write(msg)
373 }
374
375 #[inline]
finish(&self) -> u64376 fn finish(&self) -> u64 {
377 self.hasher.finish()
378 }
379
380 #[inline]
write_usize(&mut self, i: usize)381 fn write_usize(&mut self, i: usize) {
382 self.hasher.write_usize(i);
383 }
384
385 #[inline]
write_u8(&mut self, i: u8)386 fn write_u8(&mut self, i: u8) {
387 self.hasher.write_u8(i);
388 }
389
390 #[inline]
write_u16(&mut self, i: u16)391 fn write_u16(&mut self, i: u16) {
392 self.hasher.write_u16(i);
393 }
394
395 #[inline]
write_u32(&mut self, i: u32)396 fn write_u32(&mut self, i: u32) {
397 self.hasher.write_u32(i);
398 }
399
400 #[inline]
write_u64(&mut self, i: u64)401 fn write_u64(&mut self, i: u64) {
402 self.hasher.write_u64(i);
403 }
404 }
405
406 impl hash::Hasher for SipHasher24 {
407 #[inline]
write(&mut self, msg: &[u8])408 fn write(&mut self, msg: &[u8]) {
409 self.hasher.write(msg)
410 }
411
412 #[inline]
finish(&self) -> u64413 fn finish(&self) -> u64 {
414 self.hasher.finish()
415 }
416
417 #[inline]
write_usize(&mut self, i: usize)418 fn write_usize(&mut self, i: usize) {
419 self.hasher.write_usize(i);
420 }
421
422 #[inline]
write_u8(&mut self, i: u8)423 fn write_u8(&mut self, i: u8) {
424 self.hasher.write_u8(i);
425 }
426
427 #[inline]
write_u16(&mut self, i: u16)428 fn write_u16(&mut self, i: u16) {
429 self.hasher.write_u16(i);
430 }
431
432 #[inline]
write_u32(&mut self, i: u32)433 fn write_u32(&mut self, i: u32) {
434 self.hasher.write_u32(i);
435 }
436
437 #[inline]
write_u64(&mut self, i: u64)438 fn write_u64(&mut self, i: u64) {
439 self.hasher.write_u64(i);
440 }
441 }
442
443 impl<S: Sip> hash::Hasher for Hasher<S> {
444 #[inline]
write_usize(&mut self, i: usize)445 fn write_usize(&mut self, i: usize) {
446 self.short_write(i, i.to_le() as u64);
447 }
448
449 #[inline]
write_u8(&mut self, i: u8)450 fn write_u8(&mut self, i: u8) {
451 self.short_write(i, i as u64);
452 }
453
454 #[inline]
write_u32(&mut self, i: u32)455 fn write_u32(&mut self, i: u32) {
456 self.short_write(i, i.to_le() as u64);
457 }
458
459 #[inline]
write_u64(&mut self, i: u64)460 fn write_u64(&mut self, i: u64) {
461 self.short_write(i, i.to_le() as u64);
462 }
463
464 #[inline]
write(&mut self, msg: &[u8])465 fn write(&mut self, msg: &[u8]) {
466 let length = msg.len();
467 self.length += length;
468
469 let mut needed = 0;
470
471 if self.ntail != 0 {
472 needed = 8 - self.ntail;
473 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
474 if length < needed {
475 self.ntail += length;
476 return;
477 } else {
478 self.state.v3 ^= self.tail;
479 S::c_rounds(&mut self.state);
480 self.state.v0 ^= self.tail;
481 self.ntail = 0;
482 }
483 }
484
485 // Buffered tail is now flushed, process new input.
486 let len = length - needed;
487 let left = len & 0x7;
488
489 let mut i = needed;
490 while i < len - left {
491 let mi = unsafe { load_int_le!(msg, i, u64) };
492
493 self.state.v3 ^= mi;
494 S::c_rounds(&mut self.state);
495 self.state.v0 ^= mi;
496
497 i += 8;
498 }
499
500 self.tail = unsafe { u8to64_le(msg, i, left) };
501 self.ntail = left;
502 }
503
504 #[inline]
finish(&self) -> u64505 fn finish(&self) -> u64 {
506 let mut state = self.state;
507
508 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
509
510 state.v3 ^= b;
511 S::c_rounds(&mut state);
512 state.v0 ^= b;
513
514 state.v2 ^= 0xff;
515 S::d_rounds(&mut state);
516
517 state.v0 ^ state.v1 ^ state.v2 ^ state.v3
518 }
519 }
520
521 impl<S: Sip> Default for Hasher<S> {
522 /// Creates a `Hasher<S>` with the two initial keys set to 0.
523 #[inline]
default() -> Hasher<S>524 fn default() -> Hasher<S> {
525 Hasher::new_with_keys(0, 0)
526 }
527 }
528
529 #[doc(hidden)]
530 trait Sip {
c_rounds(_: &mut State)531 fn c_rounds(_: &mut State);
d_rounds(_: &mut State)532 fn d_rounds(_: &mut State);
533 }
534
535 #[derive(Debug, Clone, Copy, Default)]
536 struct Sip13Rounds;
537
538 impl Sip for Sip13Rounds {
539 #[inline]
c_rounds(state: &mut State)540 fn c_rounds(state: &mut State) {
541 compress!(state);
542 }
543
544 #[inline]
d_rounds(state: &mut State)545 fn d_rounds(state: &mut State) {
546 compress!(state);
547 compress!(state);
548 compress!(state);
549 }
550 }
551
552 #[derive(Debug, Clone, Copy, Default)]
553 struct Sip24Rounds;
554
555 impl Sip for Sip24Rounds {
556 #[inline]
c_rounds(state: &mut State)557 fn c_rounds(state: &mut State) {
558 compress!(state);
559 compress!(state);
560 }
561
562 #[inline]
d_rounds(state: &mut State)563 fn d_rounds(state: &mut State) {
564 compress!(state);
565 compress!(state);
566 compress!(state);
567 compress!(state);
568 }
569 }
570