1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! An implementation of SipHash.
12 
13 use core::cmp;
14 use core::hash;
15 use core::marker::PhantomData;
16 use core::mem;
17 use core::ptr;
18 use core::u64;
19 
20 /// An implementation of SipHash 1-3.
21 ///
22 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
23 #[derive(Debug, Clone, Copy, Default)]
24 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
25 pub struct SipHasher13 {
26     hasher: Hasher<Sip13Rounds>,
27 }
28 
29 /// An implementation of SipHash 2-4.
30 ///
31 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
32 #[derive(Debug, Clone, Copy, Default)]
33 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34 pub struct SipHasher24 {
35     hasher: Hasher<Sip24Rounds>,
36 }
37 
38 /// An implementation of SipHash 2-4.
39 ///
40 /// See: <https://www.aumasson.jp/siphash/siphash.pdf>
41 ///
42 /// SipHash is a general-purpose hashing function: it runs at a good
43 /// speed (competitive with Spooky and City) and permits strong _keyed_
44 /// hashing. This lets you key your hashtables from a strong RNG, such as
45 /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
46 ///
47 /// Although the SipHash algorithm is considered to be generally strong,
48 /// it is not intended for cryptographic purposes. As such, all
49 /// cryptographic uses of this implementation are _strongly discouraged_.
50 #[derive(Debug, Clone, Copy, Default)]
51 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52 pub struct SipHasher(SipHasher24);
53 
54 #[derive(Debug, Clone, Copy)]
55 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56 struct Hasher<S: Sip> {
57     k0: u64,
58     k1: u64,
59     length: usize, // how many bytes we've processed
60     state: State,  // hash State
61     tail: u64,     // unprocessed bytes le
62     ntail: usize,  // how many bytes in tail are valid
63     _marker: PhantomData<S>,
64 }
65 
66 #[derive(Debug, Clone, Copy)]
67 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68 struct State {
69     // v0, v2 and v1, v3 show up in pairs in the algorithm,
70     // and simd implementations of SipHash will use vectors
71     // of v02 and v13. By placing them in this order in the struct,
72     // the compiler can pick up on just a few simd optimizations by itself.
73     v0: u64,
74     v2: u64,
75     v1: u64,
76     v3: u64,
77 }
78 
79 macro_rules! compress {
80     ($state:expr) => {{
81         compress!($state.v0, $state.v1, $state.v2, $state.v3)
82     }};
83     ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
84         $v0 = $v0.wrapping_add($v1);
85         $v1 = $v1.rotate_left(13);
86         $v1 ^= $v0;
87         $v0 = $v0.rotate_left(32);
88         $v2 = $v2.wrapping_add($v3);
89         $v3 = $v3.rotate_left(16);
90         $v3 ^= $v2;
91         $v0 = $v0.wrapping_add($v3);
92         $v3 = $v3.rotate_left(21);
93         $v3 ^= $v0;
94         $v2 = $v2.wrapping_add($v1);
95         $v1 = $v1.rotate_left(17);
96         $v1 ^= $v2;
97         $v2 = $v2.rotate_left(32);
98     }};
99 }
100 
101 /// Loads an integer of the desired type from a byte stream, in LE order. Uses
102 /// `copy_nonoverlapping` to let the compiler generate the most efficient way
103 /// to load it from a possibly unaligned address.
104 ///
105 /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
106 macro_rules! load_int_le {
107     ($buf:expr, $i:expr, $int_ty:ident) => {{
108         debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
109         let mut data = 0 as $int_ty;
110         ptr::copy_nonoverlapping(
111             $buf.get_unchecked($i),
112             &mut data as *mut _ as *mut u8,
113             mem::size_of::<$int_ty>(),
114         );
115         data.to_le()
116     }};
117 }
118 
119 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
120 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
121 /// sizes and avoid calling `memcpy`, which is good for speed.
122 ///
123 /// Unsafe because: unchecked indexing at start..start+len
124 #[inline]
u8to64_le(buf: &[u8], start: usize, len: usize) -> u64125 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
126     debug_assert!(len < 8);
127     let mut i = 0; // current byte index (from LSB) in the output u64
128     let mut out = 0;
129     if i + 3 < len {
130         out = load_int_le!(buf, start + i, u32) as u64;
131         i += 4;
132     }
133     if i + 1 < len {
134         out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
135         i += 2
136     }
137     if i < len {
138         out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
139         i += 1;
140     }
141     debug_assert_eq!(i, len);
142     out
143 }
144 
145 impl SipHasher {
146     /// Creates a new `SipHasher` with the two initial keys set to 0.
147     #[inline]
new() -> SipHasher148     pub fn new() -> SipHasher {
149         SipHasher::new_with_keys(0, 0)
150     }
151 
152     /// Creates a `SipHasher` that is keyed off the provided keys.
153     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher154     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
155         SipHasher(SipHasher24::new_with_keys(key0, key1))
156     }
157 
158     /// Creates a `SipHasher` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher159     pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
160         let mut b0 = [0u8; 8];
161         let mut b1 = [0u8; 8];
162         b0.copy_from_slice(&key[0..8]);
163         b1.copy_from_slice(&key[0..8]);
164         let key0 = u64::from_le_bytes(b0);
165         let key1 = u64::from_le_bytes(b1);
166         Self::new_with_keys(key0, key1)
167     }
168 
169     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)170     pub fn keys(&self) -> (u64, u64) {
171         (self.0.hasher.k0, self.0.hasher.k1)
172     }
173 
174     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]175     pub fn key(&self) -> [u8; 16] {
176         let mut bytes = [0u8; 16];
177         bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
178         bytes[0..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
179         bytes
180     }
181 }
182 
183 impl SipHasher13 {
184     /// Creates a new `SipHasher13` with the two initial keys set to 0.
185     #[inline]
new() -> SipHasher13186     pub fn new() -> SipHasher13 {
187         SipHasher13::new_with_keys(0, 0)
188     }
189 
190     /// Creates a `SipHasher13` that is keyed off the provided keys.
191     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher13192     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
193         SipHasher13 {
194             hasher: Hasher::new_with_keys(key0, key1),
195         }
196     }
197 
198     /// Creates a `SipHasher13` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher13199     pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
200         let mut b0 = [0u8; 8];
201         let mut b1 = [0u8; 8];
202         b0.copy_from_slice(&key[0..8]);
203         b1.copy_from_slice(&key[0..8]);
204         let key0 = u64::from_le_bytes(b0);
205         let key1 = u64::from_le_bytes(b1);
206         Self::new_with_keys(key0, key1)
207     }
208 
209     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)210     pub fn keys(&self) -> (u64, u64) {
211         (self.hasher.k0, self.hasher.k1)
212     }
213 
214     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]215     pub fn key(&self) -> [u8; 16] {
216         let mut bytes = [0u8; 16];
217         bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
218         bytes[0..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
219         bytes
220     }
221 }
222 
223 impl SipHasher24 {
224     /// Creates a new `SipHasher24` with the two initial keys set to 0.
225     #[inline]
new() -> SipHasher24226     pub fn new() -> SipHasher24 {
227         SipHasher24::new_with_keys(0, 0)
228     }
229 
230     /// Creates a `SipHasher24` that is keyed off the provided keys.
231     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher24232     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
233         SipHasher24 {
234             hasher: Hasher::new_with_keys(key0, key1),
235         }
236     }
237 
238     /// Creates a `SipHasher24` from a 16 byte key.
new_with_key(key: &[u8; 16]) -> SipHasher24239     pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
240         let mut b0 = [0u8; 8];
241         let mut b1 = [0u8; 8];
242         b0.copy_from_slice(&key[0..8]);
243         b1.copy_from_slice(&key[0..8]);
244         let key0 = u64::from_le_bytes(b0);
245         let key1 = u64::from_le_bytes(b1);
246         Self::new_with_keys(key0, key1)
247     }
248 
249     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)250     pub fn keys(&self) -> (u64, u64) {
251         (self.hasher.k0, self.hasher.k1)
252     }
253 
254     /// Get the key used by this hasher as a 16 byte vector
key(&self) -> [u8; 16]255     pub fn key(&self) -> [u8; 16] {
256         let mut bytes = [0u8; 16];
257         bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
258         bytes[0..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
259         bytes
260     }
261 }
262 
263 impl<S: Sip> Hasher<S> {
264     #[inline]
new_with_keys(key0: u64, key1: u64) -> Hasher<S>265     fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
266         let mut state = Hasher {
267             k0: key0,
268             k1: key1,
269             length: 0,
270             state: State {
271                 v0: 0,
272                 v1: 0,
273                 v2: 0,
274                 v3: 0,
275             },
276             tail: 0,
277             ntail: 0,
278             _marker: PhantomData,
279         };
280         state.reset();
281         state
282     }
283 
284     #[inline]
reset(&mut self)285     fn reset(&mut self) {
286         self.length = 0;
287         self.state.v0 = self.k0 ^ 0x736f6d6570736575;
288         self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
289         self.state.v2 = self.k0 ^ 0x6c7967656e657261;
290         self.state.v3 = self.k1 ^ 0x7465646279746573;
291         self.ntail = 0;
292     }
293 
294     // A specialized write function for values with size <= 8.
295     //
296     // The hashing of multi-byte integers depends on endianness. E.g.:
297     // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
298     // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
299     //
300     // This function does the right thing for little-endian hardware. On
301     // big-endian hardware `x` must be byte-swapped first to give the right
302     // behaviour. After any byte-swapping, the input must be zero-extended to
303     // 64-bits. The caller is responsible for the byte-swapping and
304     // zero-extension.
305     #[inline]
short_write<T>(&mut self, _x: T, x: u64)306     fn short_write<T>(&mut self, _x: T, x: u64) {
307         let size = mem::size_of::<T>();
308         self.length += size;
309 
310         // The original number must be zero-extended, not sign-extended.
311         debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
312 
313         // The number of bytes needed to fill `self.tail`.
314         let needed = 8 - self.ntail;
315 
316         self.tail |= x << (8 * self.ntail);
317         if size < needed {
318             self.ntail += size;
319             return;
320         }
321 
322         // `self.tail` is full, process it.
323         self.state.v3 ^= self.tail;
324         S::c_rounds(&mut self.state);
325         self.state.v0 ^= self.tail;
326 
327         self.ntail = size - needed;
328         self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
329     }
330 }
331 
332 impl hash::Hasher for SipHasher {
333     #[inline]
write(&mut self, msg: &[u8])334     fn write(&mut self, msg: &[u8]) {
335         self.0.write(msg)
336     }
337 
338     #[inline]
finish(&self) -> u64339     fn finish(&self) -> u64 {
340         self.0.finish()
341     }
342 
343     #[inline]
write_usize(&mut self, i: usize)344     fn write_usize(&mut self, i: usize) {
345         self.0.write_usize(i);
346     }
347 
348     #[inline]
write_u8(&mut self, i: u8)349     fn write_u8(&mut self, i: u8) {
350         self.0.write_u8(i);
351     }
352 
353     #[inline]
write_u16(&mut self, i: u16)354     fn write_u16(&mut self, i: u16) {
355         self.0.write_u16(i);
356     }
357 
358     #[inline]
write_u32(&mut self, i: u32)359     fn write_u32(&mut self, i: u32) {
360         self.0.write_u32(i);
361     }
362 
363     #[inline]
write_u64(&mut self, i: u64)364     fn write_u64(&mut self, i: u64) {
365         self.0.write_u64(i);
366     }
367 }
368 
369 impl hash::Hasher for SipHasher13 {
370     #[inline]
write(&mut self, msg: &[u8])371     fn write(&mut self, msg: &[u8]) {
372         self.hasher.write(msg)
373     }
374 
375     #[inline]
finish(&self) -> u64376     fn finish(&self) -> u64 {
377         self.hasher.finish()
378     }
379 
380     #[inline]
write_usize(&mut self, i: usize)381     fn write_usize(&mut self, i: usize) {
382         self.hasher.write_usize(i);
383     }
384 
385     #[inline]
write_u8(&mut self, i: u8)386     fn write_u8(&mut self, i: u8) {
387         self.hasher.write_u8(i);
388     }
389 
390     #[inline]
write_u16(&mut self, i: u16)391     fn write_u16(&mut self, i: u16) {
392         self.hasher.write_u16(i);
393     }
394 
395     #[inline]
write_u32(&mut self, i: u32)396     fn write_u32(&mut self, i: u32) {
397         self.hasher.write_u32(i);
398     }
399 
400     #[inline]
write_u64(&mut self, i: u64)401     fn write_u64(&mut self, i: u64) {
402         self.hasher.write_u64(i);
403     }
404 }
405 
406 impl hash::Hasher for SipHasher24 {
407     #[inline]
write(&mut self, msg: &[u8])408     fn write(&mut self, msg: &[u8]) {
409         self.hasher.write(msg)
410     }
411 
412     #[inline]
finish(&self) -> u64413     fn finish(&self) -> u64 {
414         self.hasher.finish()
415     }
416 
417     #[inline]
write_usize(&mut self, i: usize)418     fn write_usize(&mut self, i: usize) {
419         self.hasher.write_usize(i);
420     }
421 
422     #[inline]
write_u8(&mut self, i: u8)423     fn write_u8(&mut self, i: u8) {
424         self.hasher.write_u8(i);
425     }
426 
427     #[inline]
write_u16(&mut self, i: u16)428     fn write_u16(&mut self, i: u16) {
429         self.hasher.write_u16(i);
430     }
431 
432     #[inline]
write_u32(&mut self, i: u32)433     fn write_u32(&mut self, i: u32) {
434         self.hasher.write_u32(i);
435     }
436 
437     #[inline]
write_u64(&mut self, i: u64)438     fn write_u64(&mut self, i: u64) {
439         self.hasher.write_u64(i);
440     }
441 }
442 
443 impl<S: Sip> hash::Hasher for Hasher<S> {
444     #[inline]
write_usize(&mut self, i: usize)445     fn write_usize(&mut self, i: usize) {
446         self.short_write(i, i.to_le() as u64);
447     }
448 
449     #[inline]
write_u8(&mut self, i: u8)450     fn write_u8(&mut self, i: u8) {
451         self.short_write(i, i as u64);
452     }
453 
454     #[inline]
write_u32(&mut self, i: u32)455     fn write_u32(&mut self, i: u32) {
456         self.short_write(i, i.to_le() as u64);
457     }
458 
459     #[inline]
write_u64(&mut self, i: u64)460     fn write_u64(&mut self, i: u64) {
461         self.short_write(i, i.to_le() as u64);
462     }
463 
464     #[inline]
write(&mut self, msg: &[u8])465     fn write(&mut self, msg: &[u8]) {
466         let length = msg.len();
467         self.length += length;
468 
469         let mut needed = 0;
470 
471         if self.ntail != 0 {
472             needed = 8 - self.ntail;
473             self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
474             if length < needed {
475                 self.ntail += length;
476                 return;
477             } else {
478                 self.state.v3 ^= self.tail;
479                 S::c_rounds(&mut self.state);
480                 self.state.v0 ^= self.tail;
481                 self.ntail = 0;
482             }
483         }
484 
485         // Buffered tail is now flushed, process new input.
486         let len = length - needed;
487         let left = len & 0x7;
488 
489         let mut i = needed;
490         while i < len - left {
491             let mi = unsafe { load_int_le!(msg, i, u64) };
492 
493             self.state.v3 ^= mi;
494             S::c_rounds(&mut self.state);
495             self.state.v0 ^= mi;
496 
497             i += 8;
498         }
499 
500         self.tail = unsafe { u8to64_le(msg, i, left) };
501         self.ntail = left;
502     }
503 
504     #[inline]
finish(&self) -> u64505     fn finish(&self) -> u64 {
506         let mut state = self.state;
507 
508         let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
509 
510         state.v3 ^= b;
511         S::c_rounds(&mut state);
512         state.v0 ^= b;
513 
514         state.v2 ^= 0xff;
515         S::d_rounds(&mut state);
516 
517         state.v0 ^ state.v1 ^ state.v2 ^ state.v3
518     }
519 }
520 
521 impl<S: Sip> Default for Hasher<S> {
522     /// Creates a `Hasher<S>` with the two initial keys set to 0.
523     #[inline]
default() -> Hasher<S>524     fn default() -> Hasher<S> {
525         Hasher::new_with_keys(0, 0)
526     }
527 }
528 
529 #[doc(hidden)]
530 trait Sip {
c_rounds(_: &mut State)531     fn c_rounds(_: &mut State);
d_rounds(_: &mut State)532     fn d_rounds(_: &mut State);
533 }
534 
535 #[derive(Debug, Clone, Copy, Default)]
536 struct Sip13Rounds;
537 
538 impl Sip for Sip13Rounds {
539     #[inline]
c_rounds(state: &mut State)540     fn c_rounds(state: &mut State) {
541         compress!(state);
542     }
543 
544     #[inline]
d_rounds(state: &mut State)545     fn d_rounds(state: &mut State) {
546         compress!(state);
547         compress!(state);
548         compress!(state);
549     }
550 }
551 
552 #[derive(Debug, Clone, Copy, Default)]
553 struct Sip24Rounds;
554 
555 impl Sip for Sip24Rounds {
556     #[inline]
c_rounds(state: &mut State)557     fn c_rounds(state: &mut State) {
558         compress!(state);
559         compress!(state);
560     }
561 
562     #[inline]
d_rounds(state: &mut State)563     fn d_rounds(state: &mut State) {
564         compress!(state);
565         compress!(state);
566         compress!(state);
567         compress!(state);
568     }
569 }
570