1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10 
11 //! An implementation of SipHash with a 128-bit output.
12 
13 use core::cmp;
14 use core::hash;
15 use core::marker::PhantomData;
16 use core::mem;
17 use core::ptr;
18 
19 /// A 128-bit (2x64) hash output
20 #[derive(Debug, Clone, Copy, Default)]
21 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22 pub struct Hash128 {
23     pub h1: u64,
24     pub h2: u64,
25 }
26 
27 impl From<u128> for Hash128 {
from(v: u128) -> Self28     fn from(v: u128) -> Self {
29         Hash128 {
30             h1: v as u64,
31             h2: (v >> 64) as u64,
32         }
33     }
34 }
35 
36 impl Into<u128> for Hash128 {
into(self) -> u12837     fn into(self) -> u128 {
38         (self.h1 as u128) | ((self.h2 as u128) << 64)
39     }
40 }
41 
42 /// An implementation of SipHash128 1-3.
43 #[derive(Debug, Clone, Copy, Default)]
44 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
45 pub struct SipHasher13 {
46     hasher: Hasher<Sip13Rounds>,
47 }
48 
49 /// An implementation of SipHash128 2-4.
50 #[derive(Debug, Clone, Copy, Default)]
51 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52 pub struct SipHasher24 {
53     hasher: Hasher<Sip24Rounds>,
54 }
55 
56 /// An implementation of SipHash128 2-4.
57 ///
58 /// SipHash is a general-purpose hashing function: it runs at a good
59 /// speed (competitive with Spooky and City) and permits strong _keyed_
60 /// hashing. This lets you key your hashtables from a strong RNG, such as
61 /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
62 ///
63 /// Although the SipHash algorithm is considered to be generally strong,
64 /// it is not intended for cryptographic purposes. As such, all
65 /// cryptographic uses of this implementation are _strongly discouraged_.
66 #[derive(Debug, Clone, Copy, Default)]
67 pub struct SipHasher(SipHasher24);
68 
69 #[derive(Debug, Copy)]
70 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
71 struct Hasher<S: Sip> {
72     k0: u64,
73     k1: u64,
74     length: usize, // how many bytes we've processed
75     state: State,  // hash State
76     tail: u64,     // unprocessed bytes le
77     ntail: usize,  // how many bytes in tail are valid
78     _marker: PhantomData<S>,
79 }
80 
81 #[derive(Debug, Clone, Copy)]
82 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
83 struct State {
84     // v0, v2 and v1, v3 show up in pairs in the algorithm,
85     // and simd implementations of SipHash will use vectors
86     // of v02 and v13. By placing them in this order in the struct,
87     // the compiler can pick up on just a few simd optimizations by itself.
88     v0: u64,
89     v2: u64,
90     v1: u64,
91     v3: u64,
92 }
93 
94 macro_rules! compress {
95     ($state:expr) => {{
96         compress!($state.v0, $state.v1, $state.v2, $state.v3)
97     }};
98     ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
99         $v0 = $v0.wrapping_add($v1);
100         $v1 = $v1.rotate_left(13);
101         $v1 ^= $v0;
102         $v0 = $v0.rotate_left(32);
103         $v2 = $v2.wrapping_add($v3);
104         $v3 = $v3.rotate_left(16);
105         $v3 ^= $v2;
106         $v0 = $v0.wrapping_add($v3);
107         $v3 = $v3.rotate_left(21);
108         $v3 ^= $v0;
109         $v2 = $v2.wrapping_add($v1);
110         $v1 = $v1.rotate_left(17);
111         $v1 ^= $v2;
112         $v2 = $v2.rotate_left(32);
113     }};
114 }
115 
116 /// Loads an integer of the desired type from a byte stream, in LE order. Uses
117 /// `copy_nonoverlapping` to let the compiler generate the most efficient way
118 /// to load it from a possibly unaligned address.
119 ///
120 /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
121 macro_rules! load_int_le {
122     ($buf:expr, $i:expr, $int_ty:ident) => {{
123         debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
124         let mut data = 0 as $int_ty;
125         ptr::copy_nonoverlapping(
126             $buf.get_unchecked($i),
127             &mut data as *mut _ as *mut u8,
128             mem::size_of::<$int_ty>(),
129         );
130         data.to_le()
131     }};
132 }
133 
134 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
135 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
136 /// sizes and avoid calling `memcpy`, which is good for speed.
137 ///
138 /// Unsafe because: unchecked indexing at start..start+len
139 #[inline]
u8to64_le(buf: &[u8], start: usize, len: usize) -> u64140 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
141     debug_assert!(len < 8);
142     let mut i = 0; // current byte index (from LSB) in the output u64
143     let mut out = 0;
144     if i + 3 < len {
145         out = load_int_le!(buf, start + i, u32) as u64;
146         i += 4;
147     }
148     if i + 1 < len {
149         out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
150         i += 2
151     }
152     if i < len {
153         out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
154         i += 1;
155     }
156     debug_assert_eq!(i, len);
157     out
158 }
159 
160 pub trait Hasher128 {
161     /// Return a 128-bit hash
finish128(&self) -> Hash128162     fn finish128(&self) -> Hash128;
163 }
164 
165 impl SipHasher {
166     /// Creates a new `SipHasher` with the two initial keys set to 0.
167     #[inline]
new() -> SipHasher168     pub fn new() -> SipHasher {
169         SipHasher::new_with_keys(0, 0)
170     }
171 
172     /// Creates a `SipHasher` that is keyed off the provided keys.
173     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher174     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
175         SipHasher(SipHasher24::new_with_keys(key0, key1))
176     }
177 
178     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)179     pub fn keys(&self) -> (u64, u64) {
180         (self.0.hasher.k0, self.0.hasher.k1)
181     }
182 }
183 
184 impl Hasher128 for SipHasher {
185     /// Return a 128-bit hash
186     #[inline]
finish128(&self) -> Hash128187     fn finish128(&self) -> Hash128 {
188         self.0.finish128()
189     }
190 }
191 
192 impl SipHasher13 {
193     /// Creates a new `SipHasher13` with the two initial keys set to 0.
194     #[inline]
new() -> SipHasher13195     pub fn new() -> SipHasher13 {
196         SipHasher13::new_with_keys(0, 0)
197     }
198 
199     /// Creates a `SipHasher13` that is keyed off the provided keys.
200     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher13201     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202         SipHasher13 {
203             hasher: Hasher::new_with_keys(key0, key1),
204         }
205     }
206 
207     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)208     pub fn keys(&self) -> (u64, u64) {
209         (self.hasher.k0, self.hasher.k1)
210     }
211 }
212 
213 impl Hasher128 for SipHasher13 {
214     /// Return a 128-bit hash
215     #[inline]
finish128(&self) -> Hash128216     fn finish128(&self) -> Hash128 {
217         self.hasher.finish128()
218     }
219 }
220 
221 impl SipHasher24 {
222     /// Creates a new `SipHasher24` with the two initial keys set to 0.
223     #[inline]
new() -> SipHasher24224     pub fn new() -> SipHasher24 {
225         SipHasher24::new_with_keys(0, 0)
226     }
227 
228     /// Creates a `SipHasher24` that is keyed off the provided keys.
229     #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher24230     pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
231         SipHasher24 {
232             hasher: Hasher::new_with_keys(key0, key1),
233         }
234     }
235 
236     /// Get the keys used by this hasher
keys(&self) -> (u64, u64)237     pub fn keys(&self) -> (u64, u64) {
238         (self.hasher.k0, self.hasher.k1)
239     }
240 }
241 
242 impl Hasher128 for SipHasher24 {
243     /// Return a 128-bit hash
244     #[inline]
finish128(&self) -> Hash128245     fn finish128(&self) -> Hash128 {
246         self.hasher.finish128()
247     }
248 }
249 
250 impl<S: Sip> Hasher<S> {
251     #[inline]
new_with_keys(key0: u64, key1: u64) -> Hasher<S>252     fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
253         let mut state = Hasher {
254             k0: key0,
255             k1: key1,
256             length: 0,
257             state: State {
258                 v0: 0,
259                 v1: 0xee,
260                 v2: 0,
261                 v3: 0,
262             },
263             tail: 0,
264             ntail: 0,
265             _marker: PhantomData,
266         };
267         state.reset();
268         state
269     }
270 
271     #[inline]
reset(&mut self)272     fn reset(&mut self) {
273         self.length = 0;
274         self.state.v0 = self.k0 ^ 0x736f6d6570736575;
275         self.state.v1 = self.k1 ^ 0x646f72616e646f83;
276         self.state.v2 = self.k0 ^ 0x6c7967656e657261;
277         self.state.v3 = self.k1 ^ 0x7465646279746573;
278         self.ntail = 0;
279     }
280 
281     // A specialized write function for values with size <= 8.
282     //
283     // The hashing of multi-byte integers depends on endianness. E.g.:
284     // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
285     // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
286     //
287     // This function does the right thing for little-endian hardware. On
288     // big-endian hardware `x` must be byte-swapped first to give the right
289     // behaviour. After any byte-swapping, the input must be zero-extended to
290     // 64-bits. The caller is responsible for the byte-swapping and
291     // zero-extension.
292     #[inline]
short_write<T>(&mut self, _x: T, x: u64)293     fn short_write<T>(&mut self, _x: T, x: u64) {
294         let size = mem::size_of::<T>();
295         self.length += size;
296 
297         // The original number must be zero-extended, not sign-extended.
298         debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
299 
300         // The number of bytes needed to fill `self.tail`.
301         let needed = 8 - self.ntail;
302 
303         self.tail |= x << (8 * self.ntail);
304         if size < needed {
305             self.ntail += size;
306             return;
307         }
308 
309         // `self.tail` is full, process it.
310         self.state.v3 ^= self.tail;
311         S::c_rounds(&mut self.state);
312         self.state.v0 ^= self.tail;
313 
314         self.ntail = size - needed;
315         self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
316     }
317 }
318 
319 impl<S: Sip> Hasher<S> {
320     #[inline]
finish128(&self) -> Hash128321     pub fn finish128(&self) -> Hash128 {
322         let mut state = self.state;
323 
324         let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
325 
326         state.v3 ^= b;
327         S::c_rounds(&mut state);
328         state.v0 ^= b;
329 
330         state.v2 ^= 0xee;
331         S::d_rounds(&mut state);
332         let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
333 
334         state.v1 ^= 0xdd;
335         S::d_rounds(&mut state);
336         let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
337 
338         Hash128 { h1, h2 }
339     }
340 }
341 
342 impl hash::Hasher for SipHasher {
343     #[inline]
write(&mut self, msg: &[u8])344     fn write(&mut self, msg: &[u8]) {
345         self.0.write(msg)
346     }
347 
348     #[inline]
finish(&self) -> u64349     fn finish(&self) -> u64 {
350         self.0.finish()
351     }
352 }
353 
354 impl hash::Hasher for SipHasher13 {
355     #[inline]
write(&mut self, msg: &[u8])356     fn write(&mut self, msg: &[u8]) {
357         self.hasher.write(msg)
358     }
359 
360     #[inline]
finish(&self) -> u64361     fn finish(&self) -> u64 {
362         self.hasher.finish()
363     }
364 }
365 
366 impl hash::Hasher for SipHasher24 {
367     #[inline]
write(&mut self, msg: &[u8])368     fn write(&mut self, msg: &[u8]) {
369         self.hasher.write(msg)
370     }
371 
372     #[inline]
finish(&self) -> u64373     fn finish(&self) -> u64 {
374         self.hasher.finish()
375     }
376 }
377 
378 impl<S: Sip> hash::Hasher for Hasher<S> {
379     #[inline]
write_usize(&mut self, i: usize)380     fn write_usize(&mut self, i: usize) {
381         self.short_write(i, i.to_le() as u64);
382     }
383 
384     #[inline]
write_u8(&mut self, i: u8)385     fn write_u8(&mut self, i: u8) {
386         self.short_write(i, i as u64);
387     }
388 
389     #[inline]
write_u32(&mut self, i: u32)390     fn write_u32(&mut self, i: u32) {
391         self.short_write(i, i.to_le() as u64);
392     }
393 
394     #[inline]
write_u64(&mut self, i: u64)395     fn write_u64(&mut self, i: u64) {
396         self.short_write(i, i.to_le() as u64);
397     }
398 
399     #[inline]
write(&mut self, msg: &[u8])400     fn write(&mut self, msg: &[u8]) {
401         let length = msg.len();
402         self.length += length;
403 
404         let mut needed = 0;
405 
406         if self.ntail != 0 {
407             needed = 8 - self.ntail;
408             self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
409             if length < needed {
410                 self.ntail += length;
411                 return;
412             } else {
413                 self.state.v3 ^= self.tail;
414                 S::c_rounds(&mut self.state);
415                 self.state.v0 ^= self.tail;
416                 self.ntail = 0;
417             }
418         }
419 
420         // Buffered tail is now flushed, process new input.
421         let len = length - needed;
422         let left = len & 0x7;
423 
424         let mut i = needed;
425         while i < len - left {
426             let mi = unsafe { load_int_le!(msg, i, u64) };
427 
428             self.state.v3 ^= mi;
429             S::c_rounds(&mut self.state);
430             self.state.v0 ^= mi;
431 
432             i += 8;
433         }
434 
435         self.tail = unsafe { u8to64_le(msg, i, left) };
436         self.ntail = left;
437     }
438 
439     #[inline]
finish(&self) -> u64440     fn finish(&self) -> u64 {
441         self.finish128().h2
442     }
443 }
444 
445 impl<S: Sip> Clone for Hasher<S> {
446     #[inline]
clone(&self) -> Hasher<S>447     fn clone(&self) -> Hasher<S> {
448         Hasher {
449             k0: self.k0,
450             k1: self.k1,
451             length: self.length,
452             state: self.state,
453             tail: self.tail,
454             ntail: self.ntail,
455             _marker: self._marker,
456         }
457     }
458 }
459 
460 impl<S: Sip> Default for Hasher<S> {
461     /// Creates a `Hasher<S>` with the two initial keys set to 0.
462     #[inline]
default() -> Hasher<S>463     fn default() -> Hasher<S> {
464         Hasher::new_with_keys(0, 0)
465     }
466 }
467 
468 #[doc(hidden)]
469 trait Sip {
c_rounds(_: &mut State)470     fn c_rounds(_: &mut State);
d_rounds(_: &mut State)471     fn d_rounds(_: &mut State);
472 }
473 
474 #[derive(Debug, Clone, Copy, Default)]
475 struct Sip13Rounds;
476 
477 impl Sip for Sip13Rounds {
478     #[inline]
c_rounds(state: &mut State)479     fn c_rounds(state: &mut State) {
480         compress!(state);
481     }
482 
483     #[inline]
d_rounds(state: &mut State)484     fn d_rounds(state: &mut State) {
485         compress!(state);
486         compress!(state);
487         compress!(state);
488     }
489 }
490 
491 #[derive(Debug, Clone, Copy, Default)]
492 struct Sip24Rounds;
493 
494 impl Sip for Sip24Rounds {
495     #[inline]
c_rounds(state: &mut State)496     fn c_rounds(state: &mut State) {
497         compress!(state);
498         compress!(state);
499     }
500 
501     #[inline]
d_rounds(state: &mut State)502     fn d_rounds(state: &mut State) {
503         compress!(state);
504         compress!(state);
505         compress!(state);
506         compress!(state);
507     }
508 }
509 
510 impl Hash128 {
511     /// Convert into a 16-bytes vector
as_bytes(&self) -> [u8; 16]512     pub fn as_bytes(&self) -> [u8; 16] {
513         let mut bytes = [0u8; 16];
514         let h1 = self.h1.to_le();
515         let h2 = self.h2.to_le();
516         unsafe {
517             ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.get_unchecked_mut(0), 8);
518             ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.get_unchecked_mut(8), 8);
519         }
520         bytes
521     }
522 
523     /// Convert into a `u128`
524     #[inline]
as_u128(&self) -> u128525     pub fn as_u128(&self) -> u128 {
526         let h1 = self.h1.to_le();
527         let h2 = self.h2.to_le();
528         h1 as u128 | ((h2 as u128) << 64)
529     }
530 
531     /// Convert into `(u64, u64)`
532     #[inline]
as_u64(&self) -> (u64, u64)533     pub fn as_u64(&self) -> (u64, u64) {
534         let h1 = self.h1.to_le();
535         let h2 = self.h2.to_le();
536         (h1, h2)
537     }
538 }
539