1 // Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
4 //
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
10
11 //! An implementation of SipHash with a 128-bit output.
12
13 use core::cmp;
14 use core::hash;
15 use core::marker::PhantomData;
16 use core::mem;
17 use core::ptr;
18
19 /// A 128-bit (2x64) hash output
20 #[derive(Debug, Clone, Copy, Default)]
21 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
22 pub struct Hash128 {
23 pub h1: u64,
24 pub h2: u64,
25 }
26
27 impl From<u128> for Hash128 {
from(v: u128) -> Self28 fn from(v: u128) -> Self {
29 Hash128 {
30 h1: v as u64,
31 h2: (v >> 64) as u64,
32 }
33 }
34 }
35
36 impl Into<u128> for Hash128 {
into(self) -> u12837 fn into(self) -> u128 {
38 (self.h1 as u128) | ((self.h2 as u128) << 64)
39 }
40 }
41
42 /// An implementation of SipHash128 1-3.
43 #[derive(Debug, Clone, Copy, Default)]
44 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
45 pub struct SipHasher13 {
46 hasher: Hasher<Sip13Rounds>,
47 }
48
49 /// An implementation of SipHash128 2-4.
50 #[derive(Debug, Clone, Copy, Default)]
51 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52 pub struct SipHasher24 {
53 hasher: Hasher<Sip24Rounds>,
54 }
55
56 /// An implementation of SipHash128 2-4.
57 ///
58 /// SipHash is a general-purpose hashing function: it runs at a good
59 /// speed (competitive with Spooky and City) and permits strong _keyed_
60 /// hashing. This lets you key your hashtables from a strong RNG, such as
61 /// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
62 ///
63 /// Although the SipHash algorithm is considered to be generally strong,
64 /// it is not intended for cryptographic purposes. As such, all
65 /// cryptographic uses of this implementation are _strongly discouraged_.
66 #[derive(Debug, Clone, Copy, Default)]
67 pub struct SipHasher(SipHasher24);
68
69 #[derive(Debug, Copy)]
70 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
71 struct Hasher<S: Sip> {
72 k0: u64,
73 k1: u64,
74 length: usize, // how many bytes we've processed
75 state: State, // hash State
76 tail: u64, // unprocessed bytes le
77 ntail: usize, // how many bytes in tail are valid
78 _marker: PhantomData<S>,
79 }
80
81 #[derive(Debug, Clone, Copy)]
82 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
83 struct State {
84 // v0, v2 and v1, v3 show up in pairs in the algorithm,
85 // and simd implementations of SipHash will use vectors
86 // of v02 and v13. By placing them in this order in the struct,
87 // the compiler can pick up on just a few simd optimizations by itself.
88 v0: u64,
89 v2: u64,
90 v1: u64,
91 v3: u64,
92 }
93
94 macro_rules! compress {
95 ($state:expr) => {{
96 compress!($state.v0, $state.v1, $state.v2, $state.v3)
97 }};
98 ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
99 $v0 = $v0.wrapping_add($v1);
100 $v1 = $v1.rotate_left(13);
101 $v1 ^= $v0;
102 $v0 = $v0.rotate_left(32);
103 $v2 = $v2.wrapping_add($v3);
104 $v3 = $v3.rotate_left(16);
105 $v3 ^= $v2;
106 $v0 = $v0.wrapping_add($v3);
107 $v3 = $v3.rotate_left(21);
108 $v3 ^= $v0;
109 $v2 = $v2.wrapping_add($v1);
110 $v1 = $v1.rotate_left(17);
111 $v1 ^= $v2;
112 $v2 = $v2.rotate_left(32);
113 }};
114 }
115
116 /// Loads an integer of the desired type from a byte stream, in LE order. Uses
117 /// `copy_nonoverlapping` to let the compiler generate the most efficient way
118 /// to load it from a possibly unaligned address.
119 ///
120 /// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
121 macro_rules! load_int_le {
122 ($buf:expr, $i:expr, $int_ty:ident) => {{
123 debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
124 let mut data = 0 as $int_ty;
125 ptr::copy_nonoverlapping(
126 $buf.get_unchecked($i),
127 &mut data as *mut _ as *mut u8,
128 mem::size_of::<$int_ty>(),
129 );
130 data.to_le()
131 }};
132 }
133
134 /// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
135 /// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
136 /// sizes and avoid calling `memcpy`, which is good for speed.
137 ///
138 /// Unsafe because: unchecked indexing at start..start+len
139 #[inline]
u8to64_le(buf: &[u8], start: usize, len: usize) -> u64140 unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
141 debug_assert!(len < 8);
142 let mut i = 0; // current byte index (from LSB) in the output u64
143 let mut out = 0;
144 if i + 3 < len {
145 out = load_int_le!(buf, start + i, u32) as u64;
146 i += 4;
147 }
148 if i + 1 < len {
149 out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
150 i += 2
151 }
152 if i < len {
153 out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
154 i += 1;
155 }
156 debug_assert_eq!(i, len);
157 out
158 }
159
160 pub trait Hasher128 {
161 /// Return a 128-bit hash
finish128(&self) -> Hash128162 fn finish128(&self) -> Hash128;
163 }
164
165 impl SipHasher {
166 /// Creates a new `SipHasher` with the two initial keys set to 0.
167 #[inline]
new() -> SipHasher168 pub fn new() -> SipHasher {
169 SipHasher::new_with_keys(0, 0)
170 }
171
172 /// Creates a `SipHasher` that is keyed off the provided keys.
173 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher174 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
175 SipHasher(SipHasher24::new_with_keys(key0, key1))
176 }
177
178 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)179 pub fn keys(&self) -> (u64, u64) {
180 (self.0.hasher.k0, self.0.hasher.k1)
181 }
182 }
183
184 impl Hasher128 for SipHasher {
185 /// Return a 128-bit hash
186 #[inline]
finish128(&self) -> Hash128187 fn finish128(&self) -> Hash128 {
188 self.0.finish128()
189 }
190 }
191
192 impl SipHasher13 {
193 /// Creates a new `SipHasher13` with the two initial keys set to 0.
194 #[inline]
new() -> SipHasher13195 pub fn new() -> SipHasher13 {
196 SipHasher13::new_with_keys(0, 0)
197 }
198
199 /// Creates a `SipHasher13` that is keyed off the provided keys.
200 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher13201 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
202 SipHasher13 {
203 hasher: Hasher::new_with_keys(key0, key1),
204 }
205 }
206
207 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)208 pub fn keys(&self) -> (u64, u64) {
209 (self.hasher.k0, self.hasher.k1)
210 }
211 }
212
213 impl Hasher128 for SipHasher13 {
214 /// Return a 128-bit hash
215 #[inline]
finish128(&self) -> Hash128216 fn finish128(&self) -> Hash128 {
217 self.hasher.finish128()
218 }
219 }
220
221 impl SipHasher24 {
222 /// Creates a new `SipHasher24` with the two initial keys set to 0.
223 #[inline]
new() -> SipHasher24224 pub fn new() -> SipHasher24 {
225 SipHasher24::new_with_keys(0, 0)
226 }
227
228 /// Creates a `SipHasher24` that is keyed off the provided keys.
229 #[inline]
new_with_keys(key0: u64, key1: u64) -> SipHasher24230 pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
231 SipHasher24 {
232 hasher: Hasher::new_with_keys(key0, key1),
233 }
234 }
235
236 /// Get the keys used by this hasher
keys(&self) -> (u64, u64)237 pub fn keys(&self) -> (u64, u64) {
238 (self.hasher.k0, self.hasher.k1)
239 }
240 }
241
242 impl Hasher128 for SipHasher24 {
243 /// Return a 128-bit hash
244 #[inline]
finish128(&self) -> Hash128245 fn finish128(&self) -> Hash128 {
246 self.hasher.finish128()
247 }
248 }
249
250 impl<S: Sip> Hasher<S> {
251 #[inline]
new_with_keys(key0: u64, key1: u64) -> Hasher<S>252 fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
253 let mut state = Hasher {
254 k0: key0,
255 k1: key1,
256 length: 0,
257 state: State {
258 v0: 0,
259 v1: 0xee,
260 v2: 0,
261 v3: 0,
262 },
263 tail: 0,
264 ntail: 0,
265 _marker: PhantomData,
266 };
267 state.reset();
268 state
269 }
270
271 #[inline]
reset(&mut self)272 fn reset(&mut self) {
273 self.length = 0;
274 self.state.v0 = self.k0 ^ 0x736f6d6570736575;
275 self.state.v1 = self.k1 ^ 0x646f72616e646f83;
276 self.state.v2 = self.k0 ^ 0x6c7967656e657261;
277 self.state.v3 = self.k1 ^ 0x7465646279746573;
278 self.ntail = 0;
279 }
280
281 // A specialized write function for values with size <= 8.
282 //
283 // The hashing of multi-byte integers depends on endianness. E.g.:
284 // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
285 // - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
286 //
287 // This function does the right thing for little-endian hardware. On
288 // big-endian hardware `x` must be byte-swapped first to give the right
289 // behaviour. After any byte-swapping, the input must be zero-extended to
290 // 64-bits. The caller is responsible for the byte-swapping and
291 // zero-extension.
292 #[inline]
short_write<T>(&mut self, _x: T, x: u64)293 fn short_write<T>(&mut self, _x: T, x: u64) {
294 let size = mem::size_of::<T>();
295 self.length += size;
296
297 // The original number must be zero-extended, not sign-extended.
298 debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
299
300 // The number of bytes needed to fill `self.tail`.
301 let needed = 8 - self.ntail;
302
303 self.tail |= x << (8 * self.ntail);
304 if size < needed {
305 self.ntail += size;
306 return;
307 }
308
309 // `self.tail` is full, process it.
310 self.state.v3 ^= self.tail;
311 S::c_rounds(&mut self.state);
312 self.state.v0 ^= self.tail;
313
314 self.ntail = size - needed;
315 self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
316 }
317 }
318
319 impl<S: Sip> Hasher<S> {
320 #[inline]
finish128(&self) -> Hash128321 pub fn finish128(&self) -> Hash128 {
322 let mut state = self.state;
323
324 let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
325
326 state.v3 ^= b;
327 S::c_rounds(&mut state);
328 state.v0 ^= b;
329
330 state.v2 ^= 0xee;
331 S::d_rounds(&mut state);
332 let h1 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
333
334 state.v1 ^= 0xdd;
335 S::d_rounds(&mut state);
336 let h2 = state.v0 ^ state.v1 ^ state.v2 ^ state.v3;
337
338 Hash128 { h1, h2 }
339 }
340 }
341
342 impl hash::Hasher for SipHasher {
343 #[inline]
write(&mut self, msg: &[u8])344 fn write(&mut self, msg: &[u8]) {
345 self.0.write(msg)
346 }
347
348 #[inline]
finish(&self) -> u64349 fn finish(&self) -> u64 {
350 self.0.finish()
351 }
352 }
353
354 impl hash::Hasher for SipHasher13 {
355 #[inline]
write(&mut self, msg: &[u8])356 fn write(&mut self, msg: &[u8]) {
357 self.hasher.write(msg)
358 }
359
360 #[inline]
finish(&self) -> u64361 fn finish(&self) -> u64 {
362 self.hasher.finish()
363 }
364 }
365
366 impl hash::Hasher for SipHasher24 {
367 #[inline]
write(&mut self, msg: &[u8])368 fn write(&mut self, msg: &[u8]) {
369 self.hasher.write(msg)
370 }
371
372 #[inline]
finish(&self) -> u64373 fn finish(&self) -> u64 {
374 self.hasher.finish()
375 }
376 }
377
378 impl<S: Sip> hash::Hasher for Hasher<S> {
379 #[inline]
write_usize(&mut self, i: usize)380 fn write_usize(&mut self, i: usize) {
381 self.short_write(i, i.to_le() as u64);
382 }
383
384 #[inline]
write_u8(&mut self, i: u8)385 fn write_u8(&mut self, i: u8) {
386 self.short_write(i, i as u64);
387 }
388
389 #[inline]
write_u32(&mut self, i: u32)390 fn write_u32(&mut self, i: u32) {
391 self.short_write(i, i.to_le() as u64);
392 }
393
394 #[inline]
write_u64(&mut self, i: u64)395 fn write_u64(&mut self, i: u64) {
396 self.short_write(i, i.to_le() as u64);
397 }
398
399 #[inline]
write(&mut self, msg: &[u8])400 fn write(&mut self, msg: &[u8]) {
401 let length = msg.len();
402 self.length += length;
403
404 let mut needed = 0;
405
406 if self.ntail != 0 {
407 needed = 8 - self.ntail;
408 self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
409 if length < needed {
410 self.ntail += length;
411 return;
412 } else {
413 self.state.v3 ^= self.tail;
414 S::c_rounds(&mut self.state);
415 self.state.v0 ^= self.tail;
416 self.ntail = 0;
417 }
418 }
419
420 // Buffered tail is now flushed, process new input.
421 let len = length - needed;
422 let left = len & 0x7;
423
424 let mut i = needed;
425 while i < len - left {
426 let mi = unsafe { load_int_le!(msg, i, u64) };
427
428 self.state.v3 ^= mi;
429 S::c_rounds(&mut self.state);
430 self.state.v0 ^= mi;
431
432 i += 8;
433 }
434
435 self.tail = unsafe { u8to64_le(msg, i, left) };
436 self.ntail = left;
437 }
438
439 #[inline]
finish(&self) -> u64440 fn finish(&self) -> u64 {
441 self.finish128().h2
442 }
443 }
444
445 impl<S: Sip> Clone for Hasher<S> {
446 #[inline]
clone(&self) -> Hasher<S>447 fn clone(&self) -> Hasher<S> {
448 Hasher {
449 k0: self.k0,
450 k1: self.k1,
451 length: self.length,
452 state: self.state,
453 tail: self.tail,
454 ntail: self.ntail,
455 _marker: self._marker,
456 }
457 }
458 }
459
460 impl<S: Sip> Default for Hasher<S> {
461 /// Creates a `Hasher<S>` with the two initial keys set to 0.
462 #[inline]
default() -> Hasher<S>463 fn default() -> Hasher<S> {
464 Hasher::new_with_keys(0, 0)
465 }
466 }
467
468 #[doc(hidden)]
469 trait Sip {
c_rounds(_: &mut State)470 fn c_rounds(_: &mut State);
d_rounds(_: &mut State)471 fn d_rounds(_: &mut State);
472 }
473
474 #[derive(Debug, Clone, Copy, Default)]
475 struct Sip13Rounds;
476
477 impl Sip for Sip13Rounds {
478 #[inline]
c_rounds(state: &mut State)479 fn c_rounds(state: &mut State) {
480 compress!(state);
481 }
482
483 #[inline]
d_rounds(state: &mut State)484 fn d_rounds(state: &mut State) {
485 compress!(state);
486 compress!(state);
487 compress!(state);
488 }
489 }
490
491 #[derive(Debug, Clone, Copy, Default)]
492 struct Sip24Rounds;
493
494 impl Sip for Sip24Rounds {
495 #[inline]
c_rounds(state: &mut State)496 fn c_rounds(state: &mut State) {
497 compress!(state);
498 compress!(state);
499 }
500
501 #[inline]
d_rounds(state: &mut State)502 fn d_rounds(state: &mut State) {
503 compress!(state);
504 compress!(state);
505 compress!(state);
506 compress!(state);
507 }
508 }
509
510 impl Hash128 {
511 /// Convert into a 16-bytes vector
as_bytes(&self) -> [u8; 16]512 pub fn as_bytes(&self) -> [u8; 16] {
513 let mut bytes = [0u8; 16];
514 let h1 = self.h1.to_le();
515 let h2 = self.h2.to_le();
516 unsafe {
517 ptr::copy_nonoverlapping(&h1 as *const _ as *const u8, bytes.get_unchecked_mut(0), 8);
518 ptr::copy_nonoverlapping(&h2 as *const _ as *const u8, bytes.get_unchecked_mut(8), 8);
519 }
520 bytes
521 }
522
523 /// Convert into a `u128`
524 #[inline]
as_u128(&self) -> u128525 pub fn as_u128(&self) -> u128 {
526 let h1 = self.h1.to_le();
527 let h2 = self.h2.to_le();
528 h1 as u128 | ((h2 as u128) << 64)
529 }
530
531 /// Convert into `(u64, u64)`
532 #[inline]
as_u64(&self) -> (u64, u64)533 pub fn as_u64(&self) -> (u64, u64) {
534 let h1 = self.h1.to_le();
535 let h2 = self.h2.to_le();
536 (h1, h2)
537 }
538 }
539