1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 //
10 // Common hash functions with convenient interfaces. If hashing a
11 // statically-sized input in a performance-critical context, consider
12 // calling a specific hash implementation directly, such as
13 // XXH3p_64bits from xxhash.h.
14 //
15 // Since this is a very common header, implementation details are kept
16 // out-of-line. Out-of-lining also aids in tracking the time spent in
17 // hashing functions. Inlining is of limited benefit for runtime-sized
18 // hash inputs.
19 
20 #pragma once
21 #include <stddef.h>
22 #include <stdint.h>
23 
24 #include "rocksdb/slice.h"
25 
26 namespace ROCKSDB_NAMESPACE {
27 
28 // Stable/persistent 64-bit hash. Higher quality and generally faster than
29 // Hash(), especially for inputs > 24 bytes.
30 extern uint64_t Hash64(const char* data, size_t n, uint64_t seed);
31 
32 // Specific optimization without seed (same as seed = 0)
33 extern uint64_t Hash64(const char* data, size_t n);
34 
35 // Non-persistent hash. Must only used for in-memory data structure.
36 // The hash results are thus applicable to change. (Thus, it rarely makes
37 // sense to specify a seed for this function.)
NPHash64(const char * data,size_t n,uint32_t seed)38 inline uint64_t NPHash64(const char* data, size_t n, uint32_t seed) {
39   // Currently same as Hash64
40   return Hash64(data, n, seed);
41 }
42 
43 // Specific optimization without seed (same as seed = 0)
NPHash64(const char * data,size_t n)44 inline uint64_t NPHash64(const char* data, size_t n) {
45   // Currently same as Hash64
46   return Hash64(data, n);
47 }
48 
49 // Stable/persistent 32-bit hash. Moderate quality and high speed on
50 // small inputs.
51 // TODO: consider rename to Hash32
52 extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
53 
54 // TODO: consider rename to LegacyBloomHash32
BloomHash(const Slice & key)55 inline uint32_t BloomHash(const Slice& key) {
56   return Hash(key.data(), key.size(), 0xbc9f1d34);
57 }
58 
GetSliceHash64(const Slice & key)59 inline uint64_t GetSliceHash64(const Slice& key) {
60   return Hash64(key.data(), key.size());
61 }
62 
GetSliceNPHash64(const Slice & s)63 inline uint64_t GetSliceNPHash64(const Slice& s) {
64   return NPHash64(s.data(), s.size());
65 }
66 
67 // TODO: consider rename to GetSliceHash32
GetSliceHash(const Slice & s)68 inline uint32_t GetSliceHash(const Slice& s) {
69   return Hash(s.data(), s.size(), 397);
70 }
71 
72 // Useful for splitting up a 64-bit hash
Upper32of64(uint64_t v)73 inline uint32_t Upper32of64(uint64_t v) {
74   return static_cast<uint32_t>(v >> 32);
75 }
Lower32of64(uint64_t v)76 inline uint32_t Lower32of64(uint64_t v) { return static_cast<uint32_t>(v); }
77 
78 // std::hash compatible interface.
79 // TODO: consider rename to SliceHasher32
80 struct SliceHasher {
operatorSliceHasher81   uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
82 };
83 
84 // An alternative to % for mapping a hash value to an arbitrary range. See
85 // https://github.com/lemire/fastrange
fastrange32(uint32_t hash,uint32_t range)86 inline uint32_t fastrange32(uint32_t hash, uint32_t range) {
87   uint64_t product = uint64_t{range} * hash;
88   return static_cast<uint32_t>(product >> 32);
89 }
90 
91 // An alternative to % for mapping a 64-bit hash value to an arbitrary range
92 // that fits in size_t. See https://github.com/lemire/fastrange
93 // We find size_t more convenient than uint64_t for the range, with side
94 // benefit of better optimization on 32-bit platforms.
fastrange64(uint64_t hash,size_t range)95 inline size_t fastrange64(uint64_t hash, size_t range) {
96 #if defined(HAVE_UINT128_EXTENSION)
97   // Can use compiler's 128-bit type. Trust it to do the right thing.
98   __uint128_t wide = __uint128_t{range} * hash;
99   return static_cast<size_t>(wide >> 64);
100 #else
101   // Fall back: full decomposition.
102   // NOTE: GCC seems to fully understand this code as 64-bit x {32 or 64}-bit
103   // -> {96 or 128}-bit multiplication and optimize it down to a single
104   // wide-result multiplication (64-bit platform) or two wide-result
105   // multiplications (32-bit platforms, where range64 >> 32 is zero).
106   uint64_t range64 = range;  // ok to shift by 32, even if size_t is 32-bit
107   uint64_t tmp = uint64_t{range64 & 0xffffFFFF} * uint64_t{hash & 0xffffFFFF};
108   tmp >>= 32;
109   tmp += uint64_t{range64 & 0xffffFFFF} * uint64_t{hash >> 32};
110   // Avoid overflow: first add lower 32 of tmp2, and later upper 32
111   uint64_t tmp2 = uint64_t{range64 >> 32} * uint64_t{hash & 0xffffFFFF};
112   tmp += static_cast<uint32_t>(tmp2);
113   tmp >>= 32;
114   tmp += (tmp2 >> 32);
115   tmp += uint64_t{range64 >> 32} * uint64_t{hash >> 32};
116   return static_cast<size_t>(tmp);
117 #endif
118 }
119 
120 }  // namespace ROCKSDB_NAMESPACE
121