1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 //
10 // Common hash functions with convenient interfaces. If hashing a
11 // statically-sized input in a performance-critical context, consider
12 // calling a specific hash implementation directly, such as
13 // XXH3_64bits from xxhash.h.
14 //
15 // Since this is a very common header, implementation details are kept
16 // out-of-line. Out-of-lining also aids in tracking the time spent in
17 // hashing functions. Inlining is of limited benefit for runtime-sized
18 // hash inputs.
19 
20 #pragma once
21 
22 #include <cstddef>
23 #include <cstdint>
24 
25 #include "rocksdb/slice.h"
26 #include "util/fastrange.h"
27 
28 namespace ROCKSDB_NAMESPACE {
29 
30 // Stable/persistent 64-bit hash. Higher quality and generally faster than
31 // Hash(), especially for inputs > 24 bytes.
32 // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
33 // results from previous seed. Recommend incrementing by a large odd number.
34 extern uint64_t Hash64(const char* data, size_t n, uint64_t seed);
35 
36 // Specific optimization without seed (same as seed = 0)
37 extern uint64_t Hash64(const char* data, size_t n);
38 
39 // Non-persistent hash. Must only used for in-memory data structures.
40 // The hash results are thus subject to change between releases,
41 // architectures, build configuration, etc. (Thus, it rarely makes sense
42 // to specify a seed for this function, except for a "rolling" hash.)
43 // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
44 // results from previous seed. Recommend incrementing by a large odd number.
NPHash64(const char * data,size_t n,uint64_t seed)45 inline uint64_t NPHash64(const char* data, size_t n, uint64_t seed) {
46 #ifdef ROCKSDB_MODIFY_NPHASH
47   // For testing "subject to change"
48   return Hash64(data, n, seed + 123456789);
49 #else
50   // Currently same as Hash64
51   return Hash64(data, n, seed);
52 #endif
53 }
54 
55 // Specific optimization without seed (same as seed = 0)
NPHash64(const char * data,size_t n)56 inline uint64_t NPHash64(const char* data, size_t n) {
57 #ifdef ROCKSDB_MODIFY_NPHASH
58   // For testing "subject to change"
59   return Hash64(data, n, 123456789);
60 #else
61   // Currently same as Hash64
62   return Hash64(data, n);
63 #endif
64 }
65 
66 // Convenient and equivalent version of Hash128 without depending on 128-bit
67 // scalars
68 void Hash2x64(const char* data, size_t n, uint64_t* high64, uint64_t* low64);
69 void Hash2x64(const char* data, size_t n, uint64_t seed, uint64_t* high64,
70               uint64_t* low64);
71 
72 // Hash 128 bits to 128 bits, guaranteed not to lose data (equivalent to
73 // Hash2x64 on 16 bytes little endian)
74 void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64,
75                        uint64_t* out_high64, uint64_t* out_low64);
76 void BijectiveHash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
77                        uint64_t* out_high64, uint64_t* out_low64);
78 
79 // Inverse of above (mostly for testing)
80 void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64,
81                          uint64_t* out_high64, uint64_t* out_low64);
82 void BijectiveUnhash2x64(uint64_t in_high64, uint64_t in_low64, uint64_t seed,
83                          uint64_t* out_high64, uint64_t* out_low64);
84 
85 // Stable/persistent 32-bit hash. Moderate quality and high speed on
86 // small inputs.
87 // TODO: consider rename to Hash32
88 // KNOWN FLAW: incrementing seed by 1 might not give sufficiently independent
89 // results from previous seed. Recommend pseudorandom or hashed seeds.
90 extern uint32_t Hash(const char* data, size_t n, uint32_t seed);
91 
92 // TODO: consider rename to LegacyBloomHash32
BloomHash(const Slice & key)93 inline uint32_t BloomHash(const Slice& key) {
94   return Hash(key.data(), key.size(), 0xbc9f1d34);
95 }
96 
GetSliceHash64(const Slice & key)97 inline uint64_t GetSliceHash64(const Slice& key) {
98   return Hash64(key.data(), key.size());
99 }
100 // Provided for convenience for use with template argument deduction, where a
101 // specific overload needs to be used.
102 extern uint64_t (*kGetSliceNPHash64UnseededFnPtr)(const Slice&);
103 
GetSliceNPHash64(const Slice & s)104 inline uint64_t GetSliceNPHash64(const Slice& s) {
105   return NPHash64(s.data(), s.size());
106 }
107 
GetSliceNPHash64(const Slice & s,uint64_t seed)108 inline uint64_t GetSliceNPHash64(const Slice& s, uint64_t seed) {
109   return NPHash64(s.data(), s.size(), seed);
110 }
111 
112 // Similar to `GetSliceNPHash64()` with `seed`, but input comes from
113 // concatenation of `Slice`s in `data`.
114 extern uint64_t GetSlicePartsNPHash64(const SliceParts& data, uint64_t seed);
115 
GetSliceRangedNPHash(const Slice & s,size_t range)116 inline size_t GetSliceRangedNPHash(const Slice& s, size_t range) {
117   return FastRange64(NPHash64(s.data(), s.size()), range);
118 }
119 
120 // TODO: consider rename to GetSliceHash32
GetSliceHash(const Slice & s)121 inline uint32_t GetSliceHash(const Slice& s) {
122   return Hash(s.data(), s.size(), 397);
123 }
124 
125 // Useful for splitting up a 64-bit hash
Upper32of64(uint64_t v)126 inline uint32_t Upper32of64(uint64_t v) {
127   return static_cast<uint32_t>(v >> 32);
128 }
Lower32of64(uint64_t v)129 inline uint32_t Lower32of64(uint64_t v) { return static_cast<uint32_t>(v); }
130 
131 // std::hash compatible interface.
132 // TODO: consider rename to SliceHasher32
133 struct SliceHasher {
operatorSliceHasher134   uint32_t operator()(const Slice& s) const { return GetSliceHash(s); }
135 };
136 
137 }  // namespace ROCKSDB_NAMESPACE
138