1 // Copyright (c) 2014 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // FarmHash, by Geoff Pike
22
23 //
24 // http://code.google.com/p/farmhash/
25 //
26 // This file provides a few functions for hashing strings and other
27 // data. All of them are high-quality functions in the sense that
28 // they do well on standard tests such as Austin Appleby's SMHasher.
29 // They're also fast. FarmHash is the successor to CityHash.
30 //
31 // Functions in the FarmHash family are not suitable for cryptography.
32 //
33 // WARNING: This code has been only lightly tested on big-endian platforms!
34 // It is known to work well on little-endian platforms that have a small penalty
35 // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
36 // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
37 // bug reports are welcome.
38 //
39 // By the way, for some hash functions, given strings a and b, the hash
40 // of a+b is easily derived from the hashes of a and b. This property
41 // doesn't hold for any hash functions in this file.
42
43 #ifndef FARM_HASH_H_
44 #define FARM_HASH_H_
45
46 #include <assert.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49 #include <string.h> // for memcpy and memset
50 #include <utility>
51
52 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
53 #define NAMESPACE_FOR_HASH_FUNCTIONS util
54 #endif
55
56 namespace NAMESPACE_FOR_HASH_FUNCTIONS {
57
58 #if defined(FARMHASH_UINT128_T_DEFINED)
59 #if defined(__clang__)
60 #if !defined(uint128_t)
61 #define uint128_t __uint128_t
62 #endif
63 #endif
Uint128Low64(const uint128_t x)64 inline uint64_t Uint128Low64(const uint128_t x) {
65 return static_cast<uint64_t>(x);
66 }
Uint128High64(const uint128_t x)67 inline uint64_t Uint128High64(const uint128_t x) {
68 return static_cast<uint64_t>(x >> 64);
69 }
Uint128(uint64_t lo,uint64_t hi)70 inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
71 return lo + (((uint128_t)hi) << 64);
72 }
73 #else
74 typedef std::pair<uint64_t, uint64_t> uint128_t;
75 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
76 inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
77 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
78 #endif
79
80
81 // BASIC STRING HASHING
82
83 // Hash function for a byte array.
84 // May change from time to time, may differ on different platforms, may differ
85 // depending on NDEBUG.
86 size_t Hash(const char* s, size_t len);
87
88 // Hash function for a byte array. Most useful in 32-bit binaries.
89 // May change from time to time, may differ on different platforms, may differ
90 // depending on NDEBUG.
91 uint32_t Hash32(const char* s, size_t len);
92
93 // Hash function for a byte array. For convenience, a 32-bit seed is also
94 // hashed into the result.
95 // May change from time to time, may differ on different platforms, may differ
96 // depending on NDEBUG.
97 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
98
99 // Hash function for a byte array.
100 // May change from time to time, may differ on different platforms, may differ
101 // depending on NDEBUG.
102 uint64_t Hash64(const char* s, size_t len);
103
104 // Hash function for a byte array. For convenience, a 64-bit seed is also
105 // hashed into the result.
106 // May change from time to time, may differ on different platforms, may differ
107 // depending on NDEBUG.
108 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
109
110 // Hash function for a byte array. For convenience, two seeds are also
111 // hashed into the result.
112 // May change from time to time, may differ on different platforms, may differ
113 // depending on NDEBUG.
114 uint64_t Hash64WithSeeds(const char* s, size_t len,
115 uint64_t seed0, uint64_t seed1);
116
117 // Hash function for a byte array.
118 // May change from time to time, may differ on different platforms, may differ
119 // depending on NDEBUG.
120 uint128_t Hash128(const char* s, size_t len);
121
122 // Hash function for a byte array. For convenience, a 128-bit seed is also
123 // hashed into the result.
124 // May change from time to time, may differ on different platforms, may differ
125 // depending on NDEBUG.
126 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
127
128 // BASIC NON-STRING HASHING
129
130 // Hash 128 input bits down to 64 bits of output.
131 // This is intended to be a reasonably good hash function.
132 // May change from time to time, may differ on different platforms, may differ
133 // depending on NDEBUG.
Hash128to64(uint128_t x)134 inline uint64_t Hash128to64(uint128_t x) {
135 // Murmur-inspired hashing.
136 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
137 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
138 a ^= (a >> 47);
139 uint64_t b = (Uint128High64(x) ^ a) * kMul;
140 b ^= (b >> 47);
141 b *= kMul;
142 return b;
143 }
144
145 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
146
147 // Fingerprint function for a byte array. Most useful in 32-bit binaries.
148 uint32_t Fingerprint32(const char* s, size_t len);
149
150 // Fingerprint function for a byte array.
151 uint64_t Fingerprint64(const char* s, size_t len);
152
153 // Fingerprint function for a byte array.
154 uint128_t Fingerprint128(const char* s, size_t len);
155
156 // This is intended to be a good fingerprinting primitive.
157 // See below for more overloads.
Fingerprint(uint128_t x)158 inline uint64_t Fingerprint(uint128_t x) {
159 // Murmur-inspired hashing.
160 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
161 uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
162 a ^= (a >> 47);
163 uint64_t b = (Uint128High64(x) ^ a) * kMul;
164 b ^= (b >> 44);
165 b *= kMul;
166 b ^= (b >> 41);
167 b *= kMul;
168 return b;
169 }
170
171 // This is intended to be a good fingerprinting primitive.
Fingerprint(uint64_t x)172 inline uint64_t Fingerprint(uint64_t x) {
173 // Murmur-inspired hashing.
174 const uint64_t kMul = 0x9ddfea08eb382d69ULL;
175 uint64_t b = x * kMul;
176 b ^= (b >> 44);
177 b *= kMul;
178 b ^= (b >> 41);
179 b *= kMul;
180 return b;
181 }
182
183 #ifndef FARMHASH_NO_CXX_STRING
184
185 // Convenience functions to hash or fingerprint C++ strings.
186 // These require that Str::data() return a pointer to the first char
187 // (as a const char*) and that Str::length() return the string's length;
188 // they work with std::string, for example.
189
190 // Hash function for a byte array.
191 // May change from time to time, may differ on different platforms, may differ
192 // depending on NDEBUG.
193 template <typename Str>
Hash(const Str & s)194 inline size_t Hash(const Str& s) {
195 assert(sizeof(s[0]) == 1);
196 return Hash(s.data(), s.length());
197 }
198
199 // Hash function for a byte array. Most useful in 32-bit binaries.
200 // May change from time to time, may differ on different platforms, may differ
201 // depending on NDEBUG.
202 template <typename Str>
Hash32(const Str & s)203 inline uint32_t Hash32(const Str& s) {
204 assert(sizeof(s[0]) == 1);
205 return Hash32(s.data(), s.length());
206 }
207
208 // Hash function for a byte array. For convenience, a 32-bit seed is also
209 // hashed into the result.
210 // May change from time to time, may differ on different platforms, may differ
211 // depending on NDEBUG.
212 template <typename Str>
Hash32WithSeed(const Str & s,uint32_t seed)213 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
214 assert(sizeof(s[0]) == 1);
215 return Hash32WithSeed(s.data(), s.length(), seed);
216 }
217
218 // Hash 128 input bits down to 64 bits of output.
219 // Hash function for a byte array.
220 // May change from time to time, may differ on different platforms, may differ
221 // depending on NDEBUG.
222 template <typename Str>
Hash64(const Str & s)223 inline uint64_t Hash64(const Str& s) {
224 assert(sizeof(s[0]) == 1);
225 return Hash64(s.data(), s.length());
226 }
227
228 // Hash function for a byte array. For convenience, a 64-bit seed is also
229 // hashed into the result.
230 // May change from time to time, may differ on different platforms, may differ
231 // depending on NDEBUG.
232 template <typename Str>
Hash64WithSeed(const Str & s,uint64_t seed)233 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
234 assert(sizeof(s[0]) == 1);
235 return Hash64WithSeed(s.data(), s.length(), seed);
236 }
237
238 // Hash function for a byte array. For convenience, two seeds are also
239 // hashed into the result.
240 // May change from time to time, may differ on different platforms, may differ
241 // depending on NDEBUG.
242 template <typename Str>
Hash64WithSeeds(const Str & s,uint64_t seed0,uint64_t seed1)243 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
244 assert(sizeof(s[0]) == 1);
245 return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
246 }
247
248 // Hash function for a byte array.
249 // May change from time to time, may differ on different platforms, may differ
250 // depending on NDEBUG.
251 template <typename Str>
Hash128(const Str & s)252 inline uint128_t Hash128(const Str& s) {
253 assert(sizeof(s[0]) == 1);
254 return Hash128(s.data(), s.length());
255 }
256
257 // Hash function for a byte array. For convenience, a 128-bit seed is also
258 // hashed into the result.
259 // May change from time to time, may differ on different platforms, may differ
260 // depending on NDEBUG.
261 template <typename Str>
Hash128WithSeed(const Str & s,uint128_t seed)262 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
263 assert(sizeof(s[0]) == 1);
264 return Hash128(s.data(), s.length(), seed);
265 }
266
267 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
268
269 // Fingerprint function for a byte array. Most useful in 32-bit binaries.
270 template <typename Str>
Fingerprint32(const Str & s)271 inline uint32_t Fingerprint32(const Str& s) {
272 assert(sizeof(s[0]) == 1);
273 return Fingerprint32(s.data(), s.length());
274 }
275
276 // Fingerprint 128 input bits down to 64 bits of output.
277 // Fingerprint function for a byte array.
278 template <typename Str>
Fingerprint64(const Str & s)279 inline uint64_t Fingerprint64(const Str& s) {
280 assert(sizeof(s[0]) == 1);
281 return Fingerprint64(s.data(), s.length());
282 }
283
284 // Fingerprint function for a byte array.
285 template <typename Str>
Fingerprint128(const Str & s)286 inline uint128_t Fingerprint128(const Str& s) {
287 assert(sizeof(s[0]) == 1);
288 return Fingerprint128(s.data(), s.length());
289 }
290
291 #endif
292
293 } // namespace NAMESPACE_FOR_HASH_FUNCTIONS
294
295 /* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */
296 #if defined(__BIG_ENDIAN__)
297 #if !defined(FARMHASH_BIG_ENDIAN)
298 #define FARMHASH_BIG_ENDIAN
299 #endif
300 #elif defined(__LITTLE_ENDIAN__)
301 // nothing for little-endian
302 #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__)
303 // nothing for little-endian
304 #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__)
305 #if !defined(FARMHASH_BIG_ENDIAN)
306 #define FARMHASH_BIG_ENDIAN
307 #endif
308 #elif defined(__linux__) || defined(__CYGWIN__) || (defined( __GNUC__ ) && !defined(__DragonFly__)) || defined( __GNU_LIBRARY__ )
309 #include <endian.h> // libc6-dev, GLIBC
310 #if BYTE_ORDER == BIG_ENDIAN
311 #if !defined(FARMHASH_BIG_ENDIAN)
312 #define FARMHASH_BIG_ENDIAN
313 #endif
314 #endif
315 #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__)
316 #include <sys/endian.h>
317 #if BYTE_ORDER == BIG_ENDIAN
318 #if !defined(FARMHASH_BIG_ENDIAN)
319 #define FARMHASH_BIG_ENDIAN
320 #endif
321 #endif
322 #elif defined(_WIN32)
323 // Windows is (currently) little-endian
324 #else
325 #error "Unable to determine endianness!"
326 #endif /* __BIG_ENDIAN__ */
327
328 #endif // FARM_HASH_H_
329