1 // Copyright (c) 2014 Google, Inc.
2 //
3 // Permission is hereby granted, free of charge, to any person obtaining a copy
4 // of this software and associated documentation files (the "Software"), to deal
5 // in the Software without restriction, including without limitation the rights
6 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 // copies of the Software, and to permit persons to whom the Software is
8 // furnished to do so, subject to the following conditions:
9 //
10 // The above copyright notice and this permission notice shall be included in
11 // all copies or substantial portions of the Software.
12 //
13 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 // THE SOFTWARE.
20 //
21 // FarmHash, by Geoff Pike
22 
23 //
24 // http://code.google.com/p/farmhash/
25 //
26 // This file provides a few functions for hashing strings and other
27 // data.  All of them are high-quality functions in the sense that
28 // they do well on standard tests such as Austin Appleby's SMHasher.
29 // They're also fast.  FarmHash is the successor to CityHash.
30 //
31 // Functions in the FarmHash family are not suitable for cryptography.
32 //
33 // WARNING: This code has been only lightly tested on big-endian platforms!
34 // It is known to work well on little-endian platforms that have a small penalty
35 // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs.
36 // It should work on all 32-bit and 64-bit platforms that allow unaligned reads;
37 // bug reports are welcome.
38 //
39 // By the way, for some hash functions, given strings a and b, the hash
40 // of a+b is easily derived from the hashes of a and b.  This property
41 // doesn't hold for any hash functions in this file.
42 
43 #ifndef FARM_HASH_H_
44 #define FARM_HASH_H_
45 
46 #include <assert.h>
47 #include <stdint.h>
48 #include <stdlib.h>
49 #include <string.h>   // for memcpy and memset
50 #include <utility>
51 
52 #ifndef NAMESPACE_FOR_HASH_FUNCTIONS
53 #define NAMESPACE_FOR_HASH_FUNCTIONS util
54 #endif
55 
56 namespace NAMESPACE_FOR_HASH_FUNCTIONS {
57 
58 #if defined(FARMHASH_UINT128_T_DEFINED)
59 #if defined(__clang__)
60 #if !defined(uint128_t)
61 #define uint128_t __uint128_t
62 #endif
63 #endif
Uint128Low64(const uint128_t x)64 inline uint64_t Uint128Low64(const uint128_t x) {
65   return static_cast<uint64_t>(x);
66 }
Uint128High64(const uint128_t x)67 inline uint64_t Uint128High64(const uint128_t x) {
68   return static_cast<uint64_t>(x >> 64);
69 }
Uint128(uint64_t lo,uint64_t hi)70 inline uint128_t Uint128(uint64_t lo, uint64_t hi) {
71   return lo + (((uint128_t)hi) << 64);
72 }
73 #else
74 typedef std::pair<uint64_t, uint64_t> uint128_t;
75 inline uint64_t Uint128Low64(const uint128_t x) { return x.first; }
76 inline uint64_t Uint128High64(const uint128_t x) { return x.second; }
77 inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); }
78 #endif
79 
80 
81 // BASIC STRING HASHING
82 
83 // Hash function for a byte array.
84 // May change from time to time, may differ on different platforms, may differ
85 // depending on NDEBUG.
86 size_t Hash(const char* s, size_t len);
87 
88 // Hash function for a byte array.  Most useful in 32-bit binaries.
89 // May change from time to time, may differ on different platforms, may differ
90 // depending on NDEBUG.
91 uint32_t Hash32(const char* s, size_t len);
92 
93 // Hash function for a byte array.  For convenience, a 32-bit seed is also
94 // hashed into the result.
95 // May change from time to time, may differ on different platforms, may differ
96 // depending on NDEBUG.
97 uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed);
98 
99 // Hash function for a byte array.
100 // May change from time to time, may differ on different platforms, may differ
101 // depending on NDEBUG.
102 uint64_t Hash64(const char* s, size_t len);
103 
104 // Hash function for a byte array.  For convenience, a 64-bit seed is also
105 // hashed into the result.
106 // May change from time to time, may differ on different platforms, may differ
107 // depending on NDEBUG.
108 uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed);
109 
110 // Hash function for a byte array.  For convenience, two seeds are also
111 // hashed into the result.
112 // May change from time to time, may differ on different platforms, may differ
113 // depending on NDEBUG.
114 uint64_t Hash64WithSeeds(const char* s, size_t len,
115                        uint64_t seed0, uint64_t seed1);
116 
117 // Hash function for a byte array.
118 // May change from time to time, may differ on different platforms, may differ
119 // depending on NDEBUG.
120 uint128_t Hash128(const char* s, size_t len);
121 
122 // Hash function for a byte array.  For convenience, a 128-bit seed is also
123 // hashed into the result.
124 // May change from time to time, may differ on different platforms, may differ
125 // depending on NDEBUG.
126 uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed);
127 
128 // BASIC NON-STRING HASHING
129 
130 // Hash 128 input bits down to 64 bits of output.
131 // This is intended to be a reasonably good hash function.
132 // May change from time to time, may differ on different platforms, may differ
133 // depending on NDEBUG.
Hash128to64(uint128_t x)134 inline uint64_t Hash128to64(uint128_t x) {
135   // Murmur-inspired hashing.
136   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
137   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
138   a ^= (a >> 47);
139   uint64_t b = (Uint128High64(x) ^ a) * kMul;
140   b ^= (b >> 47);
141   b *= kMul;
142   return b;
143 }
144 
145 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
146 
147 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
148 uint32_t Fingerprint32(const char* s, size_t len);
149 
150 // Fingerprint function for a byte array.
151 uint64_t Fingerprint64(const char* s, size_t len);
152 
153 // Fingerprint function for a byte array.
154 uint128_t Fingerprint128(const char* s, size_t len);
155 
156 // This is intended to be a good fingerprinting primitive.
157 // See below for more overloads.
Fingerprint(uint128_t x)158 inline uint64_t Fingerprint(uint128_t x) {
159   // Murmur-inspired hashing.
160   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
161   uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul;
162   a ^= (a >> 47);
163   uint64_t b = (Uint128High64(x) ^ a) * kMul;
164   b ^= (b >> 44);
165   b *= kMul;
166   b ^= (b >> 41);
167   b *= kMul;
168   return b;
169 }
170 
171 // This is intended to be a good fingerprinting primitive.
Fingerprint(uint64_t x)172 inline uint64_t Fingerprint(uint64_t x) {
173   // Murmur-inspired hashing.
174   const uint64_t kMul = 0x9ddfea08eb382d69ULL;
175   uint64_t b = x * kMul;
176   b ^= (b >> 44);
177   b *= kMul;
178   b ^= (b >> 41);
179   b *= kMul;
180   return b;
181 }
182 
183 #ifndef FARMHASH_NO_CXX_STRING
184 
185 // Convenience functions to hash or fingerprint C++ strings.
186 // These require that Str::data() return a pointer to the first char
187 // (as a const char*) and that Str::length() return the string's length;
188 // they work with std::string, for example.
189 
190 // Hash function for a byte array.
191 // May change from time to time, may differ on different platforms, may differ
192 // depending on NDEBUG.
193 template <typename Str>
Hash(const Str & s)194 inline size_t Hash(const Str& s) {
195   assert(sizeof(s[0]) == 1);
196   return Hash(s.data(), s.length());
197 }
198 
199 // Hash function for a byte array.  Most useful in 32-bit binaries.
200 // May change from time to time, may differ on different platforms, may differ
201 // depending on NDEBUG.
202 template <typename Str>
Hash32(const Str & s)203 inline uint32_t Hash32(const Str& s) {
204   assert(sizeof(s[0]) == 1);
205   return Hash32(s.data(), s.length());
206 }
207 
208 // Hash function for a byte array.  For convenience, a 32-bit seed is also
209 // hashed into the result.
210 // May change from time to time, may differ on different platforms, may differ
211 // depending on NDEBUG.
212 template <typename Str>
Hash32WithSeed(const Str & s,uint32_t seed)213 inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
214   assert(sizeof(s[0]) == 1);
215   return Hash32WithSeed(s.data(), s.length(), seed);
216 }
217 
218 // Hash 128 input bits down to 64 bits of output.
219 // Hash function for a byte array.
220 // May change from time to time, may differ on different platforms, may differ
221 // depending on NDEBUG.
222 template <typename Str>
Hash64(const Str & s)223 inline uint64_t Hash64(const Str& s) {
224   assert(sizeof(s[0]) == 1);
225   return Hash64(s.data(), s.length());
226 }
227 
228 // Hash function for a byte array.  For convenience, a 64-bit seed is also
229 // hashed into the result.
230 // May change from time to time, may differ on different platforms, may differ
231 // depending on NDEBUG.
232 template <typename Str>
Hash64WithSeed(const Str & s,uint64_t seed)233 inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
234   assert(sizeof(s[0]) == 1);
235   return Hash64WithSeed(s.data(), s.length(), seed);
236 }
237 
238 // Hash function for a byte array.  For convenience, two seeds are also
239 // hashed into the result.
240 // May change from time to time, may differ on different platforms, may differ
241 // depending on NDEBUG.
242 template <typename Str>
Hash64WithSeeds(const Str & s,uint64_t seed0,uint64_t seed1)243 inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) {
244   assert(sizeof(s[0]) == 1);
245   return Hash64WithSeeds(s.data(), s.length(), seed0, seed1);
246 }
247 
248 // Hash function for a byte array.
249 // May change from time to time, may differ on different platforms, may differ
250 // depending on NDEBUG.
251 template <typename Str>
Hash128(const Str & s)252 inline uint128_t Hash128(const Str& s) {
253   assert(sizeof(s[0]) == 1);
254   return Hash128(s.data(), s.length());
255 }
256 
257 // Hash function for a byte array.  For convenience, a 128-bit seed is also
258 // hashed into the result.
259 // May change from time to time, may differ on different platforms, may differ
260 // depending on NDEBUG.
261 template <typename Str>
Hash128WithSeed(const Str & s,uint128_t seed)262 inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) {
263   assert(sizeof(s[0]) == 1);
264   return Hash128(s.data(), s.length(), seed);
265 }
266 
267 // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions)
268 
269 // Fingerprint function for a byte array.  Most useful in 32-bit binaries.
270 template <typename Str>
Fingerprint32(const Str & s)271 inline uint32_t Fingerprint32(const Str& s) {
272   assert(sizeof(s[0]) == 1);
273   return Fingerprint32(s.data(), s.length());
274 }
275 
276 // Fingerprint 128 input bits down to 64 bits of output.
277 // Fingerprint function for a byte array.
278 template <typename Str>
Fingerprint64(const Str & s)279 inline uint64_t Fingerprint64(const Str& s) {
280   assert(sizeof(s[0]) == 1);
281   return Fingerprint64(s.data(), s.length());
282 }
283 
284 // Fingerprint function for a byte array.
285 template <typename Str>
Fingerprint128(const Str & s)286 inline uint128_t Fingerprint128(const Str& s) {
287   assert(sizeof(s[0]) == 1);
288   return Fingerprint128(s.data(), s.length());
289 }
290 
291 #endif
292 
293 }  // namespace NAMESPACE_FOR_HASH_FUNCTIONS
294 
295 /* gently define FARMHASH_BIG_ENDIAN when detected big-endian machine */
296 #if defined(__BIG_ENDIAN__)
297   #if !defined(FARMHASH_BIG_ENDIAN)
298     #define FARMHASH_BIG_ENDIAN
299   #endif
300 #elif defined(__LITTLE_ENDIAN__)
301   // nothing for little-endian
302 #elif defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && (__BYTE_ORDER == __ORDER_LITTLE_ENDIAN__)
303   // nothing for little-endian
304 #elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) && (__BYTE_ORDER == __ORDER_BIG_ENDIAN__)
305   #if !defined(FARMHASH_BIG_ENDIAN)
306     #define FARMHASH_BIG_ENDIAN
307   #endif
308 #elif defined(__linux__) || defined(__CYGWIN__) || (defined( __GNUC__ ) && !defined(__DragonFly__))  || defined( __GNU_LIBRARY__ )
309   #include <endian.h> // libc6-dev, GLIBC
310   #if BYTE_ORDER == BIG_ENDIAN
311     #if !defined(FARMHASH_BIG_ENDIAN)
312       #define FARMHASH_BIG_ENDIAN
313     #endif
314   #endif
315 #elif defined(__OpenBSD__) || defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) || defined(__s390x__)
316   #include <sys/endian.h>
317   #if BYTE_ORDER == BIG_ENDIAN
318     #if !defined(FARMHASH_BIG_ENDIAN)
319       #define FARMHASH_BIG_ENDIAN
320     #endif
321   #endif
322 #elif defined(_WIN32)
323   // Windows is (currently) little-endian
324 #else
325   #error "Unable to determine endianness!"
326 #endif /* __BIG_ENDIAN__ */
327 
328 #endif  // FARM_HASH_H_
329