1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include <string.h>
11 #include "util/coding.h"
12 #include "util/hash.h"
13 #include "util/util.h"
14 #include "util/xxhash.h"
15 
16 namespace ROCKSDB_NAMESPACE {
17 
uint32_t Hash(const char* data, size_t n, uint32_t seed) {
  // MurmurHash1 - fast but mediocre quality
  // https://github.com/aappleby/smhasher/wiki/MurmurHash1
  //
  // This function is part of persisted format definitions, so its output
  // must remain bit-for-bit stable.
  const uint32_t kMul = 0xc6a4a793;
  const uint32_t kTailShift = 24;
  const char* const end = data + n;
  uint32_t h = static_cast<uint32_t>(seed ^ (n * kMul));

  // Body: consume one little-endian 32-bit word per iteration. The word is
  // assembled from unsigned bytes, low byte first, which matches
  // DecodeFixed32's little-endian decoding on every platform.
  for (; end - data >= 4; data += 4) {
    const uint32_t word =
        static_cast<uint32_t>(static_cast<unsigned char>(data[0])) |
        (static_cast<uint32_t>(static_cast<unsigned char>(data[1])) << 8) |
        (static_cast<uint32_t>(static_cast<unsigned char>(data[2])) << 16) |
        (static_cast<uint32_t>(static_cast<unsigned char>(data[3])) << 24);
    h += word;
    h *= kMul;
    h ^= (h >> 16);
  }

  // Tail: 1-3 leftover bytes. The historical implementation shifted a
  // (possibly negative) char promoted to int, which is UB in C++; since the
  // result is part of the format definition we must reproduce it exactly.
  // Casting through int8_t and then to uint32_t performs the same
  // sign extension in a well-defined way, including on platforms where
  // plain char is unsigned.
  const size_t tail = static_cast<size_t>(end - data);
  if (tail > 0) {
    if (tail > 2) {
      h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16;
    }
    if (tail > 1) {
      h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8;
    }
    h += static_cast<uint32_t>(static_cast<int8_t>(data[0]));
    h *= kMul;
    h ^= (h >> kTailShift);
  }
  return h;
}
59 
60 // We are standardizing on a preview release of XXH3, because that's
61 // the best available at time of standardizing.
62 //
63 // In testing (mostly Intel Skylake), this hash function is much more
64 // thorough than Hash32 and is almost universally faster. Hash() only
65 // seems faster when passing runtime-sized keys of the same small size
66 // (less than about 24 bytes) thousands of times in a row; this seems
67 // to allow the branch predictor to work some magic. XXH3's speed is
68 // much less dependent on branch prediction.
69 //
70 // Hashing with a prefix extractor is potentially a common case of
71 // hashing objects of small, predictable size. We could consider
72 // bundling hash functions specialized for particular lengths with
73 // the prefix extractors.
// Seeded 64-bit hash of the byte range [data, data + n). Forwards to the
// pinned XXH3 preview snapshot (the "p" in XXH3p) vendored in util/xxhash.h;
// the exact implementation must not change, since callers persist the output.
uint64_t Hash64(const char* data, size_t n, uint64_t seed) {
  return XXH3p_64bits_withSeed(data, n, seed);
}
77 
// Unseeded 64-bit hash of the byte range [data, data + n), via the same
// pinned XXH3 preview ("XXH3p") implementation as the seeded overload.
uint64_t Hash64(const char* data, size_t n) {
  // Same as seed = 0
  return XXH3p_64bits(data, n);
}
82 
83 }  // namespace ROCKSDB_NAMESPACE
84