1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 // 6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved. 7 // Use of this source code is governed by a BSD-style license that can be 8 // found in the LICENSE file. See the AUTHORS file for names of contributors. 9 10 #include <string.h> 11 #include "util/coding.h" 12 #include "util/hash.h" 13 #include "util/util.h" 14 #include "util/xxhash.h" 15 16 namespace ROCKSDB_NAMESPACE { 17 18 uint32_t Hash(const char* data, size_t n, uint32_t seed) { 19 // MurmurHash1 - fast but mediocre quality 20 // https://github.com/aappleby/smhasher/wiki/MurmurHash1 21 // 22 const uint32_t m = 0xc6a4a793; 23 const uint32_t r = 24; 24 const char* limit = data + n; 25 uint32_t h = static_cast<uint32_t>(seed ^ (n * m)); 26 27 // Pick up four bytes at a time 28 while (data + 4 <= limit) { 29 uint32_t w = DecodeFixed32(data); 30 data += 4; 31 h += w; 32 h *= m; 33 h ^= (h >> 16); 34 } 35 36 // Pick up remaining bytes 37 switch (limit - data) { 38 // Note: The original hash implementation used data[i] << shift, which 39 // promotes the char to int and then performs the shift. If the char is 40 // negative, the shift is undefined behavior in C++. The hash algorithm is 41 // part of the format definition, so we cannot change it; to obtain the same 42 // behavior in a legal way we just cast to uint32_t, which will do 43 // sign-extension. To guarantee compatibility with architectures where chars 44 // are unsigned we first cast the char to int8_t. 45 case 3: 46 h += static_cast<uint32_t>(static_cast<int8_t>(data[2])) << 16; 47 FALLTHROUGH_INTENDED; 48 case 2: 49 h += static_cast<uint32_t>(static_cast<int8_t>(data[1])) << 8; 50 FALLTHROUGH_INTENDED; 51 case 1: 52 h += static_cast<uint32_t>(static_cast<int8_t>(data[0])); 53 h *= m; 54 h ^= (h >> r); 55 break; 56 } 57 return h; 58 } 59 60 // We are standardizing on a preview release of XXH3, because that's 61 // the best available at time of standardizing. 62 // 63 // In testing (mostly Intel Skylake), this hash function is much more 64 // thorough than Hash32 and is almost universally faster. Hash() only 65 // seems faster when passing runtime-sized keys of the same small size 66 // (less than about 24 bytes) thousands of times in a row; this seems 67 // to allow the branch predictor to work some magic. XXH3's speed is 68 // much less dependent on branch prediction. 69 // 70 // Hashing with a prefix extractor is potentially a common case of 71 // hashing objects of small, predictable size. We could consider 72 // bundling hash functions specialized for particular lengths with 73 // the prefix extractors. 74 uint64_t Hash64(const char* data, size_t n, uint64_t seed) { 75 return XXH3p_64bits_withSeed(data, n, seed); 76 } 77 78 uint64_t Hash64(const char* data, size_t n) { 79 // Same as seed = 0 80 return XXH3p_64bits(data, n); 81 } 82 83 } // namespace ROCKSDB_NAMESPACE 84