1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2012 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 
10 #include "util/hash.h"
11 
12 #include <cstring>
13 #include <vector>
14 
15 #include "test_util/testharness.h"
16 #include "util/coding.h"
17 #include "util/coding_lean.h"
18 #include "util/hash128.h"
19 #include "util/math128.h"
20 
21 using ROCKSDB_NAMESPACE::BijectiveHash2x64;
22 using ROCKSDB_NAMESPACE::BijectiveUnhash2x64;
23 using ROCKSDB_NAMESPACE::DecodeFixed64;
24 using ROCKSDB_NAMESPACE::EncodeFixed32;
25 using ROCKSDB_NAMESPACE::GetSliceHash64;
26 using ROCKSDB_NAMESPACE::Hash;
27 using ROCKSDB_NAMESPACE::Hash128;
28 using ROCKSDB_NAMESPACE::Hash2x64;
29 using ROCKSDB_NAMESPACE::Hash64;
30 using ROCKSDB_NAMESPACE::Lower32of64;
31 using ROCKSDB_NAMESPACE::Lower64of128;
32 using ROCKSDB_NAMESPACE::Slice;
33 using ROCKSDB_NAMESPACE::Unsigned128;
34 using ROCKSDB_NAMESPACE::Upper32of64;
35 using ROCKSDB_NAMESPACE::Upper64of128;
36 
37 // The hash algorithm is part of the file format, for example for the Bloom
38 // filters. Test that the hash values are stable for a set of random strings of
39 // varying lengths.
TEST(HashTest,Values)40 TEST(HashTest, Values) {
41   constexpr uint32_t kSeed = 0xbc9f1d34;  // Same as BloomHash.
42 
43   EXPECT_EQ(Hash("", 0, kSeed), 3164544308u);
44   EXPECT_EQ(Hash("\x08", 1, kSeed), 422599524u);
45   EXPECT_EQ(Hash("\x17", 1, kSeed), 3168152998u);
46   EXPECT_EQ(Hash("\x9a", 1, kSeed), 3195034349u);
47   EXPECT_EQ(Hash("\x1c", 1, kSeed), 2651681383u);
48   EXPECT_EQ(Hash("\x4d\x76", 2, kSeed), 2447836956u);
49   EXPECT_EQ(Hash("\x52\xd5", 2, kSeed), 3854228105u);
50   EXPECT_EQ(Hash("\x91\xf7", 2, kSeed), 31066776u);
51   EXPECT_EQ(Hash("\xd6\x27", 2, kSeed), 1806091603u);
52   EXPECT_EQ(Hash("\x30\x46\x0b", 3, kSeed), 3808221797u);
53   EXPECT_EQ(Hash("\x56\xdc\xd6", 3, kSeed), 2157698265u);
54   EXPECT_EQ(Hash("\xd4\x52\x33", 3, kSeed), 1721992661u);
55   EXPECT_EQ(Hash("\x6a\xb5\xf4", 3, kSeed), 2469105222u);
56   EXPECT_EQ(Hash("\x67\x53\x81\x1c", 4, kSeed), 118283265u);
57   EXPECT_EQ(Hash("\x69\xb8\xc0\x88", 4, kSeed), 3416318611u);
58   EXPECT_EQ(Hash("\x1e\x84\xaf\x2d", 4, kSeed), 3315003572u);
59   EXPECT_EQ(Hash("\x46\xdc\x54\xbe", 4, kSeed), 447346355u);
60   EXPECT_EQ(Hash("\xd0\x7a\x6e\xea\x56", 5, kSeed), 4255445370u);
61   EXPECT_EQ(Hash("\x86\x83\xd5\xa4\xd8", 5, kSeed), 2390603402u);
62   EXPECT_EQ(Hash("\xb7\x46\xbb\x77\xce", 5, kSeed), 2048907743u);
63   EXPECT_EQ(Hash("\x6c\xa8\xbc\xe5\x99", 5, kSeed), 2177978500u);
64   EXPECT_EQ(Hash("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed), 1036846008u);
65   EXPECT_EQ(Hash("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed), 229980482u);
66   EXPECT_EQ(Hash("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed), 3655585422u);
67   EXPECT_EQ(Hash("\x73\xe1\xff\x56\x9c\xce", 6, kSeed), 3502708029u);
68   EXPECT_EQ(Hash("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed), 815120748u);
69   EXPECT_EQ(Hash("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed), 3056033698u);
70   EXPECT_EQ(Hash("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed), 587205227u);
71   EXPECT_EQ(Hash("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed), 2030937252u);
72   EXPECT_EQ(Hash("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed), 469635402u);
73   EXPECT_EQ(Hash("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed), 3530274698u);
74   EXPECT_EQ(Hash("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed), 1974545809u);
75   EXPECT_EQ(Hash("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed), 3563570120u);
76   EXPECT_EQ(Hash("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed),
77             2706087434u);
78   EXPECT_EQ(Hash("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed),
79             1534654151u);
80   EXPECT_EQ(Hash("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed),
81             2355554696u);
82   EXPECT_EQ(Hash("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed),
83             1400800912u);
84   EXPECT_EQ(Hash("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed),
85             3420325137u);
86   EXPECT_EQ(Hash("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed),
87             3427803584u);
88   EXPECT_EQ(Hash("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed),
89             1152407945u);
90   EXPECT_EQ(Hash("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed),
91             3382479516u);
92 }
93 
94 // The hash algorithm is part of the file format, for example for the Bloom
95 // filters.
TEST(HashTest,Hash64Misc)96 TEST(HashTest, Hash64Misc) {
97   constexpr uint32_t kSeed = 0;  // Same as GetSliceHash64
98 
99   for (char fill : {'\0', 'a', '1', '\xff'}) {
100     const size_t max_size = 1000;
101     const std::string str(max_size, fill);
102 
103     for (size_t size = 0; size <= max_size; ++size) {
104       uint64_t here = Hash64(str.data(), size, kSeed);
105 
106       // Must be same as unseeded Hash64 and GetSliceHash64
107       EXPECT_EQ(here, Hash64(str.data(), size));
108       EXPECT_EQ(here, GetSliceHash64(Slice(str.data(), size)));
109 
110       // Upper and Lower must reconstruct hash
111       EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) | Lower32of64(here));
112       EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) + Lower32of64(here));
113       EXPECT_EQ(here, (uint64_t{Upper32of64(here)} << 32) ^ Lower32of64(here));
114 
115       // Seed changes hash value (with high probability)
116       for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) {
117         EXPECT_NE(here, Hash64(str.data(), size, var_seed));
118       }
119 
120       // Size changes hash value (with high probability)
121       size_t max_smaller_by = std::min(size_t{30}, size);
122       for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) {
123         EXPECT_NE(here, Hash64(str.data(), size - smaller_by, kSeed));
124       }
125     }
126   }
127 }
128 
129 // Test that hash values are "non-trivial" for "trivial" inputs
TEST(HashTest,Hash64Trivial)130 TEST(HashTest, Hash64Trivial) {
131   // Thorough test too slow for regression testing
132   constexpr bool thorough = false;
133 
134   // For various seeds, make sure hash of empty string is not zero.
135   constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000;
136   for (uint64_t seed = 0; seed < max_seed; ++seed) {
137     uint64_t here = Hash64("", 0, seed);
138     EXPECT_NE(Lower32of64(here), 0u);
139     EXPECT_NE(Upper32of64(here), 0u);
140   }
141 
142   // For standard seed, make sure hash of small strings are not zero
143   constexpr uint32_t kSeed = 0;  // Same as GetSliceHash64
144   char input[4];
145   constexpr int max_len = thorough ? 3 : 2;
146   for (int len = 1; len <= max_len; ++len) {
147     for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) {
148       EncodeFixed32(input, i);
149       uint64_t here = Hash64(input, len, kSeed);
150       EXPECT_NE(Lower32of64(here), 0u);
151       EXPECT_NE(Upper32of64(here), 0u);
152     }
153   }
154 }
155 
156 // Test that the hash values are stable for a set of random strings of
157 // varying small lengths.
TEST(HashTest,Hash64SmallValueSchema)158 TEST(HashTest, Hash64SmallValueSchema) {
159   constexpr uint32_t kSeed = 0;  // Same as GetSliceHash64
160 
161   EXPECT_EQ(Hash64("", 0, kSeed), uint64_t{5999572062939766020u});
162   EXPECT_EQ(Hash64("\x08", 1, kSeed), uint64_t{583283813901344696u});
163   EXPECT_EQ(Hash64("\x17", 1, kSeed), uint64_t{16175549975585474943u});
164   EXPECT_EQ(Hash64("\x9a", 1, kSeed), uint64_t{16322991629225003903u});
165   EXPECT_EQ(Hash64("\x1c", 1, kSeed), uint64_t{13269285487706833447u});
166   EXPECT_EQ(Hash64("\x4d\x76", 2, kSeed), uint64_t{6859542833406258115u});
167   EXPECT_EQ(Hash64("\x52\xd5", 2, kSeed), uint64_t{4919611532550636959u});
168   EXPECT_EQ(Hash64("\x91\xf7", 2, kSeed), uint64_t{14199427467559720719u});
169   EXPECT_EQ(Hash64("\xd6\x27", 2, kSeed), uint64_t{12292689282614532691u});
170   EXPECT_EQ(Hash64("\x30\x46\x0b", 3, kSeed), uint64_t{11404699285340020889u});
171   EXPECT_EQ(Hash64("\x56\xdc\xd6", 3, kSeed), uint64_t{12404347133785524237u});
172   EXPECT_EQ(Hash64("\xd4\x52\x33", 3, kSeed), uint64_t{15853805298481534034u});
173   EXPECT_EQ(Hash64("\x6a\xb5\xf4", 3, kSeed), uint64_t{16863488758399383382u});
174   EXPECT_EQ(Hash64("\x67\x53\x81\x1c", 4, kSeed),
175             uint64_t{9010661983527562386u});
176   EXPECT_EQ(Hash64("\x69\xb8\xc0\x88", 4, kSeed),
177             uint64_t{6611781377647041447u});
178   EXPECT_EQ(Hash64("\x1e\x84\xaf\x2d", 4, kSeed),
179             uint64_t{15290969111616346501u});
180   EXPECT_EQ(Hash64("\x46\xdc\x54\xbe", 4, kSeed),
181             uint64_t{7063754590279313623u});
182   EXPECT_EQ(Hash64("\xd0\x7a\x6e\xea\x56", 5, kSeed),
183             uint64_t{6384167718754869899u});
184   EXPECT_EQ(Hash64("\x86\x83\xd5\xa4\xd8", 5, kSeed),
185             uint64_t{16874407254108011067u});
186   EXPECT_EQ(Hash64("\xb7\x46\xbb\x77\xce", 5, kSeed),
187             uint64_t{16809880630149135206u});
188   EXPECT_EQ(Hash64("\x6c\xa8\xbc\xe5\x99", 5, kSeed),
189             uint64_t{1249038833153141148u});
190   EXPECT_EQ(Hash64("\x5c\x5e\xe1\xa0\x73\x81", 6, kSeed),
191             uint64_t{17358142495308219330u});
192   EXPECT_EQ(Hash64("\x08\x5d\x73\x1c\xe5\x2e", 6, kSeed),
193             uint64_t{4237646583134806322u});
194   EXPECT_EQ(Hash64("\x42\xfb\xf2\x52\xb4\x10", 6, kSeed),
195             uint64_t{4373664924115234051u});
196   EXPECT_EQ(Hash64("\x73\xe1\xff\x56\x9c\xce", 6, kSeed),
197             uint64_t{12012981210634596029u});
198   EXPECT_EQ(Hash64("\x5c\xbe\x97\x75\x54\x9a\x52", 7, kSeed),
199             uint64_t{5716522398211028826u});
200   EXPECT_EQ(Hash64("\x16\x82\x39\x49\x88\x2b\x36", 7, kSeed),
201             uint64_t{15604531309862565013u});
202   EXPECT_EQ(Hash64("\x59\x77\xf0\xa7\x24\xf4\x78", 7, kSeed),
203             uint64_t{8601330687345614172u});
204   EXPECT_EQ(Hash64("\xd3\xa5\x7c\x0e\xc0\x02\x07", 7, kSeed),
205             uint64_t{8088079329364056942u});
206   EXPECT_EQ(Hash64("\x31\x1b\x98\x75\x96\x22\xd3\x9a", 8, kSeed),
207             uint64_t{9844314944338447628u});
208   EXPECT_EQ(Hash64("\x38\xd6\xf7\x28\x20\xb4\x8a\xe9", 8, kSeed),
209             uint64_t{10973293517982163143u});
210   EXPECT_EQ(Hash64("\xbb\x18\x5d\xf4\x12\x03\xf7\x99", 8, kSeed),
211             uint64_t{9986007080564743219u});
212   EXPECT_EQ(Hash64("\x80\xd4\x3b\x3b\xae\x22\xa2\x78", 8, kSeed),
213             uint64_t{1729303145008254458u});
214   EXPECT_EQ(Hash64("\x1a\xb5\xd0\xfe\xab\xc3\x61\xb2\x99", 9, kSeed),
215             uint64_t{13253403748084181481u});
216   EXPECT_EQ(Hash64("\x8e\x4a\xc3\x18\x20\x2f\x06\xe6\x3c", 9, kSeed),
217             uint64_t{7768754303876232188u});
218   EXPECT_EQ(Hash64("\xb6\xc0\xdd\x05\x3f\xc4\x86\x4c\xef", 9, kSeed),
219             uint64_t{12439346786701492u});
220   EXPECT_EQ(Hash64("\x9a\x5f\x78\x0d\xaf\x50\xe1\x1f\x55", 9, kSeed),
221             uint64_t{10841838338450144690u});
222   EXPECT_EQ(Hash64("\x22\x6f\x39\x1f\xf8\xdd\x4f\x52\x17\x94", 10, kSeed),
223             uint64_t{12883919702069153152u});
224   EXPECT_EQ(Hash64("\x32\x89\x2a\x75\x48\x3a\x4a\x02\x69\xdd", 10, kSeed),
225             uint64_t{12692903507676842188u});
226   EXPECT_EQ(Hash64("\x06\x92\x5c\xf4\x88\x0e\x7e\x68\x38\x3e", 10, kSeed),
227             uint64_t{6540985900674032620u});
228   EXPECT_EQ(Hash64("\xbd\x2c\x63\x38\xbf\xe9\x78\xb7\xbf\x15", 10, kSeed),
229             uint64_t{10551812464348219044u});
230 }
231 
Hash64TestDescriptor(const char * repeat,size_t limit)232 std::string Hash64TestDescriptor(const char *repeat, size_t limit) {
233   const char *mod61_encode =
234       "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
235 
236   std::string input;
237   while (input.size() < limit) {
238     input.append(repeat);
239   }
240   std::string rv;
241   for (size_t i = 0; i < limit; ++i) {
242     uint64_t h = GetSliceHash64(Slice(input.data(), i));
243     rv.append(1, mod61_encode[static_cast<size_t>(h % 61)]);
244   }
245   return rv;
246 }
247 
248 // XXPH3 changes its algorithm for various sizes up through 250 bytes, so
249 // we need to check the stability of larger sizes also.
TEST(HashTest,Hash64LargeValueSchema)250 TEST(HashTest, Hash64LargeValueSchema) {
251   // Each of these derives a "descriptor" from the hash values for all
252   // lengths up to 430.
253   // Note that "c" is common for the zero-length string.
254   EXPECT_EQ(
255       Hash64TestDescriptor("foo", 430),
256       "cRhyWsY67B6klRA1udmOuiYuX7IthyGBKqbeosz2hzVglWCmQx8nEdnpkvPfYX56Up2OWOTV"
257       "lTzfAoYwvtqKzjD8E9xttR2unelbXbIV67NUe6bOO23BxaSFRcA3njGu5cUWfgwOqNoTsszp"
258       "uPvKRP6qaUR5VdoBkJUCFIefd7edlNK5mv6JYWaGdwxehg65hTkTmjZoPKxTZo4PLyzbL9U4"
259       "xt12ITSfeP2MfBHuLI2z2pDlBb44UQKVMx27LEoAHsdLp3WfWfgH3sdRBRCHm33UxCM4QmE2"
260       "xJ7gqSvNwTeH7v9GlC8zWbGroyD3UVNeShMLx29O7tH1biemLULwAHyIw8zdtLMDpEJ8m2ic"
261       "l6Lb4fDuuFNAs1GCVUthjK8CV8SWI8Rsz5THSwn5CGhpqUwSZcFknjwWIl5rNCvDxXJqYr");
262   // Note that "1EeRk" is common for "Rocks"
263   EXPECT_EQ(
264       Hash64TestDescriptor("Rocks", 430),
265       "c1EeRkrzgOYWLA8PuhJrwTePJewoB44WdXYDfhbk3ZxTqqg25WlPExDl7IKIQLJvnA6gJxxn"
266       "9TCSLkFGfJeXehaSS1GBqWSzfhEH4VXiXIUCuxJXxtKXcSC6FrNIQGTZbYDiUOLD6Y5inzrF"
267       "9etwQhXUBanw55xAUdNMFQAm2GjJ6UDWp2mISLiMMkLjANWMKLaZMqaFLX37qB4MRO1ooVRv"
268       "zSvaNRSCLxlggQCasQq8icWjzf3HjBlZtU6pd4rkaUxSzHqmo9oM5MghbU5Rtxg8wEfO7lVN"
269       "5wdMONYecslQTwjZUpO1K3LDf3K3XK6sUXM6ShQQ3RHmMn2acB4YtTZ3QQcHYJSOHn2DuWpa"
270       "Q8RqzX5lab92YmOLaCdOHq1BPsM7SIBzMdLgePNsJ1vvMALxAaoDUHPxoFLO2wx18IXnyX");
271   EXPECT_EQ(
272       Hash64TestDescriptor("RocksDB", 430),
273       "c1EeRkukbkb28wLTahwD2sfUhZzaBEnF8SVrxnPVB6A7b8CaAl3UKsDZISF92GSq2wDCukOq"
274       "Jgrsp7A3KZhDiLW8dFXp8UPqPxMCRlMdZeVeJ2dJxrmA6cyt99zkQFj7ELbut6jAeVqARFnw"
275       "fnWVXOsaLrq7bDCbMcns2DKvTaaqTCLMYxI7nhtLpFN1jR755FRQFcOzrrDbh7QhypjdvlYw"
276       "cdAMSZgp9JMHxbM23wPSuH6BOFgxejz35PScZfhDPvTOxIy1jc3MZsWrMC3P324zNolO7JdW"
277       "CX2I5UDKjjaEJfxbgVgJIXxtQGlmj2xkO5sPpjULQV4X2HlY7FQleJ4QRaJIB4buhCA4vUTF"
278       "eMFlxCIYUpTCsal2qsmnGOWa8WCcefrohMjDj1fjzSvSaQwlpyR1GZHF2uPOoQagiCpHpm");
279 }
280 
TEST(HashTest,Hash128Misc)281 TEST(HashTest, Hash128Misc) {
282   constexpr uint32_t kSeed = 0;  // Same as GetSliceHash128
283 
284   for (char fill : {'\0', 'a', '1', '\xff', 'e'}) {
285     const size_t max_size = 1000;
286     std::string str(max_size, fill);
287 
288     if (fill == 'e') {
289       // Use different characters to check endianness handling
290       for (size_t i = 0; i < str.size(); ++i) {
291         str[i] += static_cast<char>(i);
292       }
293     }
294 
295     for (size_t size = 0; size <= max_size; ++size) {
296       Unsigned128 here = Hash128(str.data(), size, kSeed);
297 
298       // Must be same as unseeded Hash128 and GetSliceHash128
299       EXPECT_EQ(here, Hash128(str.data(), size));
300       EXPECT_EQ(here, GetSliceHash128(Slice(str.data(), size)));
301       {
302         uint64_t hi, lo;
303         Hash2x64(str.data(), size, &hi, &lo);
304         EXPECT_EQ(Lower64of128(here), lo);
305         EXPECT_EQ(Upper64of128(here), hi);
306       }
307       if (size == 16) {
308         const uint64_t in_hi = DecodeFixed64(str.data() + 8);
309         const uint64_t in_lo = DecodeFixed64(str.data());
310         uint64_t hi, lo;
311         BijectiveHash2x64(in_hi, in_lo, &hi, &lo);
312         EXPECT_EQ(Lower64of128(here), lo);
313         EXPECT_EQ(Upper64of128(here), hi);
314         uint64_t un_hi, un_lo;
315         BijectiveUnhash2x64(hi, lo, &un_hi, &un_lo);
316         EXPECT_EQ(in_lo, un_lo);
317         EXPECT_EQ(in_hi, un_hi);
318       }
319 
320       // Upper and Lower must reconstruct hash
321       EXPECT_EQ(here,
322                 (Unsigned128{Upper64of128(here)} << 64) | Lower64of128(here));
323       EXPECT_EQ(here,
324                 (Unsigned128{Upper64of128(here)} << 64) ^ Lower64of128(here));
325 
326       // Seed changes hash value (with high probability)
327       for (uint64_t var_seed = 1; var_seed != 0; var_seed <<= 1) {
328         Unsigned128 seeded = Hash128(str.data(), size, var_seed);
329         EXPECT_NE(here, seeded);
330         // Must match seeded Hash2x64
331         {
332           uint64_t hi, lo;
333           Hash2x64(str.data(), size, var_seed, &hi, &lo);
334           EXPECT_EQ(Lower64of128(seeded), lo);
335           EXPECT_EQ(Upper64of128(seeded), hi);
336         }
337         if (size == 16) {
338           const uint64_t in_hi = DecodeFixed64(str.data() + 8);
339           const uint64_t in_lo = DecodeFixed64(str.data());
340           uint64_t hi, lo;
341           BijectiveHash2x64(in_hi, in_lo, var_seed, &hi, &lo);
342           EXPECT_EQ(Lower64of128(seeded), lo);
343           EXPECT_EQ(Upper64of128(seeded), hi);
344           uint64_t un_hi, un_lo;
345           BijectiveUnhash2x64(hi, lo, var_seed, &un_hi, &un_lo);
346           EXPECT_EQ(in_lo, un_lo);
347           EXPECT_EQ(in_hi, un_hi);
348         }
349       }
350 
351       // Size changes hash value (with high probability)
352       size_t max_smaller_by = std::min(size_t{30}, size);
353       for (size_t smaller_by = 1; smaller_by <= max_smaller_by; ++smaller_by) {
354         EXPECT_NE(here, Hash128(str.data(), size - smaller_by, kSeed));
355       }
356     }
357   }
358 }
359 
360 // Test that hash values are "non-trivial" for "trivial" inputs
TEST(HashTest,Hash128Trivial)361 TEST(HashTest, Hash128Trivial) {
362   // Thorough test too slow for regression testing
363   constexpr bool thorough = false;
364 
365   // For various seeds, make sure hash of empty string is not zero.
366   constexpr uint64_t max_seed = thorough ? 0x1000000 : 0x10000;
367   for (uint64_t seed = 0; seed < max_seed; ++seed) {
368     Unsigned128 here = Hash128("", 0, seed);
369     EXPECT_NE(Lower64of128(here), 0u);
370     EXPECT_NE(Upper64of128(here), 0u);
371   }
372 
373   // For standard seed, make sure hash of small strings are not zero
374   constexpr uint32_t kSeed = 0;  // Same as GetSliceHash128
375   char input[4];
376   constexpr int max_len = thorough ? 3 : 2;
377   for (int len = 1; len <= max_len; ++len) {
378     for (uint32_t i = 0; (i >> (len * 8)) == 0; ++i) {
379       EncodeFixed32(input, i);
380       Unsigned128 here = Hash128(input, len, kSeed);
381       EXPECT_NE(Lower64of128(here), 0u);
382       EXPECT_NE(Upper64of128(here), 0u);
383     }
384   }
385 }
386 
Hash128TestDescriptor(const char * repeat,size_t limit)387 std::string Hash128TestDescriptor(const char *repeat, size_t limit) {
388   const char *mod61_encode =
389       "abcdefghijklmnopqrstuvwxyz123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ";
390 
391   std::string input;
392   while (input.size() < limit) {
393     input.append(repeat);
394   }
395   std::string rv;
396   for (size_t i = 0; i < limit; ++i) {
397     auto h = GetSliceHash128(Slice(input.data(), i));
398     uint64_t h2 = Upper64of128(h) + Lower64of128(h);
399     rv.append(1, mod61_encode[static_cast<size_t>(h2 % 61)]);
400   }
401   return rv;
402 }
403 
404 // XXH3 changes its algorithm for various sizes up through 250 bytes, so
405 // we need to check the stability of larger sizes also.
TEST(HashTest,Hash128ValueSchema)406 TEST(HashTest, Hash128ValueSchema) {
407   // Each of these derives a "descriptor" from the hash values for all
408   // lengths up to 430.
409   // Note that "b" is common for the zero-length string.
410   EXPECT_EQ(
411       Hash128TestDescriptor("foo", 430),
412       "bUMA3As8n9I4vNGhThXlEevxZlyMcbb6TYAlIKJ2f5ponsv99q962rYclQ7u3gfnRdCDQ5JI"
413       "2LrGUaCycbXrvLFe4SjgRb9RQwCfrnmNQ7VSEwSKMnkGCK3bDbXSrnIh5qLXdtvIZklbJpGH"
414       "Dqr93BlqF9ubTnOSYkSdx89XvQqflMIW8bjfQp9BPjQejWOeEQspnN1D3sfgVdFhpaQdHYA5"
415       "pI2XcPlCMFPxvrFuRr7joaDvjNe9IUZaunLPMewuXmC3EL95h52Ju3D7y9RNKhgYxMTrA84B"
416       "yJrMvyjdm3vlBxet4EN7v2GEyjbGuaZW9UL6lrX6PghJDg7ACfLGdxNbH3qXM4zaiG2RKnL5"
417       "S3WXKR78RBB5fRFQ8KDIEQjHFvSNsc3GrAEi6W8P2lv8JMTzjBODO2uN4wadVQFT9wpGfV");
418   // Note that "35D2v" is common for "Rocks"
419   EXPECT_EQ(
420       Hash128TestDescriptor("Rocks", 430),
421       "b35D2vzvklFVDqJmyLRXyApwGGO3EAT3swhe8XJAN3mY2UVPglzdmydxcba6JI2tSvwO6zSu"
422       "ANpjSM7tc9G5iMhsa7R8GfyCXRO1TnLg7HvdWNdgGGBirxZR68BgT7TQsYJt6zyEyISeXI1n"
423       "MXA48Xo7dWfJeYN6Z4KWlqZY7TgFXGbks9AX4ehZNSGtIhdO5i58qlgVX1bEejeOVaCcjC79"
424       "67DrMfOKds7rUQzjBa77sMPcoPW1vu6ljGJPZH3XkRyDMZ1twxXKkNxN3tE8nR7JHwyqBAxE"
425       "fTcjbOWrLZ1irWxRSombD8sGDEmclgF11IxqEhe3Rt7gyofO3nExGckKkS9KfRqsCHbiUyva"
426       "JGkJwUHRXaZnh58b4i1Ei9aQKZjXlvIVDixoZrjcNaH5XJIJlRZce9Z9t82wYapTpckYSg");
427   EXPECT_EQ(
428       Hash128TestDescriptor("RocksDB", 430),
429       "b35D2vFUst3XDZCRlSrhmYYakmqImV97LbBsV6EZlOEQpUPH1d1sD3xMKAPlA5UErHehg5O7"
430       "n966fZqhAf3hRc24kGCLfNAWjyUa7vSNOx3IcPoTyVRFZeFlcCtfl7t1QJumHOCpS33EBmBF"
431       "hvK13QjBbDWYWeHQhJhgV9Mqbx17TIcvUkEnYZxb8IzWNmjVsJG44Z7v52DjGj1ZzS62S2Vv"
432       "qWcDO7apvH5VHg68E9Wl6nXP21vlmUqEH9GeWRehfWVvY7mUpsAg5drHHQyDSdiMceiUuUxJ"
433       "XJqHFcDdzbbPk7xDvbLgWCKvH8k3MpQNWOmbSSRDdAP6nGlDjoTToYkcqVREHJzztSWAAq5h"
434       "GHSUNJ6OxsMHhf8EhXfHtKyUzRmPtjYyeckQcGmrQfFFLidc6cjMDKCdBG6c6HVBrS7H2R");
435 }
436 
TEST(FastRange32Test,Values)437 TEST(FastRange32Test, Values) {
438   using ROCKSDB_NAMESPACE::FastRange32;
439   // Zero range
440   EXPECT_EQ(FastRange32(0, 0), 0U);
441   EXPECT_EQ(FastRange32(123, 0), 0U);
442   EXPECT_EQ(FastRange32(0xffffffff, 0), 0U);
443 
444   // One range
445   EXPECT_EQ(FastRange32(0, 1), 0U);
446   EXPECT_EQ(FastRange32(123, 1), 0U);
447   EXPECT_EQ(FastRange32(0xffffffff, 1), 0U);
448 
449   // Two range
450   EXPECT_EQ(FastRange32(0, 2), 0U);
451   EXPECT_EQ(FastRange32(123, 2), 0U);
452   EXPECT_EQ(FastRange32(0x7fffffff, 2), 0U);
453   EXPECT_EQ(FastRange32(0x80000000, 2), 1U);
454   EXPECT_EQ(FastRange32(0xffffffff, 2), 1U);
455 
456   // Seven range
457   EXPECT_EQ(FastRange32(0, 7), 0U);
458   EXPECT_EQ(FastRange32(123, 7), 0U);
459   EXPECT_EQ(FastRange32(613566756, 7), 0U);
460   EXPECT_EQ(FastRange32(613566757, 7), 1U);
461   EXPECT_EQ(FastRange32(1227133513, 7), 1U);
462   EXPECT_EQ(FastRange32(1227133514, 7), 2U);
463   // etc.
464   EXPECT_EQ(FastRange32(0xffffffff, 7), 6U);
465 
466   // Big
467   EXPECT_EQ(FastRange32(1, 0x80000000), 0U);
468   EXPECT_EQ(FastRange32(2, 0x80000000), 1U);
469   EXPECT_EQ(FastRange32(4, 0x7fffffff), 1U);
470   EXPECT_EQ(FastRange32(4, 0x80000000), 2U);
471   EXPECT_EQ(FastRange32(0xffffffff, 0x7fffffff), 0x7ffffffeU);
472   EXPECT_EQ(FastRange32(0xffffffff, 0x80000000), 0x7fffffffU);
473 }
474 
TEST(FastRange64Test,Values)475 TEST(FastRange64Test, Values) {
476   using ROCKSDB_NAMESPACE::FastRange64;
477   // Zero range
478   EXPECT_EQ(FastRange64(0, 0), 0U);
479   EXPECT_EQ(FastRange64(123, 0), 0U);
480   EXPECT_EQ(FastRange64(0xffffFFFF, 0), 0U);
481   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0), 0U);
482 
483   // One range
484   EXPECT_EQ(FastRange64(0, 1), 0U);
485   EXPECT_EQ(FastRange64(123, 1), 0U);
486   EXPECT_EQ(FastRange64(0xffffFFFF, 1), 0U);
487   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 1), 0U);
488 
489   // Two range
490   EXPECT_EQ(FastRange64(0, 2), 0U);
491   EXPECT_EQ(FastRange64(123, 2), 0U);
492   EXPECT_EQ(FastRange64(0xffffFFFF, 2), 0U);
493   EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 2), 0U);
494   EXPECT_EQ(FastRange64(0x8000000000000000, 2), 1U);
495   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 2), 1U);
496 
497   // Seven range
498   EXPECT_EQ(FastRange64(0, 7), 0U);
499   EXPECT_EQ(FastRange64(123, 7), 0U);
500   EXPECT_EQ(FastRange64(0xffffFFFF, 7), 0U);
501   EXPECT_EQ(FastRange64(2635249153387078802, 7), 0U);
502   EXPECT_EQ(FastRange64(2635249153387078803, 7), 1U);
503   EXPECT_EQ(FastRange64(5270498306774157604, 7), 1U);
504   EXPECT_EQ(FastRange64(5270498306774157605, 7), 2U);
505   EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 7), 3U);
506   EXPECT_EQ(FastRange64(0x8000000000000000, 7), 3U);
507   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 7), 6U);
508 
509   // Big but 32-bit range
510   EXPECT_EQ(FastRange64(0x100000000, 0x80000000), 0U);
511   EXPECT_EQ(FastRange64(0x200000000, 0x80000000), 1U);
512   EXPECT_EQ(FastRange64(0x400000000, 0x7fffFFFF), 1U);
513   EXPECT_EQ(FastRange64(0x400000000, 0x80000000), 2U);
514   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x7fffFFFF), 0x7fffFFFEU);
515   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0x80000000), 0x7fffFFFFU);
516 
517   // Big, > 32-bit range
518 #if SIZE_MAX == UINT64_MAX
519   EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 0x4200000002), 0x2100000000U);
520   EXPECT_EQ(FastRange64(0x8000000000000000, 0x4200000002), 0x2100000001U);
521 
522   EXPECT_EQ(FastRange64(0x0000000000000000, 420000000002), 0U);
523   EXPECT_EQ(FastRange64(0x7fffFFFFffffFFFF, 420000000002), 210000000000U);
524   EXPECT_EQ(FastRange64(0x8000000000000000, 420000000002), 210000000001U);
525   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 420000000002), 420000000001U);
526 
527   EXPECT_EQ(FastRange64(0xffffFFFFffffFFFF, 0xffffFFFFffffFFFF),
528             0xffffFFFFffffFFFEU);
529 #endif
530 }
531 
TEST(FastRangeGenericTest,Values)532 TEST(FastRangeGenericTest, Values) {
533   using ROCKSDB_NAMESPACE::FastRangeGeneric;
534   // Generic (including big and small)
535   // Note that FastRangeGeneric is also tested indirectly above via
536   // FastRange32 and FastRange64.
537   EXPECT_EQ(
538       FastRangeGeneric(uint64_t{0x8000000000000000}, uint64_t{420000000002}),
539       uint64_t{210000000001});
540   EXPECT_EQ(FastRangeGeneric(uint64_t{0x8000000000000000}, uint16_t{12468}),
541             uint16_t{6234});
542   EXPECT_EQ(FastRangeGeneric(uint32_t{0x80000000}, uint16_t{12468}),
543             uint16_t{6234});
544   // Not recommended for typical use because for example this could fail on
545   // some platforms and pass on others:
546   //EXPECT_EQ(FastRangeGeneric(static_cast<unsigned long>(0x80000000),
547   //                           uint16_t{12468}),
548   //          uint16_t{6234});
549 }
550 
551 // for inspection of disassembly
FastRange32(uint32_t hash,uint32_t range)552 uint32_t FastRange32(uint32_t hash, uint32_t range) {
553   return ROCKSDB_NAMESPACE::FastRange32(hash, range);
554 }
555 
556 // for inspection of disassembly
FastRange64(uint64_t hash,size_t range)557 size_t FastRange64(uint64_t hash, size_t range) {
558   return ROCKSDB_NAMESPACE::FastRange64(hash, range);
559 }
560 
561 // Tests for math.h / math128.h (not worth a separate test binary)
562 using ROCKSDB_NAMESPACE::BitParity;
563 using ROCKSDB_NAMESPACE::BitsSetToOne;
564 using ROCKSDB_NAMESPACE::CountTrailingZeroBits;
565 using ROCKSDB_NAMESPACE::DecodeFixed128;
566 using ROCKSDB_NAMESPACE::DecodeFixedGeneric;
567 using ROCKSDB_NAMESPACE::EncodeFixed128;
568 using ROCKSDB_NAMESPACE::EncodeFixedGeneric;
569 using ROCKSDB_NAMESPACE::FloorLog2;
570 using ROCKSDB_NAMESPACE::Lower64of128;
571 using ROCKSDB_NAMESPACE::Multiply64to128;
572 using ROCKSDB_NAMESPACE::Unsigned128;
573 using ROCKSDB_NAMESPACE::Upper64of128;
574 
575 template <typename T>
test_BitOps()576 static void test_BitOps() {
577   // This complex code is to generalize to 128-bit values. Otherwise
578   // we could just use = static_cast<T>(0x5555555555555555ULL);
579   T everyOtherBit = 0;
580   for (unsigned i = 0; i < sizeof(T); ++i) {
581     everyOtherBit = (everyOtherBit << 8) | T{0x55};
582   }
583 
584   // This one built using bit operations, as our 128-bit layer
585   // might not implement arithmetic such as subtraction.
586   T vm1 = 0;  // "v minus one"
587 
588   for (int i = 0; i < int{8 * sizeof(T)}; ++i) {
589     T v = T{1} << i;
590     // If we could directly use arithmetic:
591     // T vm1 = static_cast<T>(v - 1);
592 
593     // FloorLog2
594     if (v > 0) {
595       EXPECT_EQ(FloorLog2(v), i);
596     }
597     if (vm1 > 0) {
598       EXPECT_EQ(FloorLog2(vm1), i - 1);
599       EXPECT_EQ(FloorLog2(everyOtherBit & vm1), (i - 1) & ~1);
600     }
601 
602     // CountTrailingZeroBits
603     if (v != 0) {
604       EXPECT_EQ(CountTrailingZeroBits(v), i);
605     }
606     if (vm1 != 0) {
607       EXPECT_EQ(CountTrailingZeroBits(vm1), 0);
608     }
609     if (i < int{8 * sizeof(T)} - 1) {
610       EXPECT_EQ(CountTrailingZeroBits(~vm1 & everyOtherBit), (i + 1) & ~1);
611     }
612 
613     // BitsSetToOne
614     EXPECT_EQ(BitsSetToOne(v), 1);
615     EXPECT_EQ(BitsSetToOne(vm1), i);
616     EXPECT_EQ(BitsSetToOne(vm1 & everyOtherBit), (i + 1) / 2);
617 
618     // BitParity
619     EXPECT_EQ(BitParity(v), 1);
620     EXPECT_EQ(BitParity(vm1), i & 1);
621     EXPECT_EQ(BitParity(vm1 & everyOtherBit), ((i + 1) / 2) & 1);
622 
623     vm1 = (vm1 << 1) | 1;
624   }
625 }
626 
TEST(MathTest,BitOps)627 TEST(MathTest, BitOps) {
628   test_BitOps<uint32_t>();
629   test_BitOps<uint64_t>();
630   test_BitOps<uint16_t>();
631   test_BitOps<uint8_t>();
632   test_BitOps<unsigned char>();
633   test_BitOps<unsigned short>();
634   test_BitOps<unsigned int>();
635   test_BitOps<unsigned long>();
636   test_BitOps<unsigned long long>();
637   test_BitOps<char>();
638   test_BitOps<size_t>();
639   test_BitOps<int32_t>();
640   test_BitOps<int64_t>();
641   test_BitOps<int16_t>();
642   test_BitOps<int8_t>();
643   test_BitOps<signed char>();
644   test_BitOps<short>();
645   test_BitOps<int>();
646   test_BitOps<long>();
647   test_BitOps<long long>();
648   test_BitOps<ptrdiff_t>();
649 }
650 
TEST(MathTest,BitOps128)651 TEST(MathTest, BitOps128) { test_BitOps<Unsigned128>(); }
652 
TEST(MathTest,Math128)653 TEST(MathTest, Math128) {
654   const Unsigned128 sixteenHexOnes = 0x1111111111111111U;
655   const Unsigned128 thirtyHexOnes = (sixteenHexOnes << 56) | sixteenHexOnes;
656   const Unsigned128 sixteenHexTwos = 0x2222222222222222U;
657   const Unsigned128 thirtyHexTwos = (sixteenHexTwos << 56) | sixteenHexTwos;
658 
659   // v will slide from all hex ones to all hex twos
660   Unsigned128 v = thirtyHexOnes;
661   for (int i = 0; i <= 30; ++i) {
662     // Test bitwise operations
663     EXPECT_EQ(BitsSetToOne(v), 30);
664     EXPECT_EQ(BitsSetToOne(~v), 128 - 30);
665     EXPECT_EQ(BitsSetToOne(v & thirtyHexOnes), 30 - i);
666     EXPECT_EQ(BitsSetToOne(v | thirtyHexOnes), 30 + i);
667     EXPECT_EQ(BitsSetToOne(v ^ thirtyHexOnes), 2 * i);
668     EXPECT_EQ(BitsSetToOne(v & thirtyHexTwos), i);
669     EXPECT_EQ(BitsSetToOne(v | thirtyHexTwos), 60 - i);
670     EXPECT_EQ(BitsSetToOne(v ^ thirtyHexTwos), 60 - 2 * i);
671 
672     // Test comparisons
673     EXPECT_EQ(v == thirtyHexOnes, i == 0);
674     EXPECT_EQ(v == thirtyHexTwos, i == 30);
675     EXPECT_EQ(v > thirtyHexOnes, i > 0);
676     EXPECT_EQ(v > thirtyHexTwos, false);
677     EXPECT_EQ(v >= thirtyHexOnes, true);
678     EXPECT_EQ(v >= thirtyHexTwos, i == 30);
679     EXPECT_EQ(v < thirtyHexOnes, false);
680     EXPECT_EQ(v < thirtyHexTwos, i < 30);
681     EXPECT_EQ(v <= thirtyHexOnes, i == 0);
682     EXPECT_EQ(v <= thirtyHexTwos, true);
683 
684     // Update v, clearing upper-most byte
685     v = ((v << 12) >> 8) | 0x2;
686   }
687 
688   for (int i = 0; i < 128; ++i) {
689     // Test shifts
690     Unsigned128 sl = thirtyHexOnes << i;
691     Unsigned128 sr = thirtyHexOnes >> i;
692     EXPECT_EQ(BitsSetToOne(sl), std::min(30, 32 - i / 4));
693     EXPECT_EQ(BitsSetToOne(sr), std::max(0, 30 - (i + 3) / 4));
694     EXPECT_EQ(BitsSetToOne(sl & sr), i % 2 ? 0 : std::max(0, 30 - i / 2));
695   }
696 
697   // Test 64x64->128 multiply
698   Unsigned128 product =
699       Multiply64to128(0x1111111111111111U, 0x2222222222222222U);
700   EXPECT_EQ(Lower64of128(product), 2295594818061633090U);
701   EXPECT_EQ(Upper64of128(product), 163971058432973792U);
702 }
703 
TEST(MathTest,Coding128)704 TEST(MathTest, Coding128) {
705   const char *in = "_1234567890123456";
706   // Note: in + 1 is likely unaligned
707   Unsigned128 decoded = DecodeFixed128(in + 1);
708   EXPECT_EQ(Lower64of128(decoded), 0x3837363534333231U);
709   EXPECT_EQ(Upper64of128(decoded), 0x3635343332313039U);
710   char out[18];
711   out[0] = '_';
712   EncodeFixed128(out + 1, decoded);
713   out[17] = '\0';
714   EXPECT_EQ(std::string(in), std::string(out));
715 }
716 
TEST(MathTest,CodingGeneric)717 TEST(MathTest, CodingGeneric) {
718   const char *in = "_1234567890123456";
719   // Decode
720   // Note: in + 1 is likely unaligned
721   Unsigned128 decoded128 = DecodeFixedGeneric<Unsigned128>(in + 1);
722   EXPECT_EQ(Lower64of128(decoded128), 0x3837363534333231U);
723   EXPECT_EQ(Upper64of128(decoded128), 0x3635343332313039U);
724 
725   uint64_t decoded64 = DecodeFixedGeneric<uint64_t>(in + 1);
726   EXPECT_EQ(decoded64, 0x3837363534333231U);
727 
728   uint32_t decoded32 = DecodeFixedGeneric<uint32_t>(in + 1);
729   EXPECT_EQ(decoded32, 0x34333231U);
730 
731   uint16_t decoded16 = DecodeFixedGeneric<uint16_t>(in + 1);
732   EXPECT_EQ(decoded16, 0x3231U);
733 
734   // Encode
735   char out[18];
736   out[0] = '_';
737   memset(out + 1, '\0', 17);
738   EncodeFixedGeneric(out + 1, decoded128);
739   EXPECT_EQ(std::string(in), std::string(out));
740 
741   memset(out + 1, '\0', 9);
742   EncodeFixedGeneric(out + 1, decoded64);
743   EXPECT_EQ(std::string("_12345678"), std::string(out));
744 
745   memset(out + 1, '\0', 5);
746   EncodeFixedGeneric(out + 1, decoded32);
747   EXPECT_EQ(std::string("_1234"), std::string(out));
748 
749   memset(out + 1, '\0', 3);
750   EncodeFixedGeneric(out + 1, decoded16);
751   EXPECT_EQ(std::string("_12"), std::string(out));
752 }
753 
main(int argc,char ** argv)754 int main(int argc, char** argv) {
755   fprintf(stderr, "NPHash64 id: %x\n",
756           static_cast<int>(ROCKSDB_NAMESPACE::GetSliceNPHash64("RocksDB")));
757   ::testing::InitGoogleTest(&argc, argv);
758 
759   return RUN_ALL_TESTS();
760 }
761