1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4
5
6 #include <cstdio>
7 #include <functional>
8 #include <iostream>
9 #include <random>
10 #include <vector>
11
12 #include <OpenImageIO/argparse.h>
13 #include <OpenImageIO/benchmark.h>
14 #include <OpenImageIO/hash.h>
15 #include <OpenImageIO/strutil.h>
16 #include <OpenImageIO/timer.h>
17 #include <OpenImageIO/unittest.h>
18
19 #include <OpenImageIO/detail/farmhash.h>
20
21
22
23 using namespace OIIO;
24 using OIIO::Strutil::print;
25
26 static int iterations = 100 << 20;
27 static int ntrials = 1;
28 static bool verbose = false;
29
30 static std::vector<uint32_t> data;
31
32 uint64_t
test_bjhash(int len)33 test_bjhash(int len)
34 {
35 char* ptr = reinterpret_cast<char*>(data.data());
36 uint64_t a = 0;
37 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
38 a += bjhash::hashlittle(ptr, len);
39 return a;
40 }
41
42 uint64_t
test_xxhash(int len)43 test_xxhash(int len)
44 {
45 char* ptr = reinterpret_cast<char*>(data.data());
46 uint64_t a = 0;
47 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
48 a += xxhash::xxhash(ptr, len, 0);
49 return a;
50 }
51
52 uint64_t
test_farmhash(int len)53 test_farmhash(int len)
54 {
55 char* ptr = reinterpret_cast<char*>(data.data());
56 uint64_t a = 0;
57 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
58 a += farmhash::Hash(ptr, len);
59 return a;
60 }
61
62 uint64_t
test_farmhash_inlined(int len)63 test_farmhash_inlined(int len)
64 {
65 char* ptr = reinterpret_cast<char*>(data.data());
66 uint64_t a = 0;
67 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
68 a += farmhash::inlined::Hash(ptr, len);
69 return a;
70 }
71
72 uint64_t
test_fasthash64(int len)73 test_fasthash64(int len)
74 {
75 char* ptr = reinterpret_cast<char*>(data.data());
76 uint64_t a = 0;
77 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
78 a += fasthash::fasthash64(ptr, len, 0);
79 return a;
80 }
81
82 #ifdef __AES__
83
84 // https://github.com/gamozolabs/falkhash
85
86 // Licensed with the unlicense ( http://choosealicense.com/licenses/unlicense/ )
87
88 inline uint64_t
falkhash(const void * pbuf,uint64_t len,uint64_t pseed=0)89 falkhash(const void* pbuf, uint64_t len, uint64_t pseed = 0)
90 {
91 uint8_t* buf = (uint8_t*)pbuf;
92
93 uint64_t iv[2];
94
95 __m128i hash, seed;
96
97 /* Create the 128-bit seed. Low 64-bits gets seed, high 64-bits gets
98 * seed + len + 1. The +1 ensures that both 64-bits values will never be
99 * the same (with the exception of a length of -1. If you have that much
100 * ram, send me some).
101 */
102 iv[0] = pseed;
103 iv[1] = pseed + len + 1;
104
105 /* Load the IV into a __m128i */
106 seed = _mm_loadu_si128((__m128i*)iv);
107
108 /* Hash starts out with the seed */
109 hash = seed;
110
111 while (len) {
112 uint8_t tmp[0x50];
113
114 __m128i piece[5];
115
116 /* If the data is smaller than one chunk, pad it with zeros */
117 if (len < 0x50) {
118 memset(tmp, 0, 0x50);
119 for (int i = 0; i < int(len); i++)
120 tmp[i] = buf[i];
121 buf = tmp;
122 len = 0x50;
123 }
124
125 /* Load up the data into __m128is */
126 piece[0] = _mm_loadu_si128((__m128i*)(buf + 0 * 0x10));
127 piece[1] = _mm_loadu_si128((__m128i*)(buf + 1 * 0x10));
128 piece[2] = _mm_loadu_si128((__m128i*)(buf + 2 * 0x10));
129 piece[3] = _mm_loadu_si128((__m128i*)(buf + 3 * 0x10));
130 piece[4] = _mm_loadu_si128((__m128i*)(buf + 4 * 0x10));
131
132 /* xor each piece against the seed */
133 piece[0] = _mm_xor_si128(piece[0], seed);
134 piece[1] = _mm_xor_si128(piece[1], seed);
135 piece[2] = _mm_xor_si128(piece[2], seed);
136 piece[3] = _mm_xor_si128(piece[3], seed);
137 piece[4] = _mm_xor_si128(piece[4], seed);
138
139 /* aesenc all into piece[0] */
140 piece[0] = _mm_aesenc_si128(piece[0], piece[1]);
141 piece[0] = _mm_aesenc_si128(piece[0], piece[2]);
142 piece[0] = _mm_aesenc_si128(piece[0], piece[3]);
143 piece[0] = _mm_aesenc_si128(piece[0], piece[4]);
144
145 /* Finalize piece[0] by aesencing against seed */
146 piece[0] = _mm_aesenc_si128(piece[0], seed);
147
148 /* aesenc the piece into the hash */
149 hash = _mm_aesenc_si128(hash, piece[0]);
150
151 buf += 0x50;
152 len -= 0x50;
153 }
154
155 /* Finalize hash by aesencing against seed four times */
156 hash = _mm_aesenc_si128(hash, seed);
157 hash = _mm_aesenc_si128(hash, seed);
158 hash = _mm_aesenc_si128(hash, seed);
159 hash = _mm_aesenc_si128(hash, seed);
160
161 return _mm_cvtsi128_si64(hash);
162 }
163
164 uint64_t
test_falkhash(int len)165 test_falkhash(int len)
166 {
167 char* ptr = reinterpret_cast<char*>(data.data());
168 uint64_t a = 0;
169 for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
170 a += falkhash(ptr, len, 0);
171 return a;
172 }
173 #endif
174
175
176
177 static void
getargs(int argc,char * argv[])178 getargs(int argc, char* argv[])
179 {
180 ArgParse ap;
181 // clang-format off
182 ap.intro("hash_test\n" OIIO_INTRO_STRING)
183 .usage("hash_test [options]");
184
185 ap.arg("-v", &verbose)
186 .help("Verbose mode");
187 ap.arg("--iters %d", &iterations)
188 .help(Strutil::sprintf("Number of iterations (default: %d)", iterations));
189 ap.arg("--trials %d", &ntrials)
190 .help("Number of trials");
191 // clang-format on
192
193 ap.parse(argc, (const char**)argv);
194 }
195
196
197
198 int
main(int argc,char * argv[])199 main(int argc, char* argv[])
200 {
201 #if !defined(NDEBUG) || defined(OIIO_CI) || defined(OIIO_CODE_COVERAGE)
202 // For the sake of test time, reduce the default iterations for DEBUG,
203 // CI, and code coverage builds. Explicit use of --iters or --trials
204 // will override this, since it comes before the getargs() call.
205 iterations /= 10;
206 ntrials = 1;
207 #endif
208
209 getargs(argc, argv);
210
211 // fill data with random values so we can hash it a bunch of different ways
212 std::mt19937 rng(42);
213 data.resize(iterations / sizeof(data[0]), 0);
214 for (uint32_t& d : data)
215 d = rng();
216
217 print("All times are seconds per {}\n", Strutil::memformat(iterations));
218
219 // a sampling of sizes, both tiny and large-ish
220 int hashlen[] = {
221 1, 2, 4, 8, 12,
222 16, 20, 24, 32, 64, // small to medium
223 3, 5, 6, 7, 13,
224 15, 19, 23, 49, 67, // small to medium - odd sizes
225 128, 256, 512, 1024, // large (even)
226 95, 155, 243, 501, 1337, // large (odd sizes)
227 iterations // huge
228 };
229
230 // present results from smallest to largest
231 std::sort(std::begin(hashlen), std::end(hashlen));
232
233 auto candidates = {
234 std::make_pair("BJ hash ", test_bjhash),
235 std::make_pair("XX hash ", test_xxhash),
236 std::make_pair("farmhash ", test_farmhash),
237 std::make_pair("farmhash::inlined ", test_farmhash_inlined),
238 std::make_pair("fasthash64 ", test_fasthash64),
239 #ifdef __AES__
240 std::make_pair("falkhash ", test_falkhash),
241 #endif
242 };
243
244 for (int len : hashlen) {
245 auto mem = Strutil::memformat(len, 2);
246 print("\nHash benchmark for {} hashes\n", mem);
247
248 double best_time = std::numeric_limits<double>::max();
249 const char* best = "";
250 for (auto&& c : candidates) {
251 double range;
252 double t = time_trial(std::bind(c.second, len), ntrials, 1, &range);
253 print(" {} took {} (range {})\n", c.first,
254 Strutil::timeintervalformat(t, 3),
255 Strutil::timeintervalformat(range, 3));
256 if (t < best_time) {
257 best_time = t;
258 best = c.first;
259 }
260 }
261
262 print("{} winner: {}\n", mem, best);
263 }
264
265 print("\nTesting correctness\n");
266 using hashfn_t = uint64_t(string_view);
267 auto hashes = {
268 std::make_pair<string_view, hashfn_t*>("BJ hash ",
269 [](string_view s) -> uint64_t {
270 return bjhash::strhash(s);
271 }),
272 std::make_pair<string_view, hashfn_t*>("XX hash ",
273 [](string_view s) -> uint64_t {
274 return xxhash::xxhash(s);
275 }),
276 std::make_pair<string_view, hashfn_t*>("farmhash ",
277 [](string_view s) -> uint64_t {
278 return farmhash::Hash(s);
279 }),
280 std::make_pair<string_view, hashfn_t*>(
281 "farmhash::inlined ",
282 [](string_view s) -> uint64_t {
283 return farmhash::inlined::Hash(s.data(), s.size());
284 }),
285 std::make_pair<string_view, hashfn_t*>("fasthash64 ",
286 [](string_view s) -> uint64_t {
287 return fasthash::fasthash64(
288 s.data(), s.size());
289 }),
290 #ifdef __AES__
291 std::make_pair<string_view, hashfn_t*>("falkhash ",
292 [](string_view s) -> uint64_t {
293 return falkhash(s.data(),
294 s.size());
295 }),
296 #endif
297 };
298 const char* teststrings[]
299 = { "", // empty string
300 "P", // one-char string
301 "openimageio_2008", // identifier-length string
302 "/shots/abc/seq42/tex/my_texture/my_texture_acscg.0042.tx" };
303 uint64_t expected[][4] = {
304 {
305 // bjhash
306 0x0000000000000000,
307 0x00000000b7656eb4,
308 0x0000000055af8ab5,
309 0x00000000c000c946,
310 },
311 {
312 // xxhash
313 0x03b139605dc5b187,
314 0xa4820414c8aff387,
315 0x4465cf017b51e76b,
316 0x1c9ebf5ebae6e8ad,
317 },
318 {
319 // farmhash
320 0x9ae16a3b2f90404f,
321 0x5b5dffc690bdcd30,
322 0x0dd8ef814e8a4317,
323 0x43ad136c828d5214,
324 },
325 {
326 // farmhash::inlined
327 0x9ae16a3b2f90404f,
328 0x5b5dffc690bdcd30,
329 0x0dd8ef814e8a4317,
330 0x43ad136c828d5214,
331 },
332 {
333 // fasthash64
334 0x5b38e9e25863460c,
335 0x38951d1ac28aad44,
336 0x91271089669c4608,
337 0xc66714c1deabacf0,
338 },
339 {
340 // falkhash
341 0xaa7f7a3188504dd7,
342 0x8bae7d7501558eeb,
343 0x0af667ed264008a1,
344 0x25f0142ed7151208,
345 },
346 };
347
348 int stringno = 0;
349 for (string_view s : teststrings) {
350 print(" Hash testing '{}'\n", s);
351 int hashno = 0;
352 for (auto&& h : hashes) {
353 auto val = (*h.second)(s);
354 print(" {} {:016x}\n", h.first, val);
355 OIIO_CHECK_EQUAL(val, expected[hashno][stringno]);
356 ++hashno;
357 }
358 ++stringno;
359 }
360
361 return unit_test_failures;
362 }
363