1 // Copyright 2008-present Contributors to the OpenImageIO project.
2 // SPDX-License-Identifier: BSD-3-Clause
3 // https://github.com/OpenImageIO/oiio/blob/master/LICENSE.md
4 
5 
6 #include <cstdio>
7 #include <functional>
8 #include <iostream>
9 #include <random>
10 #include <vector>
11 
12 #include <OpenImageIO/argparse.h>
13 #include <OpenImageIO/benchmark.h>
14 #include <OpenImageIO/hash.h>
15 #include <OpenImageIO/strutil.h>
16 #include <OpenImageIO/timer.h>
17 #include <OpenImageIO/unittest.h>
18 
19 #include <OpenImageIO/detail/farmhash.h>
20 
21 
22 
23 using namespace OIIO;
24 using OIIO::Strutil::print;
25 
26 static int iterations = 100 << 20;
27 static int ntrials    = 1;
28 static bool verbose   = false;
29 
30 static std::vector<uint32_t> data;
31 
32 uint64_t
test_bjhash(int len)33 test_bjhash(int len)
34 {
35     char* ptr  = reinterpret_cast<char*>(data.data());
36     uint64_t a = 0;
37     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
38         a += bjhash::hashlittle(ptr, len);
39     return a;
40 }
41 
42 uint64_t
test_xxhash(int len)43 test_xxhash(int len)
44 {
45     char* ptr  = reinterpret_cast<char*>(data.data());
46     uint64_t a = 0;
47     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
48         a += xxhash::xxhash(ptr, len, 0);
49     return a;
50 }
51 
52 uint64_t
test_farmhash(int len)53 test_farmhash(int len)
54 {
55     char* ptr  = reinterpret_cast<char*>(data.data());
56     uint64_t a = 0;
57     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
58         a += farmhash::Hash(ptr, len);
59     return a;
60 }
61 
62 uint64_t
test_farmhash_inlined(int len)63 test_farmhash_inlined(int len)
64 {
65     char* ptr  = reinterpret_cast<char*>(data.data());
66     uint64_t a = 0;
67     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
68         a += farmhash::inlined::Hash(ptr, len);
69     return a;
70 }
71 
72 uint64_t
test_fasthash64(int len)73 test_fasthash64(int len)
74 {
75     char* ptr  = reinterpret_cast<char*>(data.data());
76     uint64_t a = 0;
77     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
78         a += fasthash::fasthash64(ptr, len, 0);
79     return a;
80 }
81 
82 #ifdef __AES__
83 
84 // https://github.com/gamozolabs/falkhash
85 
86 // Licensed with the unlicense ( http://choosealicense.com/licenses/unlicense/ )
87 
88 inline uint64_t
falkhash(const void * pbuf,uint64_t len,uint64_t pseed=0)89 falkhash(const void* pbuf, uint64_t len, uint64_t pseed = 0)
90 {
91     uint8_t* buf = (uint8_t*)pbuf;
92 
93     uint64_t iv[2];
94 
95     __m128i hash, seed;
96 
97     /* Create the 128-bit seed. Low 64-bits gets seed, high 64-bits gets
98      * seed + len + 1. The +1 ensures that both 64-bits values will never be
99      * the same (with the exception of a length of -1. If you have that much
100      * ram, send me some).
101      */
102     iv[0] = pseed;
103     iv[1] = pseed + len + 1;
104 
105     /* Load the IV into a __m128i */
106     seed = _mm_loadu_si128((__m128i*)iv);
107 
108     /* Hash starts out with the seed */
109     hash = seed;
110 
111     while (len) {
112         uint8_t tmp[0x50];
113 
114         __m128i piece[5];
115 
116         /* If the data is smaller than one chunk, pad it with zeros */
117         if (len < 0x50) {
118             memset(tmp, 0, 0x50);
119             for (int i = 0; i < int(len); i++)
120                 tmp[i] = buf[i];
121             buf = tmp;
122             len = 0x50;
123         }
124 
125         /* Load up the data into __m128is */
126         piece[0] = _mm_loadu_si128((__m128i*)(buf + 0 * 0x10));
127         piece[1] = _mm_loadu_si128((__m128i*)(buf + 1 * 0x10));
128         piece[2] = _mm_loadu_si128((__m128i*)(buf + 2 * 0x10));
129         piece[3] = _mm_loadu_si128((__m128i*)(buf + 3 * 0x10));
130         piece[4] = _mm_loadu_si128((__m128i*)(buf + 4 * 0x10));
131 
132         /* xor each piece against the seed */
133         piece[0] = _mm_xor_si128(piece[0], seed);
134         piece[1] = _mm_xor_si128(piece[1], seed);
135         piece[2] = _mm_xor_si128(piece[2], seed);
136         piece[3] = _mm_xor_si128(piece[3], seed);
137         piece[4] = _mm_xor_si128(piece[4], seed);
138 
139         /* aesenc all into piece[0] */
140         piece[0] = _mm_aesenc_si128(piece[0], piece[1]);
141         piece[0] = _mm_aesenc_si128(piece[0], piece[2]);
142         piece[0] = _mm_aesenc_si128(piece[0], piece[3]);
143         piece[0] = _mm_aesenc_si128(piece[0], piece[4]);
144 
145         /* Finalize piece[0] by aesencing against seed */
146         piece[0] = _mm_aesenc_si128(piece[0], seed);
147 
148         /* aesenc the piece into the hash */
149         hash = _mm_aesenc_si128(hash, piece[0]);
150 
151         buf += 0x50;
152         len -= 0x50;
153     }
154 
155     /* Finalize hash by aesencing against seed four times */
156     hash = _mm_aesenc_si128(hash, seed);
157     hash = _mm_aesenc_si128(hash, seed);
158     hash = _mm_aesenc_si128(hash, seed);
159     hash = _mm_aesenc_si128(hash, seed);
160 
161     return _mm_cvtsi128_si64(hash);
162 }
163 
164 uint64_t
test_falkhash(int len)165 test_falkhash(int len)
166 {
167     char* ptr  = reinterpret_cast<char*>(data.data());
168     uint64_t a = 0;
169     for (int i = 0, e = iterations / len; i < e; i++, ptr += len)
170         a += falkhash(ptr, len, 0);
171     return a;
172 }
173 #endif
174 
175 
176 
177 static void
getargs(int argc,char * argv[])178 getargs(int argc, char* argv[])
179 {
180     ArgParse ap;
181     // clang-format off
182     ap.intro("hash_test\n" OIIO_INTRO_STRING)
183       .usage("hash_test [options]");
184 
185     ap.arg("-v", &verbose)
186       .help("Verbose mode");
187     ap.arg("--iters %d", &iterations)
188       .help(Strutil::sprintf("Number of iterations (default: %d)", iterations));
189     ap.arg("--trials %d", &ntrials)
190       .help("Number of trials");
191     // clang-format on
192 
193     ap.parse(argc, (const char**)argv);
194 }
195 
196 
197 
198 int
main(int argc,char * argv[])199 main(int argc, char* argv[])
200 {
201 #if !defined(NDEBUG) || defined(OIIO_CI) || defined(OIIO_CODE_COVERAGE)
202     // For the sake of test time, reduce the default iterations for DEBUG,
203     // CI, and code coverage builds. Explicit use of --iters or --trials
204     // will override this, since it comes before the getargs() call.
205     iterations /= 10;
206     ntrials = 1;
207 #endif
208 
209     getargs(argc, argv);
210 
211     // fill data with random values so we can hash it a bunch of different ways
212     std::mt19937 rng(42);
213     data.resize(iterations / sizeof(data[0]), 0);
214     for (uint32_t& d : data)
215         d = rng();
216 
217     print("All times are seconds per {}\n", Strutil::memformat(iterations));
218 
219     // a sampling of sizes, both tiny and large-ish
220     int hashlen[] = {
221         1,         2,   4,   8,    12,
222         16,        20,  24,  32,   64,  // small to medium
223         3,         5,   6,   7,    13,
224         15,        19,  23,  49,   67,    // small to medium - odd sizes
225         128,       256, 512, 1024,        // large (even)
226         95,        155, 243, 501,  1337,  // large (odd sizes)
227         iterations                        // huge
228     };
229 
230     // present results from smallest to largest
231     std::sort(std::begin(hashlen), std::end(hashlen));
232 
233     auto candidates = {
234         std::make_pair("BJ hash           ", test_bjhash),
235         std::make_pair("XX hash           ", test_xxhash),
236         std::make_pair("farmhash          ", test_farmhash),
237         std::make_pair("farmhash::inlined ", test_farmhash_inlined),
238         std::make_pair("fasthash64        ", test_fasthash64),
239 #ifdef __AES__
240         std::make_pair("falkhash          ", test_falkhash),
241 #endif
242     };
243 
244     for (int len : hashlen) {
245         auto mem = Strutil::memformat(len, 2);
246         print("\nHash benchmark for {} hashes\n", mem);
247 
248         double best_time = std::numeric_limits<double>::max();
249         const char* best = "";
250         for (auto&& c : candidates) {
251             double range;
252             double t = time_trial(std::bind(c.second, len), ntrials, 1, &range);
253             print("  {} took {} (range {})\n", c.first,
254                   Strutil::timeintervalformat(t, 3),
255                   Strutil::timeintervalformat(range, 3));
256             if (t < best_time) {
257                 best_time = t;
258                 best      = c.first;
259             }
260         }
261 
262         print("{} winner: {}\n", mem, best);
263     }
264 
265     print("\nTesting correctness\n");
266     using hashfn_t = uint64_t(string_view);
267     auto hashes    = {
268         std::make_pair<string_view, hashfn_t*>("BJ hash           ",
269                                                [](string_view s) -> uint64_t {
270                                                    return bjhash::strhash(s);
271                                                }),
272         std::make_pair<string_view, hashfn_t*>("XX hash           ",
273                                                [](string_view s) -> uint64_t {
274                                                    return xxhash::xxhash(s);
275                                                }),
276         std::make_pair<string_view, hashfn_t*>("farmhash          ",
277                                                [](string_view s) -> uint64_t {
278                                                    return farmhash::Hash(s);
279                                                }),
280         std::make_pair<string_view, hashfn_t*>(
281             "farmhash::inlined ",
282             [](string_view s) -> uint64_t {
283                 return farmhash::inlined::Hash(s.data(), s.size());
284             }),
285         std::make_pair<string_view, hashfn_t*>("fasthash64        ",
286                                                [](string_view s) -> uint64_t {
287                                                    return fasthash::fasthash64(
288                                                        s.data(), s.size());
289                                                }),
290 #ifdef __AES__
291         std::make_pair<string_view, hashfn_t*>("falkhash          ",
292                                                [](string_view s) -> uint64_t {
293                                                    return falkhash(s.data(),
294                                                                    s.size());
295                                                }),
296 #endif
297     };
298     const char* teststrings[]
299         = { "",                  // empty string
300             "P",                 // one-char string
301             "openimageio_2008",  // identifier-length string
302             "/shots/abc/seq42/tex/my_texture/my_texture_acscg.0042.tx" };
303     uint64_t expected[][4] = {
304         {
305             // bjhash
306             0x0000000000000000,
307             0x00000000b7656eb4,
308             0x0000000055af8ab5,
309             0x00000000c000c946,
310         },
311         {
312             // xxhash
313             0x03b139605dc5b187,
314             0xa4820414c8aff387,
315             0x4465cf017b51e76b,
316             0x1c9ebf5ebae6e8ad,
317         },
318         {
319             // farmhash
320             0x9ae16a3b2f90404f,
321             0x5b5dffc690bdcd30,
322             0x0dd8ef814e8a4317,
323             0x43ad136c828d5214,
324         },
325         {
326             // farmhash::inlined
327             0x9ae16a3b2f90404f,
328             0x5b5dffc690bdcd30,
329             0x0dd8ef814e8a4317,
330             0x43ad136c828d5214,
331         },
332         {
333             // fasthash64
334             0x5b38e9e25863460c,
335             0x38951d1ac28aad44,
336             0x91271089669c4608,
337             0xc66714c1deabacf0,
338         },
339         {
340             // falkhash
341             0xaa7f7a3188504dd7,
342             0x8bae7d7501558eeb,
343             0x0af667ed264008a1,
344             0x25f0142ed7151208,
345         },
346     };
347 
348     int stringno = 0;
349     for (string_view s : teststrings) {
350         print(" Hash testing '{}'\n", s);
351         int hashno = 0;
352         for (auto&& h : hashes) {
353             auto val = (*h.second)(s);
354             print("  {}  {:016x}\n", h.first, val);
355             OIIO_CHECK_EQUAL(val, expected[hashno][stringno]);
356             ++hashno;
357         }
358         ++stringno;
359     }
360 
361     return unit_test_failures;
362 }
363