1 // Copyright (C) 2011  Davis E. King (davis@dlib.net)
2 // License: Boost Software License   See LICENSE.txt for the full license.
3 #ifndef DLIB_MURMUR_HAsH_3_Hh_
4 #define DLIB_MURMUR_HAsH_3_Hh_
5 
6 #include "murmur_hash3_abstract.h"
7 #include "../uintn.h"
8 #include <utility>
9 #include <string.h>
10 
11 namespace dlib
12 {
13     //-----------------------------------------------------------------------------
14     // The original MurmurHash3 code was written by Austin Appleby, and is placed
15     // in the public domain. The author hereby disclaims copyright to this source code.
16     // The code in this particular file was modified by Davis E. King.  In
17     // particular, endian-swapping was added along with some other minor code
18     // changes like avoiding strict aliasing violations.
19 
20 
21     //-----------------------------------------------------------------------------
22     // Platform-specific functions and macros
23 
24     // Microsoft Visual Studio
25 
26 
27 #if ((defined(__GNUC__) && __GNUC__ >= 7) || defined(__clang__))
28 #define DLIB_FALLTHROUGH [[fallthrough]]
29 #else
30 #define DLIB_FALLTHROUGH
31 #endif
32 
33 #if defined(_MSC_VER)
34 
35 #define DLIB_FORCE_INLINE	__forceinline
36 
37 #include <stdlib.h>
38 
39 #define DLIB_ROTL32(x,y)	_rotl(x,y)
40 #define DLIB_ROTL64(x,y)	_rotl64(x,y)
41 
42 #define DLIB_BIG_CONSTANT(x) (x)
43 
44     // Other compilers
45 
46 #else	// defined(_MSC_VER)
47 
48 #define	DLIB_FORCE_INLINE __attribute__((always_inline)) inline
49 
50     inline uint32 murmur_rotl32 ( uint32 x, int8 r )
51     {
52         return (x << r) | (x >> (32 - r));
53     }
54 
55     inline uint64 murmur_rotl64 ( uint64 x, int8 r )
56     {
57         return (x << r) | (x >> (64 - r));
58     }
59 
60 #define	DLIB_ROTL32(x,y)	dlib::murmur_rotl32(x,y)
61 #define DLIB_ROTL64(x,y)	dlib::murmur_rotl64(x,y)
62 
63 #define DLIB_BIG_CONSTANT(x) (x##LLU)
64 
65 #endif // !defined(_MSC_VER)
66 
67 // ----------------------------------------------------------------------------------------
68     // Block read - if your platform needs to do endian-swapping or can only
69     // handle aligned reads, do the conversion here
70 
murmur_getblock(const uint32 * p,int i)71     DLIB_FORCE_INLINE uint32 murmur_getblock ( const uint32 * p, int i )
72     {
73         // The reason we do a memcpy() here instead of simply returning p[i] is because
74         // doing it this way avoids violations of the strict aliasing rule when all these
75         // functions are inlined into the user's code.
76         uint32 temp;
77         memcpy(&temp, p+i, 4);
78         return temp;
79     }
80 
murmur_getblock_byte_swap(const uint32 * p,int i)81     DLIB_FORCE_INLINE uint32 murmur_getblock_byte_swap ( const uint32 * p, int i )
82     {
83         union
84         {
85             uint8 bytes[4];
86             uint32 val;
87         } temp;
88 
89         const uint8* pp = reinterpret_cast<const uint8*>(p + i);
90         temp.bytes[0] = pp[3];
91         temp.bytes[1] = pp[2];
92         temp.bytes[2] = pp[1];
93         temp.bytes[3] = pp[0];
94 
95         return temp.val;
96     }
97 
murmur_getblock(const uint64 * p,int i)98     DLIB_FORCE_INLINE uint64 murmur_getblock ( const uint64 * p, int i )
99     {
100         // The reason we do a memcpy() here instead of simply returning p[i] is because
101         // doing it this way avoids violations of the strict aliasing rule when all these
102         // functions are inlined into the user's code.
103         uint64 temp;
104         memcpy(&temp, p+i, 8);
105         return temp;
106     }
107 
murmur_getblock_byte_swap(const uint64 * p,int i)108     DLIB_FORCE_INLINE uint64 murmur_getblock_byte_swap ( const uint64 * p, int i )
109     {
110         union
111         {
112             uint8 bytes[8];
113             uint64 val;
114         } temp;
115 
116         const uint8* pp = reinterpret_cast<const uint8*>(p + i);
117         temp.bytes[0] = pp[7];
118         temp.bytes[1] = pp[6];
119         temp.bytes[2] = pp[5];
120         temp.bytes[3] = pp[4];
121         temp.bytes[4] = pp[3];
122         temp.bytes[5] = pp[2];
123         temp.bytes[6] = pp[1];
124         temp.bytes[7] = pp[0];
125 
126         return temp.val;
127     }
128 
129 // ----------------------------------------------------------------------------------------
130     // Finalization mix - force all bits of a hash block to avalanche
131 
murmur_fmix(uint32 h)132     DLIB_FORCE_INLINE uint32 murmur_fmix ( uint32 h )
133     {
134         h ^= h >> 16;
135         h *= 0x85ebca6b;
136         h ^= h >> 13;
137         h *= 0xc2b2ae35;
138         h ^= h >> 16;
139 
140         return h;
141     }
142 
143 // ----------------------------------------------------------------------------------------
144 
murmur_fmix(uint64 k)145     DLIB_FORCE_INLINE uint64 murmur_fmix ( uint64 k )
146     {
147         k ^= k >> 33;
148         k *= DLIB_BIG_CONSTANT(0xff51afd7ed558ccd);
149         k ^= k >> 33;
150         k *= DLIB_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
151         k ^= k >> 33;
152 
153         return k;
154     }
155 
156 // ----------------------------------------------------------------------------------------
157 
158     inline uint32 murmur_hash3 (
159         const void * key,
160         const int len,
161         const uint32 seed = 0
162     )
163     {
164         const uint8 * data = (const uint8*)key;
165         const int nblocks = len / 4;
166 
167         uint32 h1 = seed;
168 
169         uint32 c1 = 0xcc9e2d51;
170         uint32 c2 = 0x1b873593;
171 
172         //----------
173         // body
174 
175         const uint32 * blocks = (const uint32 *)(data + nblocks*4);
176 
177         bool is_little_endian = true;
178         uint32 endian_test = 1;
179         if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
180             is_little_endian = false;
181 
182 
183         if (is_little_endian)
184         {
185             for(int i = -nblocks; i; i++)
186             {
187                 uint32 k1 = murmur_getblock(blocks,i);
188 
189                 k1 *= c1;
190                 k1 = DLIB_ROTL32(k1,15);
191                 k1 *= c2;
192 
193                 h1 ^= k1;
194                 h1 = DLIB_ROTL32(h1,13);
195                 h1 = h1*5+0xe6546b64;
196             }
197         }
198         else
199         {
200             for(int i = -nblocks; i; i++)
201             {
202                 uint32 k1 = murmur_getblock_byte_swap(blocks,i);
203 
204                 k1 *= c1;
205                 k1 = DLIB_ROTL32(k1,15);
206                 k1 *= c2;
207 
208                 h1 ^= k1;
209                 h1 = DLIB_ROTL32(h1,13);
210                 h1 = h1*5+0xe6546b64;
211             }
212         }
213 
214         //----------
215         // tail
216 
217         const uint8 * tail = (const uint8*)(data + nblocks*4);
218 
219         uint32 k1 = 0;
220 
221         switch(len & 3)
222         {
223             case 3: k1 ^= tail[2] << 16;
224                     DLIB_FALLTHROUGH; // fall through
225             case 2: k1 ^= tail[1] << 8;
226                     DLIB_FALLTHROUGH; // fall through
227             case 1: k1 ^= tail[0];
228                     k1 *= c1; k1 = DLIB_ROTL32(k1,15); k1 *= c2; h1 ^= k1;
229         };
230 
231         //----------
232         // finalization
233 
234         h1 ^= len;
235 
236         h1 = murmur_fmix(h1);
237 
238         return h1;
239     }
240 
241 // ----------------------------------------------------------------------------------------
242 
murmur_hash3_2(const uint32 v1,const uint32 v2)243     inline uint32 murmur_hash3_2 (
244         const uint32 v1,
245         const uint32 v2
246     )
247     {
248         uint32 h1 = v2;
249 
250         uint32 c1 = 0xcc9e2d51;
251         uint32 c2 = 0x1b873593;
252 
253         //----------
254         // body
255 
256 
257         uint32 k1 = v1;
258 
259         k1 *= c1;
260         k1 = DLIB_ROTL32(k1,15);
261         k1 *= c2;
262 
263         h1 ^= k1;
264         h1 = DLIB_ROTL32(h1,13);
265         h1 = h1*5+0xe6546b64;
266 
267 
268         //----------
269         // finalization
270 
271         h1 ^= 4; // =^ by length in bytes
272 
273         h1 = murmur_fmix(h1);
274 
275         return h1;
276     }
277 
278 // ----------------------------------------------------------------------------------------
279 
murmur_hash3_3(const uint32 v1,const uint32 v2,const uint32 v3)280     inline uint32 murmur_hash3_3 (
281         const uint32 v1,
282         const uint32 v2,
283         const uint32 v3
284     )
285     {
286 
287         uint32 h1 = v3;
288 
289         uint32 c1 = 0xcc9e2d51;
290         uint32 c2 = 0x1b873593;
291 
292         //----------
293         // body
294 
295 
296         uint32 k1 = v1;
297 
298         k1 *= c1;
299         k1 = DLIB_ROTL32(k1,15);
300         k1 *= c2;
301 
302         h1 ^= k1;
303         h1 = DLIB_ROTL32(h1,13);
304         h1 = h1*5+0xe6546b64;
305 
306         k1 = v2;
307         k1 *= c1;
308         k1 = DLIB_ROTL32(k1,15);
309         k1 *= c2;
310 
311         h1 ^= k1;
312         h1 = DLIB_ROTL32(h1,13);
313         h1 = h1*5+0xe6546b64;
314 
315         //----------
316         // finalization
317 
318         h1 ^= 8; // =^ by length in bytes
319 
320         h1 = murmur_fmix(h1);
321 
322         return h1;
323     }
324 
325 // ----------------------------------------------------------------------------------------
326 
327     inline std::pair<uint64,uint64> murmur_hash3_128bit (
328         const void* key,
329         const int len,
330         const uint64 seed = 0
331     )
332     {
333         const uint8 * data = (const uint8*)key;
334         const int nblocks = len / 16;
335 
336         uint64 h1 = seed;
337         uint64 h2 = seed;
338 
339         uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
340         uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
341 
342         //----------
343         // body
344 
345         const uint64 * blocks = (const uint64 *)(data);
346 
347         bool is_little_endian = true;
348         uint32 endian_test = 1;
349         if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
350             is_little_endian = false;
351 
352 
353         if (is_little_endian)
354         {
355             for(int i = 0; i < nblocks; i++)
356             {
357                 uint64 k1 = murmur_getblock(blocks,i*2+0);
358                 uint64 k2 = murmur_getblock(blocks,i*2+1);
359 
360                 k1 *= c1; k1  = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
361 
362                 h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
363 
364                 k2 *= c2; k2  = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
365 
366                 h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
367             }
368         }
369         else
370         {
371             for(int i = 0; i < nblocks; i++)
372             {
373                 uint64 k1 = murmur_getblock_byte_swap(blocks,i*2+0);
374                 uint64 k2 = murmur_getblock_byte_swap(blocks,i*2+1);
375 
376                 k1 *= c1; k1  = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
377 
378                 h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
379 
380                 k2 *= c2; k2  = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
381 
382                 h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
383             }
384         }
385 
386         //----------
387         // tail
388 
389         const uint8 * tail = (const uint8*)(data + nblocks*16);
390 
391         uint64 k1 = 0;
392         uint64 k2 = 0;
393 
394         switch(len & 15)
395         {
396             case 15: k2 ^= uint64(tail[14]) << 48; DLIB_FALLTHROUGH; // fall through
397             case 14: k2 ^= uint64(tail[13]) << 40; DLIB_FALLTHROUGH; // fall through
398             case 13: k2 ^= uint64(tail[12]) << 32; DLIB_FALLTHROUGH; // fall through
399             case 12: k2 ^= uint64(tail[11]) << 24; DLIB_FALLTHROUGH; // fall through
400             case 11: k2 ^= uint64(tail[10]) << 16; DLIB_FALLTHROUGH; // fall through
401             case 10: k2 ^= uint64(tail[ 9]) << 8; DLIB_FALLTHROUGH; // fall through
402             case  9: k2 ^= uint64(tail[ 8]) << 0;
403                      k2 *= c2; k2  = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2; DLIB_FALLTHROUGH; // fall through
404 
405             case  8: k1 ^= uint64(tail[ 7]) << 56; DLIB_FALLTHROUGH; // fall through
406             case  7: k1 ^= uint64(tail[ 6]) << 48; DLIB_FALLTHROUGH; // fall through
407             case  6: k1 ^= uint64(tail[ 5]) << 40; DLIB_FALLTHROUGH; // fall through
408             case  5: k1 ^= uint64(tail[ 4]) << 32; DLIB_FALLTHROUGH; // fall through
409             case  4: k1 ^= uint64(tail[ 3]) << 24; DLIB_FALLTHROUGH; // fall through
410             case  3: k1 ^= uint64(tail[ 2]) << 16; DLIB_FALLTHROUGH; // fall through
411             case  2: k1 ^= uint64(tail[ 1]) << 8; DLIB_FALLTHROUGH; // fall through
412             case  1: k1 ^= uint64(tail[ 0]) << 0;
413                      k1 *= c1; k1  = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
414         };
415 
416         //----------
417         // finalization
418 
419         h1 ^= len; h2 ^= len;
420 
421         h1 += h2;
422         h2 += h1;
423 
424         h1 = murmur_fmix(h1);
425         h2 = murmur_fmix(h2);
426 
427         h1 += h2;
428         h2 += h1;
429 
430         return std::make_pair(h1,h2);
431     }
432 
433 // ----------------------------------------------------------------------------------------
434 
murmur_hash3_128bit(const uint32 & v1,const uint32 & v2,const uint32 & v3,const uint32 & v4)435     inline std::pair<uint64,uint64> murmur_hash3_128bit (
436         const uint32& v1,
437         const uint32& v2,
438         const uint32& v3,
439         const uint32& v4
440     )
441     {
442         uint64 h1 = 0;
443         uint64 h2 = 0;
444 
445         const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
446         const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
447 
448         //----------
449         // body
450 
451         uint64 k1 = (static_cast<uint64>(v2)<<32)|v1;
452         uint64 k2 = (static_cast<uint64>(v4)<<32)|v3;
453 
454         k1 *= c1; k1  = DLIB_ROTL64(k1,31); k1 *= c2;
455 
456         h1 = DLIB_ROTL64(k1,27); h1 = h1*5+0x52dce729;
457 
458         k2 *= c2; k2  = DLIB_ROTL64(k2,33); k2 *= c1;
459 
460         h2 = DLIB_ROTL64(k2,31); h2 += h1; h2 = h2*5+0x38495ab5;
461 
462         //----------
463         // finalization
464 
465         h1 ^= 16; h2 ^= 16;
466 
467         h1 += h2;
468         h2 += h1;
469 
470         h1 = murmur_fmix(h1);
471         h2 = murmur_fmix(h2);
472 
473         h1 += h2;
474         h2 += h1;
475 
476         return std::make_pair(h1,h2);
477     }
478 
479 // ----------------------------------------------------------------------------------------
480 
murmur_hash3_128bit_3(uint64 k1,uint64 k2,uint64 k3)481     inline std::pair<uint64,uint64> murmur_hash3_128bit_3 (
482         uint64 k1,
483         uint64 k2,
484         uint64 k3
485     )
486     {
487         uint64 h1 = k3;
488         uint64 h2 = k3;
489 
490         const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
491         const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
492 
493         //----------
494         // body
495 
496         k1 *= c1; k1  = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
497 
498         h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
499 
500         k2 *= c2; k2  = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
501 
502         h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
503 
504         //----------
505         // finalization
506 
507         h1 ^= 16; h2 ^= 16;
508 
509         h1 += h2;
510         h2 += h1;
511 
512         h1 = murmur_fmix(h1);
513         h2 = murmur_fmix(h2);
514 
515         h1 += h2;
516         h2 += h1;
517 
518         return std::make_pair(h1,h2);
519     }
520 
521 // ----------------------------------------------------------------------------------------
522 
523 }
524 
525 #endif // DLIB_MURMUR_HAsH_3_Hh_
526 
527