//-----------------------------------------------------------------------------
// Streaming implementation of MurmurHash3 by Daniel Thomas
// Based on the single-buffer implementation by Austin Appleby
// Code is placed in the public domain.
// The authors disclaim copyright to this source code.

// Note - The x86 and x64 versions do _not_ produce the same results, as the
// algorithms are optimized for their respective platforms. You can still
// compile and run any of them on any platform, but your performance with the
// non-native version will be less than optimal.
// The hashes also differ (while remaining equally strong) between big- and
// little-endian platforms.
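//
// Usage sketch (a minimal example, assuming the declarations in murmur3.h
// match the definitions below): feeding the input incrementally produces the
// same digest as hashing it in one call.
//
//     uint32_t one_shot = MurmurHash3_x86_32("hello world", 11, 0);
//
//     uint32_t streamed;
//     MurmurHash3_x86_32_state *st = MurmurHash3_x86_32_new();
//     MurmurHash3_x86_32_update(st, "hello ", 6);
//     MurmurHash3_x86_32_update(st, "world", 5);
//     MurmurHash3_x86_32_finalise(st, &streamed); /* also frees st */
//     g_assert(one_shot == streamed);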

#include "murmur3.h"
#include <glib.h>
#include <string.h>

//-----------------------------------------------------------------------------
// Platform-specific functions and macros

__attribute__((__unused__)) static inline uint32_t rotl32(uint32_t x, int8_t r) {
    return (x << r) | (x >> (32 - r));
}

static inline uint64_t rotl64(uint64_t x, int8_t r) {
    return (x << r) | (x >> (64 - r));
}

#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

#define BIG_CONSTANT(x) (x##LLU)

//-----------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here

#define GET_UINT64(p) (*((const uint64_t *)(p)))
#define GET_UINT32(p) (*((const uint32_t *)(p)))
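
// A portable alternative (a sketch only, not what this file uses): reading
// through memcpy sidesteps unaligned-access problems, and glib's
// GUINT32_FROM_LE would additionally pin the digest to the little-endian
// result on every platform.
//
//     static inline uint32_t get_uint32_le(const void *p) {
//         uint32_t v;
//         memcpy(&v, p, sizeof v);
//         return GUINT32_FROM_LE(v);
//     }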

struct _MurmurHash3_x86_32_state {
    uint32_t h1;
    union {
        uint8_t xs[4]; /* unhashed data from last increment */
        uint32_t xs32;
    };
    uint8_t xs_len;
    size_t len;
};

struct _MurmurHash3_x86_128_state {
    uint32_t h1;
    uint32_t h2;
    uint32_t h3;
    uint32_t h4;
    union {
        uint8_t xs[16]; /* unhashed data from last increment */
        uint32_t xs32[4];
    };
    uint8_t xs_len;
    size_t len;
};

struct _MurmurHash3_x64_128_state {
    uint64_t h1;
    uint64_t h2;
    union {
        uint8_t xs[16]; /* unhashed data from last increment */
        uint64_t xs64[2];
    };
    uint8_t xs_len;
    size_t len;
};
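
// The xs unions let a carried-over block be re-read as native-endian words
// (xs32/xs64) without another memcpy, matching the raw block reads above;
// this is one reason the digests differ between big- and little-endian
// hosts, as noted at the top of the file.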

//-----------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche

static inline uint32_t fmix32(uint32_t h) {
    h ^= h >> 16;
    h *= 0x85ebca6b;
    h ^= h >> 13;
    h *= 0xc2b2ae35;
    h ^= h >> 16;

    return h;
}

//----------

static inline uint64_t fmix64(uint64_t k) {
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xff51afd7ed558ccd);
    k ^= k >> 33;
    k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
    k ^= k >> 33;

    return k;
}

//-----------------------------------------------------------------------------

/* one round of block mixing: scramble the block k, then fold it into h */
#define MURMUR_UPDATE_X86(h, k, rotl, ca, cb) \
    do {                                      \
        k *= ca;                              \
        k = ROTL32(k, rotl);                  \
        k *= cb;                              \
        h ^= k;                               \
    } while(0)

#define MURMUR_MIX_X86(ha, hb, rotl, c) \
    do {                                \
        ha = ROTL32(ha, rotl);          \
        ha += hb;                       \
        ha = ha * 5 + c;                \
    } while(0)

#define MURMUR_UPDATE_X64(h, k, rotl, ca, cb) \
    do {                                      \
        k *= ca;                              \
        k = ROTL64(k, rotl);                  \
        k *= cb;                              \
        h ^= k;                               \
    } while(0)

#define MURMUR_MIX_X64(ha, hb, rotl, c) \
    do {                                \
        ha = ROTL64(ha, rotl);          \
        ha += hb;                       \
        ha = ha * 5 + c;                \
    } while(0)

/* copy as much of data as fits into the xs carry buffer, advancing data past
 * the bytes consumed (size_t arithmetic so inputs longer than INT_MAX cannot
 * overflow) */
#define MURMUR_FILL_XS(xs, xs_len, xs_cap, data, data_len)              \
    do {                                                                \
        const size_t bytes = ((data_len) + (xs_len) > (size_t)(xs_cap)) \
                                 ? (size_t)(xs_cap) - (xs_len)          \
                                 : (data_len);                          \
        memcpy((xs) + (xs_len), (data), bytes);                         \
        (xs_len) += bytes;                                              \
        (data) += bytes;                                                \
    } while(0)
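
// Example of the carry semantics: with xs_cap = 4, xs_len = 3 and 10 new
// bytes, MURMUR_FILL_XS copies one byte (filling xs to a complete block),
// advances data by one, and leaves the other 9 bytes to the block loops
// below.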

//-----------------------------------------------------------------------------

MurmurHash3_x86_32_state *MurmurHash3_x86_32_new(void) {
    return g_slice_new0(MurmurHash3_x86_32_state);
}

MurmurHash3_x86_32_state *MurmurHash3_x86_32_copy(MurmurHash3_x86_32_state *state) {
    return g_slice_copy(sizeof(MurmurHash3_x86_32_state), state);
}

#define MURMUR_UPDATE_H1_X86_32(H1, K1) MURMUR_UPDATE_X86(H1, K1, 15, 0xcc9e2d51, 0x1b873593)

void MurmurHash3_x86_32_update(MurmurHash3_x86_32_state *const state,
                               const void *restrict key, const size_t len) {
    state->len += len;
    const uint8_t *data = (const uint8_t *)key;
    const uint8_t *stop = data + len;

    if(state->xs_len > 0) {
        /* top up the carry buffer from the new data */
        MURMUR_FILL_XS(state->xs, state->xs_len, 4, data, len);
    }

    /* process blocks of 4 bytes */
    while(state->xs_len == 4 || stop - data >= 4) {
        uint32_t k1;

        if(state->xs_len == 4) {
            /* process remnant data from previous update */
            k1 = state->xs32;
            state->xs_len = 0;
        } else {
            /* process new data */
            k1 = GET_UINT32(data);
            data += 4;
        }

        MURMUR_UPDATE_H1_X86_32(state->h1, k1);
        MURMUR_MIX_X86(state->h1, 0, 13, 0xe6546b64);
    }

    if(state->xs_len == 0 && stop > data) {
        // store excess data in state
        state->xs_len = stop - data;
        memcpy(state->xs, data, state->xs_len);
    }
}

void MurmurHash3_x86_32_steal(const MurmurHash3_x86_32_state *const restrict state,
                              void *const restrict out) {
    uint32_t k1 = 0;

    /* copy h to make this a non-destructive steal */
    uint32_t h1 = state->h1;

    switch(state->xs_len) {
    case 3:
        k1 ^= (uint32_t)state->xs[2] << 16; /* fall through */
    case 2:
        k1 ^= (uint32_t)state->xs[1] << 8; /* fall through */
    case 1:
        k1 ^= (uint32_t)state->xs[0];

        MURMUR_UPDATE_H1_X86_32(h1, k1);
    }

    //----------
    // finalization

    h1 ^= state->len;

    h1 = fmix32(h1);

    *(uint32_t *)out = h1;
}
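
// (steal computes the digest-so-far without mutating the state, so the
// caller may keep feeding updates afterwards; finalise is steal + free.)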

void MurmurHash3_x86_32_finalise(MurmurHash3_x86_32_state *state, void *out) {
    MurmurHash3_x86_32_steal(state, out);
    MurmurHash3_x86_32_free(state);
}

void MurmurHash3_x86_32_free(MurmurHash3_x86_32_state *state) {
    g_slice_free(MurmurHash3_x86_32_state, state);
}

uint32_t MurmurHash3_x86_32(const void *key, size_t len, uint32_t seed) {
    uint32_t out;
    MurmurHash3_x86_32_state *state = MurmurHash3_x86_32_new();
    if(seed != 0) {
        MurmurHash3_x86_32_update(state, &seed, sizeof(seed));
    }
    MurmurHash3_x86_32_update(state, key, len);
    MurmurHash3_x86_32_finalise(state, &out);
    return out;
}
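
// Note on seeding: the one-shot wrappers hash a nonzero seed as a 4-byte
// prefix of the input rather than using it to initialise h1, so for nonzero
// seeds the digests differ from Austin Appleby's single-buffer reference.
// The same convention applies to the 128-bit wrappers below.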

//-----------------------------------------------------------------------------

MurmurHash3_x86_128_state *MurmurHash3_x86_128_new(void) {
    return g_slice_new0(MurmurHash3_x86_128_state);
}

MurmurHash3_x86_128_state *MurmurHash3_x86_128_copy(MurmurHash3_x86_128_state *state) {
    return g_slice_copy(sizeof(MurmurHash3_x86_128_state), state);
}

#define MURMUR_UPDATE_H1_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 15, 0x239b961b, 0xab0e9789)
#define MURMUR_UPDATE_H2_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 16, 0xab0e9789, 0x38b34ae5)
#define MURMUR_UPDATE_H3_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 17, 0x38b34ae5, 0xa1e38b93)
#define MURMUR_UPDATE_H4_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 18, 0xa1e38b93, 0x239b961b)

void MurmurHash3_x86_128_update(MurmurHash3_x86_128_state *const state,
                                const void *restrict key, const size_t len) {
    state->len += len;
    const uint8_t *data = (const uint8_t *)key;
    const uint8_t *stop = data + len;

    if(state->xs_len > 0) {
        /* top up the carry buffer from the new data */
        MURMUR_FILL_XS(state->xs, state->xs_len, 16, data, len);
    }

    /* process blocks of 16 bytes */
    while(state->xs_len == 16 || stop - data >= 16) {
        uint32_t k1;
        uint32_t k2;
        uint32_t k3;
        uint32_t k4;

        if(state->xs_len == 16) {
            /* process remnant data from previous update */
            k1 = state->xs32[0];
            k2 = state->xs32[1];
            k3 = state->xs32[2];
            k4 = state->xs32[3];
            state->xs_len = 0;
        } else {
            /* process new data */
            k1 = GET_UINT32(data);
            k2 = GET_UINT32(data + 4);
            k3 = GET_UINT32(data + 8);
            k4 = GET_UINT32(data + 12);
            data += 16;
        }

        MURMUR_UPDATE_H1_X86_128(state->h1, k1);
        MURMUR_MIX_X86(state->h1, state->h2, 19, 0x561ccd1b);

        MURMUR_UPDATE_H2_X86_128(state->h2, k2);
        MURMUR_MIX_X86(state->h2, state->h3, 17, 0x0bcaa747);

        MURMUR_UPDATE_H3_X86_128(state->h3, k3);
        MURMUR_MIX_X86(state->h3, state->h4, 15, 0x96cd1c35);

        MURMUR_UPDATE_H4_X86_128(state->h4, k4);
        MURMUR_MIX_X86(state->h4, state->h1, 13, 0x32ac3b17);
    }

    if(state->xs_len == 0 && stop > data) {
        // store excess data in state
        state->xs_len = stop - data;
        memcpy(state->xs, data, state->xs_len);
    }
}

void MurmurHash3_x86_128_steal(const MurmurHash3_x86_128_state *const restrict state,
                               void *const restrict out) {
    uint32_t k1 = 0;
    uint32_t k2 = 0;
    uint32_t k3 = 0;
    uint32_t k4 = 0;

    /* copy h to make this a non-destructive steal */
    uint32_t h1 = state->h1;
    uint32_t h2 = state->h2;
    uint32_t h3 = state->h3;
    uint32_t h4 = state->h4;

    /* xs_len == state->len % 16 here, since every complete block has
     * already been consumed */
    switch(state->xs_len) {
    case 15:
        k4 ^= (uint32_t)state->xs[14] << 16; /* fall through */
    case 14:
        k4 ^= (uint32_t)state->xs[13] << 8; /* fall through */
    case 13:
        k4 ^= (uint32_t)state->xs[12] << 0;

        MURMUR_UPDATE_H4_X86_128(h4, k4);
        /* fall through */
    case 12:
        k3 ^= (uint32_t)state->xs[11] << 24; /* fall through */
    case 11:
        k3 ^= (uint32_t)state->xs[10] << 16; /* fall through */
    case 10:
        k3 ^= (uint32_t)state->xs[9] << 8; /* fall through */
    case 9:
        k3 ^= (uint32_t)state->xs[8] << 0;

        MURMUR_UPDATE_H3_X86_128(h3, k3);
        /* fall through */
    case 8:
        k2 ^= (uint32_t)state->xs[7] << 24; /* fall through */
    case 7:
        k2 ^= (uint32_t)state->xs[6] << 16; /* fall through */
    case 6:
        k2 ^= (uint32_t)state->xs[5] << 8; /* fall through */
    case 5:
        k2 ^= (uint32_t)state->xs[4] << 0;

        MURMUR_UPDATE_H2_X86_128(h2, k2);
        /* fall through */
    case 4:
        k1 ^= (uint32_t)state->xs[3] << 24; /* fall through */
    case 3:
        k1 ^= (uint32_t)state->xs[2] << 16; /* fall through */
    case 2:
        k1 ^= (uint32_t)state->xs[1] << 8; /* fall through */
    case 1:
        k1 ^= (uint32_t)state->xs[0] << 0;

        MURMUR_UPDATE_H1_X86_128(h1, k1);
    }

    //----------
    // finalization

    h1 ^= state->len;
    h2 ^= state->len;
    h3 ^= state->len;
    h4 ^= state->len;

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    h1 = fmix32(h1);
    h2 = fmix32(h2);
    h3 = fmix32(h3);
    h4 = fmix32(h4);

    h1 += h2;
    h1 += h3;
    h1 += h4;
    h2 += h1;
    h3 += h1;
    h4 += h1;

    ((uint32_t *)out)[0] = h1;
    ((uint32_t *)out)[1] = h2;
    ((uint32_t *)out)[2] = h3;
    ((uint32_t *)out)[3] = h4;
}

void MurmurHash3_x86_128_finalise(MurmurHash3_x86_128_state *state, void *out) {
    MurmurHash3_x86_128_steal(state, out);
    MurmurHash3_x86_128_free(state);
}

void MurmurHash3_x86_128_free(MurmurHash3_x86_128_state *state) {
    g_slice_free(MurmurHash3_x86_128_state, state);
}

void MurmurHash3_x86_128(const void *key, size_t len, uint32_t seed, void *out) {
    MurmurHash3_x86_128_state *state = MurmurHash3_x86_128_new();
    if(seed != 0) {
        MurmurHash3_x86_128_update(state, &seed, sizeof(seed));
    }
    MurmurHash3_x86_128_update(state, key, len);
    MurmurHash3_x86_128_finalise(state, out);
}

//-----------------------------------------------------------------------------

MurmurHash3_x64_128_state *MurmurHash3_x64_128_new(void) {
    return g_slice_new0(MurmurHash3_x64_128_state);
}

MurmurHash3_x64_128_state *MurmurHash3_x64_128_copy(MurmurHash3_x64_128_state *state) {
    return g_slice_copy(sizeof(MurmurHash3_x64_128_state), state);
}

#define MURMUR_UPDATE_H1_X64_128(H1, K1)                            \
    MURMUR_UPDATE_X64(H1, K1, 31, BIG_CONSTANT(0x87c37b91114253d5), \
                      BIG_CONSTANT(0x4cf5ad432745937f))
#define MURMUR_UPDATE_H2_X64_128(H2, K2)                            \
    MURMUR_UPDATE_X64(H2, K2, 33, BIG_CONSTANT(0x4cf5ad432745937f), \
                      BIG_CONSTANT(0x87c37b91114253d5))

void MurmurHash3_x64_128_update(MurmurHash3_x64_128_state *const restrict state,
                                const void *restrict key, const size_t len) {
    state->len += len;
    const uint8_t *data = (const uint8_t *)key;
    const uint8_t *stop = data + len;

    if(state->xs_len > 0) {
        /* top up the carry buffer from the new data */
        MURMUR_FILL_XS(state->xs, state->xs_len, 16, data, len);
    }

    /* process blocks of 16 bytes */
    while(state->xs_len == 16 || stop - data >= 16) {
        uint64_t k1;
        uint64_t k2;

        if(state->xs_len == 16) {
            /* process remnant data from previous update */
            k1 = state->xs64[0];
            k2 = state->xs64[1];
            state->xs_len = 0;
        } else {
            /* process new data */
            k1 = GET_UINT64(data);
            k2 = GET_UINT64(data + 8);
            data += 16;
        }

        MURMUR_UPDATE_H1_X64_128(state->h1, k1);
        MURMUR_MIX_X64(state->h1, state->h2, 27, 0x52dce729);

        MURMUR_UPDATE_H2_X64_128(state->h2, k2);
        MURMUR_MIX_X64(state->h2, state->h1, 31, 0x38495ab5);
    }

    if(state->xs_len == 0 && stop > data) {
        // store excess data in state
        state->xs_len = stop - data;
        memcpy(state->xs, data, state->xs_len);
    }
}

void MurmurHash3_x64_128_steal(const MurmurHash3_x64_128_state *const restrict state,
                               void *const restrict out) {
    uint64_t k1 = 0;
    uint64_t k2 = 0;

    /* copy h to make this a non-destructive steal */
    uint64_t h1 = state->h1;
    uint64_t h2 = state->h2;

    switch(state->xs_len) {
    case 15:
        k2 ^= (uint64_t)(state->xs[14]) << 48; /* fall through */
    case 14:
        k2 ^= (uint64_t)(state->xs[13]) << 40; /* fall through */
    case 13:
        k2 ^= (uint64_t)(state->xs[12]) << 32; /* fall through */
    case 12:
        k2 ^= (uint64_t)(state->xs[11]) << 24; /* fall through */
    case 11:
        k2 ^= (uint64_t)(state->xs[10]) << 16; /* fall through */
    case 10:
        k2 ^= (uint64_t)(state->xs[9]) << 8; /* fall through */
    case 9:
        k2 ^= (uint64_t)(state->xs[8]) << 0;

        MURMUR_UPDATE_H2_X64_128(h2, k2);
        /* fall through */
    case 8:
        k1 ^= (uint64_t)(state->xs[7]) << 56; /* fall through */
    case 7:
        k1 ^= (uint64_t)(state->xs[6]) << 48; /* fall through */
    case 6:
        k1 ^= (uint64_t)(state->xs[5]) << 40; /* fall through */
    case 5:
        k1 ^= (uint64_t)(state->xs[4]) << 32; /* fall through */
    case 4:
        k1 ^= (uint64_t)(state->xs[3]) << 24; /* fall through */
    case 3:
        k1 ^= (uint64_t)(state->xs[2]) << 16; /* fall through */
    case 2:
        k1 ^= (uint64_t)(state->xs[1]) << 8; /* fall through */
    case 1:
        k1 ^= (uint64_t)(state->xs[0]) << 0;

        MURMUR_UPDATE_H1_X64_128(h1, k1);
    }

    //----------
    // finalization

    h1 ^= state->len;
    h2 ^= state->len;

    h1 += h2;
    h2 += h1;

    h1 = fmix64(h1);
    h2 = fmix64(h2);

    h1 += h2;
    h2 += h1;

    ((uint64_t *)out)[0] = h1;
    ((uint64_t *)out)[1] = h2;
}

void MurmurHash3_x64_128_free(MurmurHash3_x64_128_state *state) {
    g_slice_free(MurmurHash3_x64_128_state, state);
}

void MurmurHash3_x64_128(const void *key, const size_t len, const uint32_t seed,
                         void *out) {
    MurmurHash3_x64_128_state *state = MurmurHash3_x64_128_new();
    if(seed != 0) {
        MurmurHash3_x64_128_update(state, &seed, sizeof(seed));
    }
    MurmurHash3_x64_128_update(state, key, len);
    MurmurHash3_x64_128_finalise(state, out);
}

void MurmurHash3_x64_128_finalise(MurmurHash3_x64_128_state *state, void *out) {
    MurmurHash3_x64_128_steal(state, out);
    MurmurHash3_x64_128_free(state);
}

int MurmurHash3_x64_128_equal(MurmurHash3_x64_128_state *a,
                              MurmurHash3_x64_128_state *b) {
    if(a->h1 != b->h1 || a->h2 != b->h2 || a->xs_len != b->xs_len || a->len != b->len) {
        return 0;
    }
    return (a->xs_len == 0 || !memcmp(a->xs, b->xs, a->xs_len));
}
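
// Usage sketch for copy/equal (a minimal example; prefix/suffix and their
// lengths stand in for the caller's own buffers): _copy forks a stream, so a
// shared prefix is hashed once and each branch is finalised separately.
//
//     MurmurHash3_x64_128_state *base = MurmurHash3_x64_128_new();
//     MurmurHash3_x64_128_update(base, prefix, prefix_len);
//
//     MurmurHash3_x64_128_state *fork = MurmurHash3_x64_128_copy(base);
//     MurmurHash3_x64_128_update(fork, suffix, suffix_len);
//
//     uint8_t a[16], b[16];
//     MurmurHash3_x64_128_finalise(fork, a); /* digest of prefix + suffix */
//     MurmurHash3_x64_128_finalise(base, b); /* digest of prefix alone */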

//-----------------------------------------------------------------------------