1 //-----------------------------------------------------------------------------
2 // Streaming implementation of MurmurHash3 by Daniel Thomas
3 // Based on single-buffer implementation by Austin Appleby
4 // Code is placed in the public domain.
5 // The authors disclaim copyright to this source code.
6
7 // Note - The x86 and x64 versions do _not_ produce the same results, as the
8 // algorithms are optimized for their respective platforms. You can still
9 // compile and run any of them on any platform, but your performance with the
10 // non-native version will be less than optimal.
11 // Also will give different (but equally strong) results on big- vs
12 // little-endian platforms
13
14 #include "murmur3.h"
15 #include <glib.h>
16 #include <string.h>
17
18 //-----------------------------------------------------------------------------
19 // Platform-specific functions and macros
20
/* Rotate a 32-bit value left by r bits.
 *
 * The shift counts are masked to 0..31: the original `x >> (32 - r)`
 * is undefined behaviour when r == 0 (shift by the full type width).
 * For the 1..31 rotations actually used by the hash rounds the result
 * is unchanged, and compilers still recognise the rotate idiom. */
__attribute__((__unused__)) static inline uint32_t rotl32(uint32_t x, int8_t r) {
  return (x << (r & 31)) | (x >> ((32 - r) & 31));
}
24
/* Rotate a 64-bit value left by r bits.
 *
 * Shift counts are masked to 0..63 so that r == 0 (and r == 64) do not
 * invoke the undefined shift-by-width of the original formulation; the
 * result for the 1..63 rotations used by the hash is identical. */
static inline uint64_t rotl64(uint64_t x, int8_t r) {
  return (x << (r & 63)) | (x >> ((64 - r) & 63));
}
28
/* Convenience wrappers so the hash rounds read like the reference
 * implementation; both expand to the inline rotate helpers above. */
#define ROTL32(x, y) rotl32(x, y)
#define ROTL64(x, y) rotl64(x, y)

/* Force an unsigned-long-long suffix onto 64-bit hash constants. */
#define BIG_CONSTANT(x) (x##LLU)
33
34 //-----------------------------------------------------------------------------
35 // Block read - if your platform needs to do endian-swapping or can only
36 // handle aligned reads, do the conversion here
37
38 #define GET_UINT64(p) *((uint64_t *)(p));
39 #define GET_UINT32(p) *((uint32_t *)(p));
40
/* Incremental state for the 32-bit x86 variant. */
struct _MurmurHash3_x86_32_state {
  uint32_t h1;       /* running hash accumulator */
  union {            /* anonymous union: same bytes, two views */
    uint8_t xs[4];   /* unhashed data from last increment */
    uint32_t xs32;   /* the buffered bytes viewed as one 32-bit block */
  };
  uint8_t xs_len;    /* valid bytes in xs; 0-3 between update calls */
  size_t len;        /* total bytes hashed; mixed in at finalization */
};
50
/* Incremental state for the 128-bit x86 variant (four 32-bit lanes). */
struct _MurmurHash3_x86_128_state {
  uint32_t h1;        /* lane accumulators */
  uint32_t h2;
  uint32_t h3;
  uint32_t h4;
  union {             /* anonymous union: same bytes, two views */
    uint8_t xs[16];   /* unhashed data from last increment */
    uint32_t xs32[4]; /* the buffered bytes viewed as four 32-bit blocks */
  };
  uint8_t xs_len;     /* valid bytes in xs; 0-15 between update calls */
  size_t len;         /* total bytes hashed; mixed in at finalization */
};
63
/* Incremental state for the 128-bit x64 variant (two 64-bit lanes). */
struct _MurmurHash3_x64_128_state {
  uint64_t h1;        /* lane accumulators */
  uint64_t h2;
  union {             /* anonymous union: same bytes, two views */
    uint8_t xs[16];   /* unhashed data from last increment */
    uint64_t xs64[2]; /* the buffered bytes viewed as two 64-bit blocks */
  };
  uint8_t xs_len;     /* valid bytes in xs; 0-15 between update calls */
  size_t len;         /* total bytes hashed; mixed in at finalization */
};
74
75 //-----------------------------------------------------------------------------
76 // Finalization mix - force all bits of a hash block to avalanche
77
/* Final avalanche mix for a 32-bit lane: alternating xor-shift and
 * multiply rounds spread every input bit across the whole word. */
static inline uint32_t fmix32(uint32_t h) {
  uint32_t v = h;

  v = (v ^ (v >> 16)) * 0x85ebca6b;
  v = (v ^ (v >> 13)) * 0xc2b2ae35;

  return v ^ (v >> 16);
}
87
88 //----------
89
/* Final avalanche mix for a 64-bit lane: alternating xor-shift and
 * multiply rounds spread every input bit across the whole word. */
static inline uint64_t fmix64(uint64_t k) {
  uint64_t v = k;

  v = (v ^ (v >> 33)) * 0xff51afd7ed558ccdLLU;
  v = (v ^ (v >> 33)) * 0xc4ceb9fe1a85ec53LLU;

  return v ^ (v >> 33);
}
99
100 //-----------------------------------------------------------------------------
101
/* One scramble round for the x86 variants: mix 32-bit block k into lane h.
 * All multi-statement macros are wrapped in do { } while(0) so they are a
 * single statement wherever they are expanded. */
#define MURMUR_UPDATE_X86(h, k, rotl, ca, cb) \
  do { \
    (k) *= (ca); \
    (k) = ROTL32((k), (rotl)); \
    (k) *= (cb); \
    (h) ^= (k); \
  } while(0)

/* Post-round lane mixing for the x86 variants. */
#define MURMUR_MIX_X86(ha, hb, rotl, c) \
  do { \
    (ha) = ROTL32((ha), (rotl)); \
    (ha) += (hb); \
    (ha) = (ha) * 5 + (c); \
  } while(0)

/* One scramble round for the x64 variant: mix 64-bit block k into lane h. */
#define MURMUR_UPDATE_X64(h, k, rotl, ca, cb) \
  do { \
    (k) = (k) * (ca); \
    (k) = ROTL64((k), (rotl)); \
    (k) *= (cb); \
    (h) ^= (k); \
  } while(0)

/* Post-round lane mixing for the x64 variant. */
#define MURMUR_MIX_X64(ha, hb, rotl, c) \
  do { \
    (ha) = ROTL64((ha), (rotl)); \
    (ha) += (hb); \
    (ha) = (ha) * 5 + (c); \
  } while(0)

/* Copy as many bytes of `data` into the remnant buffer `xs` as will fit
 * (capacity xs_cap, xs_len bytes already used), advancing `data` and
 * growing `xs_len`.  Sizing is done entirely in size_t: the original
 * version cast data_len to int, which could overflow for inputs larger
 * than INT_MAX bytes. */
#define MURMUR_FILL_XS(xs, xs_len, xs_cap, data, data_len) \
  do { \
    const size_t murmur_space_ = (size_t)((xs_cap) - (xs_len)); \
    const size_t murmur_take_ = \
        ((size_t)(data_len) > murmur_space_) ? murmur_space_ : (size_t)(data_len); \
    memcpy((xs) + (xs_len), (data), murmur_take_); \
    (xs_len) += murmur_take_; \
    (data) += murmur_take_; \
  } while(0)
134
135 //-----------------------------------------------------------------------------
136
/* Allocate a zero-initialised streaming state (h1 = 0, empty buffer). */
MurmurHash3_x86_32_state *MurmurHash3_x86_32_new() {
  return g_slice_new0(MurmurHash3_x86_32_state);
}

/* Deep-copy a state; the copy hashes independently from here on. */
MurmurHash3_x86_32_state *MurmurHash3_x86_32_copy(MurmurHash3_x86_32_state *state) {
  return g_slice_copy(sizeof(MurmurHash3_x86_32_state), state);
}
144
/* Scramble round for the single 32-bit lane (trailing ';' kept for the
 * existing call sites). */
#define MURMUR_UPDATE_H1_X86_32(H1, K1) MURMUR_UPDATE_X86(H1, K1, 15, 0xcc9e2d51, 0x1b873593);

/* Feed len bytes at key into the streaming state.
 *
 * Bytes that do not complete a 4-byte block are buffered in state->xs
 * and consumed by a later update or by steal/finalise. */
void MurmurHash3_x86_32_update(MurmurHash3_x86_32_state *const state,
                               const void *restrict key, const size_t len) {
  state->len += len;
  uint8_t *data = (uint8_t *)key;
  const uint8_t *stop = data + len;

  /* Top up the remnant buffer first so bytes are hashed in arrival order. */
  if(state->xs_len > 0) {
    MURMUR_FILL_XS(state->xs, state->xs_len, 4, data, len);
  }

  /* process blocks of 4 bytes */
  while(state->xs_len == 4 || data + 4 <= stop) {
    uint32_t k1;

    if(state->xs_len == 4) {
      /* process remnant data from previous update */
      k1 = state->xs32;
      state->xs_len = 0;
    } else {
      /* process new data */
      k1 = GET_UINT32(data);
      data += 4;
    }

    MURMUR_UPDATE_H1_X86_32(state->h1, k1);
    MURMUR_MIX_X86(state->h1, 0, 13, 0xe6546b64);
  }

  if(state->xs_len == 0 && stop > data) {
    // store excess data in state (at most 3 bytes remain here)
    state->xs_len = stop - data;
    memcpy(state->xs, data, state->xs_len);
  }
}
181
/* Write the 32-bit digest of everything hashed so far to out, without
 * modifying the state (hashing can continue afterwards). */
void MurmurHash3_x86_32_steal(const MurmurHash3_x86_32_state *const restrict state,
                              void *const restrict out) {
  uint32_t k1 = 0;

  /* copy h to make this a non-destructive steal */
  uint32_t h1 = state->h1;

  /* Fold the 0-3 buffered tail bytes into k1; the cases fall through
   * deliberately so each buffered byte is mixed exactly once.  Max
   * shift here is 16, so int promotion of the bytes cannot overflow. */
  switch(state->xs_len) {
    case 3:
      k1 ^= state->xs[2] << 16;
      /* fallthrough */
    case 2:
      k1 ^= state->xs[1] << 8;
      /* fallthrough */
    case 1:
      k1 ^= state->xs[0];

      MURMUR_UPDATE_H1_X86_32(h1, k1);
  };

  //----------
  // finalization

  h1 ^= state->len;  /* total input length participates in the avalanche */

  h1 = fmix32(h1);

  *(uint32_t *)out = h1;
}
209
/* Write the digest to out and release the state (steal + free). */
void MurmurHash3_x86_32_finalise(MurmurHash3_x86_32_state *state, void *out) {
  MurmurHash3_x86_32_steal(state, out);
  MurmurHash3_x86_32_free(state);
}

/* Release a state allocated by _new or _copy. */
void MurmurHash3_x86_32_free(MurmurHash3_x86_32_state *state) {
  g_slice_free(MurmurHash3_x86_32_state, state);
}
218
/* One-shot convenience wrapper around the streaming API.
 *
 * NOTE(review): a non-zero seed is hashed as a 4-byte data prefix (and a
 * zero seed is skipped entirely), rather than used as the initial h1
 * value as in the reference single-buffer implementation — outputs will
 * not match canonical MurmurHash3_x86_32; confirm this is intended. */
uint32_t MurmurHash3_x86_32(const void *key, size_t len, uint32_t seed) {
  uint32_t out;
  MurmurHash3_x86_32_state *state = MurmurHash3_x86_32_new();
  if(seed != 0) {
    MurmurHash3_x86_32_update(state, &seed, sizeof(seed));
  }
  MurmurHash3_x86_32_update(state, key, len);
  MurmurHash3_x86_32_finalise(state, &out);
  return out;
}
229
230 //-----------------------------------------------------------------------------
231
/* Allocate a zero-initialised streaming state (all lanes 0, empty buffer). */
MurmurHash3_x86_128_state *MurmurHash3_x86_128_new(void) {
  return g_slice_new0(MurmurHash3_x86_128_state);
}

/* Deep-copy a state; the copy hashes independently from here on. */
MurmurHash3_x86_128_state *MurmurHash3_x86_128_copy(MurmurHash3_x86_128_state *state) {
  return g_slice_copy(sizeof(MurmurHash3_x86_128_state), state);
}
239
240
/* Per-lane scramble rounds; each lane pairs its own multiply constant
 * with the next lane's, rotating the constant set across the four lanes
 * (trailing ';' kept for the existing call sites). */
#define MURMUR_UPDATE_H1_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 15, 0x239b961b, 0xab0e9789);
#define MURMUR_UPDATE_H2_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 16, 0xab0e9789, 0x38b34ae5);
#define MURMUR_UPDATE_H3_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 17, 0x38b34ae5, 0xa1e38b93);
#define MURMUR_UPDATE_H4_X86_128(H, K) MURMUR_UPDATE_X86(H, K, 18, 0xa1e38b93, 0x239b961b);
245
/* Feed len bytes at key into the streaming state.
 *
 * Bytes that do not complete a 16-byte block are buffered in state->xs
 * and consumed by a later update or by steal/finalise. */
void MurmurHash3_x86_128_update(MurmurHash3_x86_128_state *const state,
                                const void *restrict key, const size_t len) {
  state->len += len;
  uint8_t *data = (uint8_t *)key;
  const uint8_t *stop = data + len;

  /* Top up the remnant buffer first so bytes are hashed in arrival order. */
  if(state->xs_len > 0) {
    MURMUR_FILL_XS(state->xs, state->xs_len, 16, data, len);
  }

  /* process blocks of 16 bytes */
  while(state->xs_len == 16 || data + 16 <= stop) {
    uint32_t k1;
    uint32_t k2;
    uint32_t k3;
    uint32_t k4;

    if(state->xs_len == 16) {
      /* process remnant data from previous update */
      k1 = state->xs32[0];
      k2 = state->xs32[1];
      k3 = state->xs32[2];
      k4 = state->xs32[3];
      state->xs_len = 0;
    } else {
      /* process new data */
      k1 = GET_UINT32(data);
      k2 = GET_UINT32(data + 4);
      k3 = GET_UINT32(data + 8);
      k4 = GET_UINT32(data + 12);
      data += 16;
    }

    /* Each lane absorbs one 32-bit word, then is mixed with the next
     * lane (h4 wraps around to h1). */
    MURMUR_UPDATE_H1_X86_128(state->h1, k1);
    MURMUR_MIX_X86(state->h1, state->h2, 19, 0x561ccd1b);

    MURMUR_UPDATE_H2_X86_128(state->h2, k2);
    MURMUR_MIX_X86(state->h2, state->h3, 17, 0x0bcaa747);

    MURMUR_UPDATE_H3_X86_128(state->h3, k3);
    MURMUR_MIX_X86(state->h3, state->h4, 15, 0x96cd1c35);

    MURMUR_UPDATE_H4_X86_128(state->h4, k4);
    MURMUR_MIX_X86(state->h4, state->h1, 13, 0x32ac3b17);
  }

  if(state->xs_len == 0 && stop > data) {
    // store excess data in state (at most 15 bytes remain here)
    state->xs_len = stop - data;
    memcpy(state->xs, data, state->xs_len);
  }
}
298
MurmurHash3_x86_128_steal(const MurmurHash3_x86_128_state * const restrict state,void * const restrict out)299 void MurmurHash3_x86_128_steal(const MurmurHash3_x86_128_state *const restrict state,
300 void *const restrict out) {
301 uint32_t k1 = 0;
302 uint32_t k2 = 0;
303 uint32_t k3 = 0;
304 uint32_t k4 = 0;
305
306 /* copy h to make this a non-destructive steal */
307 uint32_t h1 = state->h1;
308 uint32_t h2 = state->h2;
309 uint32_t h3 = state->h3;
310 uint32_t h4 = state->h4;
311
312 switch(state->len & 15) {
313 case 15:
314 k4 ^= state->xs[14] << 16;
315 case 14:
316 k4 ^= state->xs[13] << 8;
317 case 13:
318 k4 ^= state->xs[12] << 0;
319
320 MURMUR_UPDATE_H4_X86_128(h4, k4);
321
322 case 12:
323 k3 ^= state->xs[11] << 24;
324 case 11:
325 k3 ^= state->xs[10] << 16;
326 case 10:
327 k3 ^= state->xs[9] << 8;
328 case 9:
329 k3 ^= state->xs[8] << 0;
330
331 MURMUR_UPDATE_H3_X86_128(h3, k3);
332
333 case 8:
334 k2 ^= state->xs[7] << 24;
335 case 7:
336 k2 ^= state->xs[6] << 16;
337 case 6:
338 k2 ^= state->xs[5] << 8;
339 case 5:
340 k2 ^= state->xs[4] << 0;
341
342 MURMUR_UPDATE_H2_X86_128(h2, k2);
343
344 case 4:
345 k1 ^= state->xs[3] << 24;
346 case 3:
347 k1 ^= state->xs[2] << 16;
348 case 2:
349 k1 ^= state->xs[1] << 8;
350 case 1:
351 k1 ^= state->xs[0] << 0;
352
353 MURMUR_UPDATE_H1_X86_128(h1, k1);
354 };
355
356 //----------
357 // finalization
358
359 h1 ^= state->len;
360 h2 ^= state->len;
361 h3 ^= state->len;
362 h4 ^= state->len;
363
364 h1 += h2;
365 h1 += h3;
366 h1 += h4;
367 h2 += h1;
368 h3 += h1;
369 h4 += h1;
370
371 h1 = fmix32(h1);
372 h2 = fmix32(h2);
373 h3 = fmix32(h3);
374 h4 = fmix32(h4);
375
376 h1 += h2;
377 h1 += h3;
378 h1 += h4;
379 h2 += h1;
380 h3 += h1;
381 h4 += h1;
382
383 ((uint32_t *)out)[0] = h1;
384 ((uint32_t *)out)[1] = h2;
385 ((uint32_t *)out)[2] = h3;
386 ((uint32_t *)out)[3] = h4;
387 }
388
/* Write the 16-byte digest to out and release the state (steal + free). */
void MurmurHash3_x86_128_finalise(MurmurHash3_x86_128_state *state, void *out) {
  MurmurHash3_x86_128_steal(state, out);
  MurmurHash3_x86_128_free(state);
}

/* Release a state allocated by _new or _copy. */
void MurmurHash3_x86_128_free(MurmurHash3_x86_128_state *state) {
  g_slice_free(MurmurHash3_x86_128_state, state);
}
397
/* One-shot convenience wrapper; out receives 16 bytes.
 *
 * NOTE(review): a non-zero seed is hashed as a 4-byte data prefix (and a
 * zero seed is skipped entirely), rather than seeding the lane values as
 * in the reference implementation — outputs will not match canonical
 * MurmurHash3_x86_128; confirm this is intended. */
void MurmurHash3_x86_128(const void *key, size_t len, uint32_t seed, void *out) {
  MurmurHash3_x86_128_state *state = MurmurHash3_x86_128_new();
  if(seed != 0) {
    MurmurHash3_x86_128_update(state, &seed, sizeof(seed));
  }
  MurmurHash3_x86_128_update(state, key, len);
  MurmurHash3_x86_128_finalise(state, out);
}
406
407 //-----------------------------------------------------------------------------
408
/* Allocate a zero-initialised streaming state (both lanes 0, empty buffer). */
MurmurHash3_x64_128_state *MurmurHash3_x64_128_new(void) {
  return g_slice_new0(MurmurHash3_x64_128_state);
}

/* Deep-copy a state; the copy hashes independently from here on. */
MurmurHash3_x64_128_state *MurmurHash3_x64_128_copy(MurmurHash3_x64_128_state *state) {
  return g_slice_copy(sizeof(MurmurHash3_x64_128_state), state);
}
416
/* Per-lane scramble rounds for the x64 variant; the two lanes swap the
 * multiply-constant pair (k1 and k2 are taken from the enclosing scope;
 * trailing ';' kept for the existing call sites). */
#define MURMUR_UPDATE_H1_X64_128(H1) \
  MURMUR_UPDATE_X64(H1, k1, 31, BIG_CONSTANT(0x87c37b91114253d5), \
                    BIG_CONSTANT(0x4cf5ad432745937f));
#define MURMUR_UPDATE_H2_X64_128(H2) \
  MURMUR_UPDATE_X64(H2, k2, 33, BIG_CONSTANT(0x4cf5ad432745937f), \
                    BIG_CONSTANT(0x87c37b91114253d5));
423
/* Feed len bytes at key into the streaming state.
 *
 * Bytes that do not complete a 16-byte block are buffered in state->xs
 * and consumed by a later update or by steal/finalise. */
void MurmurHash3_x64_128_update(MurmurHash3_x64_128_state *const restrict state,
                                const void *restrict key, const size_t len) {
  state->len += len;
  uint8_t *data = (uint8_t *)key;
  const uint8_t *stop = data + len;

  /* Top up the remnant buffer first so bytes are hashed in arrival order. */
  if(state->xs_len > 0) {
    MURMUR_FILL_XS(state->xs, state->xs_len, 16, data, len);
  }

  /* process blocks of 16 bytes */
  while(state->xs_len == 16 || data + 16 <= stop) {
    uint64_t k1;
    uint64_t k2;

    if(state->xs_len == 16) {
      /* process remnant data from previous update */
      k1 = state->xs64[0];
      k2 = state->xs64[1];
      state->xs_len = 0;
    } else {
      /* process new data */
      k1 = GET_UINT64(data);
      k2 = GET_UINT64(data + 8);
      data += 16;
    }

    /* Each lane absorbs one 64-bit word, then is mixed with the other. */
    MURMUR_UPDATE_H1_X64_128(state->h1);
    MURMUR_MIX_X64(state->h1, state->h2, 27, 0x52dce729);

    MURMUR_UPDATE_H2_X64_128(state->h2);
    MURMUR_MIX_X64(state->h2, state->h1, 31, 0x38495ab5);
  }

  if(state->xs_len == 0 && stop > data) {
    // store excess data in state (at most 15 bytes remain here)
    state->xs_len = stop - data;
    memcpy(state->xs, data, state->xs_len);
  }
}
464
/* Write the 128-bit digest (two uint64 words, 16 bytes) of everything
 * hashed so far to out, without modifying the state.
 *
 * The switch folds the 0-15 buffered tail bytes into k1/k2; the cases
 * fall through deliberately so each buffered byte is mixed exactly
 * once, and the (uint64_t) casts keep the shifts in unsigned 64-bit
 * arithmetic.  (xs_len equals state->len % 16 between updates, so this
 * matches the x86_128 variant's `len & 15` dispatch.) */
void MurmurHash3_x64_128_steal(const MurmurHash3_x64_128_state *const restrict state,
                               void *const restrict out) {
  uint64_t k1 = 0;
  uint64_t k2 = 0;

  /* copy h to make this a non-destructive steal */
  uint64_t h1 = state->h1;
  uint64_t h2 = state->h2;

  switch(state->xs_len) {
    case 15:
      k2 ^= (uint64_t)(state->xs[14]) << 48;
    case 14:
      k2 ^= (uint64_t)(state->xs[13]) << 40;
    case 13:
      k2 ^= (uint64_t)(state->xs[12]) << 32;
    case 12:
      k2 ^= (uint64_t)(state->xs[11]) << 24;
    case 11:
      k2 ^= (uint64_t)(state->xs[10]) << 16;
    case 10:
      k2 ^= (uint64_t)(state->xs[9]) << 8;
    case 9:
      k2 ^= (uint64_t)(state->xs[8]) << 0;

      MURMUR_UPDATE_H2_X64_128(h2);

    case 8:
      k1 ^= (uint64_t)(state->xs[7]) << 56;
    case 7:
      k1 ^= (uint64_t)(state->xs[6]) << 48;
    case 6:
      k1 ^= (uint64_t)(state->xs[5]) << 40;
    case 5:
      k1 ^= (uint64_t)(state->xs[4]) << 32;
    case 4:
      k1 ^= (uint64_t)(state->xs[3]) << 24;
    case 3:
      k1 ^= (uint64_t)(state->xs[2]) << 16;
    case 2:
      k1 ^= (uint64_t)(state->xs[1]) << 8;
    case 1:
      k1 ^= (uint64_t)(state->xs[0]) << 0;

      MURMUR_UPDATE_H1_X64_128(h1);
  };

  //----------
  // finalization

  h1 ^= state->len;  /* total input length participates in the avalanche */
  h2 ^= state->len;

  h1 += h2;
  h2 += h1;

  h1 = fmix64(h1);
  h2 = fmix64(h2);

  h1 += h2;
  h2 += h1;

  ((uint64_t *)out)[0] = h1;
  ((uint64_t *)out)[1] = h2;
}
530
/* Release a state allocated by _new or _copy. */
void MurmurHash3_x64_128_free(MurmurHash3_x64_128_state *state) {
  g_slice_free(MurmurHash3_x64_128_state, state);
}
534
/* One-shot convenience wrapper; out receives 16 bytes.
 *
 * NOTE(review): a non-zero seed is hashed as a 4-byte data prefix (and a
 * zero seed is skipped entirely), rather than seeding h1/h2 as in the
 * reference implementation — outputs will not match canonical
 * MurmurHash3_x64_128; confirm this is intended. */
void MurmurHash3_x64_128(const void *key, const size_t len, const uint32_t seed,
                         void *out) {
  MurmurHash3_x64_128_state *state = MurmurHash3_x64_128_new();
  if(seed != 0) {
    MurmurHash3_x64_128_update(state, &seed, sizeof(seed));
  }
  MurmurHash3_x64_128_update(state, key, len);
  MurmurHash3_x64_128_finalise(state, out);
}
544
/* Write the 16-byte digest to out and release the state (steal + free). */
void MurmurHash3_x64_128_finalise(MurmurHash3_x64_128_state *state, void *out) {
  MurmurHash3_x64_128_steal(state, out);
  MurmurHash3_x64_128_free(state);
}
549
/* Return non-zero when two streaming states are equivalent: same lane
 * values, same total and buffered lengths, and identical buffered tail
 * bytes (the tail is only compared over the xs_len valid bytes). */
int MurmurHash3_x64_128_equal(MurmurHash3_x64_128_state *a,
                              MurmurHash3_x64_128_state *b) {
  if(a->h1 != b->h1 || a->h2 != b->h2) {
    return 0;
  }
  if(a->xs_len != b->xs_len || a->len != b->len) {
    return 0;
  }
  if(a->xs_len == 0) {
    return 1;
  }
  return memcmp(a->xs, b->xs, a->xs_len) == 0;
}
557
558 //-----------------------------------------------------------------------------
559