1 /*-----------------------------------------------------------------------------
2  * MurmurHash3 was written by Austin Appleby, and is placed in the public
3  * domain.
4  *
5  * This implementation was written by Shane Day, and is also public domain.
6  *
7  * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A)
8  * with support for progressive processing.
9  */
10 
11 /*-----------------------------------------------------------------------------
12 
13 If you want to understand the MurmurHash algorithm you would be much better
14 off reading the original source. Just point your browser at:
15 http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp
16 
17 
What does this version provide?
19 
20 1. Progressive data feeding. Useful when the entire payload to be hashed
21 does not fit in memory or when the data is streamed through the application.
22 Also useful when hashing a number of strings with a common prefix. A partial
23 hash of a prefix string can be generated and reused for each suffix string.
24 
25 2. Portability. Plain old C so that it should compile on any old compiler.
26 Both CPU endian and access-alignment neutral, but avoiding inefficient code
27 when possible depending on CPU capabilities.
28 
29 3. Drop in. I personally like nice self contained public domain code, making it
30 easy to pilfer without loads of refactoring to work properly in the existing
31 application code & makefile structure and mucking around with licence files.
32 Just copy PMurHash.h and PMurHash.c and you're ready to go.
33 
34 
35 How does it work?
36 
37 We can only process entire 32 bit chunks of input, except for the very end
38 that may be shorter. So along with the partial hash we need to give back to
39 the caller a carry containing up to 3 bytes that we were unable to process.
40 This carry also needs to record the number of bytes the carry holds. I use
41 the low 2 bits as a count (0..3) and the carry bytes are shifted into the
42 high byte in stream order.
43 
To handle endianness I simply use a macro that reads a uint32_t and define
that macro to be a direct read on little endian machines, a read and swap
on big endian machines, or a byte-by-byte read if the endianness is unknown.
47 
48 -----------------------------------------------------------------------------*/
49 
50 
51 #include "PMurHash.h"
52 
53 #include <sys/types.h>
54 
/* I used ugly type names in the header to avoid potential conflicts with
 * application or system typedefs & defines. Since I'm not including any more
 * headers below here I can rename these so that the code reads like C99.
 * Note these are macros, not typedefs: every use of uint32_t/uint8_t below
 * really refers to the MH_* types declared in PMurHash.h. */
#undef uint32_t
#define uint32_t MH_UINT32
#undef uint8_t
#define uint8_t  MH_UINT8

/* MSVC warnings we choose to ignore */
#if defined(_MSC_VER)
  #pragma warning(disable: 4127) /* conditional expression is constant */
#endif
67 
/*-----------------------------------------------------------------------------
 * Endianness, misalignment capabilities and util macros
 *
 * The following 3 macros are defined in this section. The other macros defined
 * are only needed to help derive these 3.
 *
 * READ_UINT32(x)   Read a little endian unsigned 32-bit int
 * UNALIGNED_SAFE   Defined if READ_UINT32 works on non-word boundaries
 * ROTL32(x,r)      Rotate x left by r bits
 */

#ifdef __DragonFly__
#include <sys/endian.h>
#ifndef __BYTE_ORDER
#define	__BYTE_ORDER	_BYTE_ORDER
#define	__BIG_ENDIAN	_BIG_ENDIAN
#define	__LITTLE_ENDIAN	_LITTLE_ENDIAN
#endif
#endif

/* Convention is to define __BYTE_ORDER == to one of these values */
#if !defined(__BIG_ENDIAN)
  #define __BIG_ENDIAN 4321
#endif
#if !defined(__LITTLE_ENDIAN)
  #define __LITTLE_ENDIAN 1234
#endif

/* I386 and X86-64: little endian and tolerant of unaligned word reads.
 * Guard the __BYTE_ORDER define because a system header included above
 * (e.g. glibc via <sys/types.h>) may already have supplied it. */
#if defined(_M_IX86) || defined(__i386__) || defined(__i386) || defined(i386) || \
    defined(_M_X64) || defined(__x86_64__) || defined(__amd64__)
  #if !defined(__BYTE_ORDER)
    #define __BYTE_ORDER __LITTLE_ENDIAN
  #endif
  #define UNALIGNED_SAFE
#endif

/* gcc 'may' define __LITTLE_ENDIAN__ or __BIG_ENDIAN__ to 1 (Note the trailing __),
 * or even _LITTLE_ENDIAN or _BIG_ENDIAN (Note the single _ prefix) */
#if !defined(__BYTE_ORDER)
  #if defined(__LITTLE_ENDIAN__) && __LITTLE_ENDIAN__==1 || defined(_LITTLE_ENDIAN) && _LITTLE_ENDIAN==1
    #define __BYTE_ORDER __LITTLE_ENDIAN
  #elif defined(__BIG_ENDIAN__) && __BIG_ENDIAN__==1 || defined(_BIG_ENDIAN) && _BIG_ENDIAN==1
    #define __BYTE_ORDER __BIG_ENDIAN
  #endif
#endif

/* gcc (usually) defines xEL/EB macros for ARM and MIPS endianness */
#if !defined(__BYTE_ORDER)
  #if defined(__ARMEL__) || defined(__MIPSEL__)
    #define __BYTE_ORDER __LITTLE_ENDIAN
  #endif
  #if defined(__ARMEB__) || defined(__MIPSEB__)
    #define __BYTE_ORDER __BIG_ENDIAN
  #endif
#endif

/* Now find best way we can to READ_UINT32.
 * In the byte-by-byte fallbacks each byte is widened to uint32_t BEFORE
 * shifting: a uint8_t promotes to (signed) int, so ptr[3]<<24 with the top
 * bit set would shift into the sign bit — undefined behaviour. */
#if __BYTE_ORDER==__LITTLE_ENDIAN
  /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */
  #define READ_UINT32(ptr)   (*((const uint32_t*)(ptr)))
#elif __BYTE_ORDER==__BIG_ENDIAN
  /* TODO: Add additional cases below where a compiler provided bswap32 is available */
  #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3))
    #define READ_UINT32(ptr)   (__builtin_bswap32(*((const uint32_t*)(ptr))))
  #else
    /* Without a known fast bswap32 we're just as well off doing this */
    #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)
    #define UNALIGNED_SAFE
  #endif
#else
  /* Unknown endianness so last resort is to read individual bytes */
  #define READ_UINT32(ptr)   ((uint32_t)(ptr)[0]|(uint32_t)(ptr)[1]<<8|(uint32_t)(ptr)[2]<<16|(uint32_t)(ptr)[3]<<24)

  /* Since we're not doing word-reads we can skip the messing about with realignment */
  #define UNALIGNED_SAFE
#endif

/* Find best way to ROTL32 */
#if defined(_MSC_VER)
  #include <stdlib.h>  /* Microsoft put _rotl declaration in here */
  #define ROTL32(x,r)  _rotl(x,r)
#else
  /* gcc recognises this code and generates a rotate instruction for CPUs with
   * one. Arguments are parenthesised so expression arguments expand safely;
   * r must be 1..31 (a shift by 32 would be undefined), which every call site
   * in this file honours (13 and 15). */
  #define ROTL32(x,r)  (((uint32_t)(x) << (r)) | ((uint32_t)(x) >> (32 - (r))))
#endif


/*-----------------------------------------------------------------------------
 * Core murmurhash algorithm macros */

#define C1  (0xcc9e2d51)
#define C2  (0x1b873593)

/* This is the main processing body of the algorithm. It operates
 * on each full 32-bits of input. */
#define DOBLOCK(h1, k1) do{ \
        k1 *= C1; \
        k1 = ROTL32(k1,15); \
        k1 *= C2; \
        \
        h1 ^= k1; \
        h1 = ROTL32(h1,13); \
        h1 = h1*5+0xe6546b64; \
    }while(0)


/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */
/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */
#define DOBYTES(cnt, h1, c, n, ptr, len) do{ \
    int _i = cnt; \
    while(_i--) { \
        /* Widen the byte before shifting: avoids int sign-bit overflow UB */ \
        c = c>>8 | (uint32_t)*ptr++<<24; \
        n++; len--; \
        if(n==4) { \
            DOBLOCK(h1, c); \
            n = 0; \
        } \
    } }while(0)
184 
/*---------------------------------------------------------------------------*/

/* Main hashing function. Initialise carry to 0 and h1 to 0 or an initial seed
 * if wanted. Both ph1 and pcarry are required arguments.
 *
 * Feeds len bytes at key into the running hash *ph1. Whole 32-bit words are
 * mixed in immediately; up to 3 leftover bytes are parked in *pcarry, whose
 * low 2 bits store the byte count (0..3) and whose upper bytes hold the
 * pending data in stream order (newest byte highest). May be called
 * repeatedly to hash a stream progressively. */
void PMurHash32_Process(uint32_t *ph1, uint32_t *pcarry, const void *key, int len)
{
  uint32_t h1 = *ph1;
  uint32_t c = *pcarry;

  const uint8_t *ptr = (const uint8_t*)key;
  const uint8_t *end;

  /* Extract carry count from low 2 bits of c value */
  int n = c & 3;

#if defined(UNALIGNED_SAFE)
  /* This CPU handles unaligned word access */

  /* Consume any carry bytes: top the carry up to a full word so it gets
   * churned into the hash, leaving the carry empty */
  int i = (4-n) & 3;
  if(i && i <= len) {
    DOBYTES(i, h1, c, n, ptr, len);
  }

  /* Process 32-bit chunks */
  end = ptr + len/4*4;
  for( ; ptr < end ; ptr+=4) {
    uint32_t k1 = READ_UINT32(ptr);
    DOBLOCK(h1, k1);
  }

#else /*UNALIGNED_SAFE*/
  /* This CPU does not handle unaligned word access */

  /* Consume enough so that the next data byte is word aligned. Only the low
   * 2 bits of the address matter here, so truncating the pointer to long is
   * harmless even on platforms where long is narrower than a pointer. */
  int i = -(long)ptr & 3;
  if(i && i <= len) {
      DOBYTES(i, h1, c, n, ptr, len);
  }

  /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */
  end = ptr + len/4*4;
  switch(n) { /* how many bytes in c */
  case 0: /* c=[----]  w=[3210]  b=[3210]=w            c'=[----] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = READ_UINT32(ptr);
      DOBLOCK(h1, k1);
    }
    break;
  case 1: /* c=[0---]  w=[4321]  b=[3210]=c>>24|w<<8   c'=[4---] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>24;
      c = READ_UINT32(ptr);
      k1 |= c<<8;
      DOBLOCK(h1, k1);
    }
    break;
  case 2: /* c=[10--]  w=[5432]  b=[3210]=c>>16|w<<16  c'=[54--] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>16;
      c = READ_UINT32(ptr);
      k1 |= c<<16;
      DOBLOCK(h1, k1);
    }
    break;
  case 3: /* c=[210-]  w=[6543]  b=[3210]=c>>8|w<<24   c'=[654-] */
    for( ; ptr < end ; ptr+=4) {
      uint32_t k1 = c>>8;
      c = READ_UINT32(ptr);
      k1 |= c<<24;
      DOBLOCK(h1, k1);
    }
  }
#endif /*UNALIGNED_SAFE*/

  /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */
  len -= len/4*4;

  /* Append any remaining bytes into carry */
  DOBYTES(len, h1, c, n, ptr, len);

  /* Copy out new running hash and carry; the count is re-packed into the
   * low 2 bits of the carry word (low byte cleared first) */
  *ph1 = h1;
  *pcarry = (c & ~0xff) | n;
}
270 
271 /*---------------------------------------------------------------------------*/
272 
273 /* Finalize a hash. To match the original Murmur3A the total_length must be provided */
PMurHash32_Result(uint32_t h,uint32_t carry,uint32_t total_length)274 uint32_t PMurHash32_Result(uint32_t h, uint32_t carry, uint32_t total_length)
275 {
276   uint32_t k1;
277   int n = carry & 3;
278   if(n) {
279     k1 = carry >> (4-n)*8;
280     k1 *= C1; k1 = ROTL32(k1,15); k1 *= C2; h ^= k1;
281   }
282   h ^= total_length;
283 
284   /* fmix */
285   h ^= h >> 16;
286   h *= 0x85ebca6b;
287   h ^= h >> 13;
288   h *= 0xc2b2ae35;
289   h ^= h >> 16;
290 
291   return h;
292 }
293 
/*---------------------------------------------------------------------------*/

/* Murmur3A-compatible all-at-once hash: equivalent to seeding a progressive
 * hash, processing the whole buffer in one call, then finalising it. */
uint32_t PMurHash32(uint32_t seed, const void *key, int len)
{
  uint32_t hash = seed;
  uint32_t carry = 0;

  PMurHash32_Process(&hash, &carry, key, len);

  return PMurHash32_Result(hash, carry, (uint32_t)len);
}
303 
/*---------------------------------------------------------------------------*/

/* Provide an API suitable for smhasher: hash len bytes at key with seed and
 * write the 32-bit result through out. */
void PMurHash32_test(const void *key, int len, uint32_t seed, void *out)
{
  uint32_t hash = seed;
  uint32_t carry = 0;
  const uint8_t *ptr = (const uint8_t*)key;
  const uint8_t *end = ptr + len;

#if 0 /* Exercise the progressive processing with random-sized chunks */
  while(ptr < end) {
    //const uint8_t *mid = ptr + rand()%(end-ptr)+1;
    const uint8_t *mid = ptr + (rand()&0xF);
    mid = mid<end?mid:end;
    PMurHash32_Process(&hash, &carry, ptr, mid-ptr);
    ptr = mid;
  }
#else
  PMurHash32_Process(&hash, &carry, ptr, (int)(end-ptr));
#endif

  *(uint32_t*)out = PMurHash32_Result(hash, carry, (uint32_t)len);
}
327 
328 /*---------------------------------------------------------------------------*/
329