1 /*
2 -------------------------------------------------------------------------------
3 lookup3.c, by Bob Jenkins, May 2006, Public Domain.
4
5 These are functions for producing 32-bit hashes for hash table lookup.
6 hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
7 are externally useful functions. Routines to test the hash are included
8 if SELF_TEST is defined. You can use this free for any purpose. It's in
9 the public domain. It has no warranty.
10
11 You probably want to use hashlittle(). hashlittle() and hashbig()
12 hash byte arrays. hashlittle() is is faster than hashbig() on
13 little-endian machines. Intel and AMD are little-endian machines.
14 On second thought, you probably want hashlittle2(), which is identical to
15 hashlittle() except it returns two 32-bit hashes for the price of one.
16 You could implement hashbig2() if you wanted but I haven't bothered here.
17
18 If you want to find a hash of, say, exactly 7 integers, do
19 a = i1; b = i2; c = i3;
20 mix(a,b,c);
21 a += i4; b += i5; c += i6;
22 mix(a,b,c);
23 a += i7;
24 final(a,b,c);
25 then use c as the hash value. If you have a variable length array of
26 4-byte integers to hash, use hashword(). If you have a byte array (like
27 a character string), use hashlittle(). If you have several byte arrays, or
28 a mix of things, see the comments above hashlittle().
29
30 Why is this so big? I read 12 bytes at a time into 3 4-byte integers,
31 then mix those integers. This is fast (you can do a lot more thorough
32 mixing with 12*3 instructions on 3 integers than you can with 3 instructions
33 on 1 byte), but shoehorning those bytes into integers efficiently is messy.
34 -------------------------------------------------------------------------------
35 */
36 //#define SELF_TEST 1
37
38 #include <stdio.h> /* defines printf for tests */
39 #include <time.h> /* defines time_t for timings in the test */
40 #include <stdint.h> /* defines uint32_t etc */
41 #include <sys/param.h> /* attempt to define endianness */
42
43 #ifdef HAVE_ENDIAN_H
44 # include <endian.h>
45 #else
46 # ifdef HAVE_SYS_ENDIAN_H
47 # include <sys/endian.h>
48 # endif
49 #endif
50
51 #include "lookup3.h"
52
53 /*
54 * My best guess at if you are big-endian or little-endian. This may
55 * need adjustment.
56 */
57 #if (defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && \
58 __BYTE_ORDER == __LITTLE_ENDIAN) || \
59 (defined(i386) || defined(__i386__) || defined(__i486__) || \
60 defined(__i586__) || defined(__i686__) || defined(vax) || defined(MIPSEL))
61 # define HASH_LITTLE_ENDIAN 1
62 # define HASH_BIG_ENDIAN 0
63 #elif (defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && \
64 __BYTE_ORDER == __BIG_ENDIAN) || \
65 (defined(sparc) || defined(POWERPC) || defined(mc68000) || defined(sel))
66 # define HASH_LITTLE_ENDIAN 0
67 # define HASH_BIG_ENDIAN 1
68 #else
69 # define HASH_LITTLE_ENDIAN 0
70 # define HASH_BIG_ENDIAN 0
71 #endif
72
73 #define hashsize(n) ((uint32_t)1<<(n))
74 #define hashmask(n) (hashsize(n)-1)
75 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
76
77 /*
78 -------------------------------------------------------------------------------
79 mix -- mix 3 32-bit values reversibly.
80
81 This is reversible, so any information in (a,b,c) before mix() is
82 still in (a,b,c) after mix().
83
84 If four pairs of (a,b,c) inputs are run through mix(), or through
85 mix() in reverse, there are at least 32 bits of the output that
86 are sometimes the same for one pair and different for another pair.
87 This was tested for:
88 * pairs that differed by one bit, by two bits, in any combination
89 of top bits of (a,b,c), or in any combination of bottom bits of
90 (a,b,c).
91 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
92 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
93 is commonly produced by subtraction) look like a single 1-bit
94 difference.
95 * the base values were pseudorandom, all zero but one bit set, or
96 all zero plus a counter that starts at zero.
97
98 Some k values for my "a-=c; a^=rot(c,k); c+=b;" arrangement that
99 satisfy this are
100 4 6 8 16 19 4
101 9 15 3 18 27 15
102 14 9 3 7 17 3
103 Well, "9 15 3 18 27 15" didn't quite get 32 bits diffing
104 for "differ" defined as + with a one-bit base and a two-bit delta. I
105 used http://burtleburtle.net/bob/hash/avalanche.html to choose
106 the operations, constants, and arrangements of the variables.
107
108 This does not achieve avalanche. There are input bits of (a,b,c)
109 that fail to affect some output bits of (a,b,c), especially of a. The
110 most thoroughly mixed value is c, but it doesn't really even achieve
111 avalanche in c.
112
113 This allows some parallelism. Read-after-writes are good at doubling
114 the number of bits affected, so the goal of mixing pulls in the opposite
115 direction as the goal of parallelism. I did what I could. Rotates
116 seem to cost as much as shifts on every machine I could lay my hands
117 on, and rotates are much kinder to the top and bottom bits, so I used
118 rotates.
119 -------------------------------------------------------------------------------
120 */
121 #define mix(a,b,c) \
122 { \
123 a -= c; a ^= rot(c, 4); c += b; \
124 b -= a; b ^= rot(a, 6); a += c; \
125 c -= b; c ^= rot(b, 8); b += a; \
126 a -= c; a ^= rot(c,16); c += b; \
127 b -= a; b ^= rot(a,19); a += c; \
128 c -= b; c ^= rot(b, 4); b += a; \
129 }
130
131 /*
132 -------------------------------------------------------------------------------
133 final -- final mixing of 3 32-bit values (a,b,c) into c
134
135 Pairs of (a,b,c) values differing in only a few bits will usually
136 produce values of c that look totally different. This was tested for
137 * pairs that differed by one bit, by two bits, in any combination
138 of top bits of (a,b,c), or in any combination of bottom bits of
139 (a,b,c).
140 * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
141 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
142 is commonly produced by subtraction) look like a single 1-bit
143 difference.
144 * the base values were pseudorandom, all zero but one bit set, or
145 all zero plus a counter that starts at zero.
146
147 These constants passed:
148 14 11 25 16 4 14 24
149 12 14 25 16 4 14 24
150 and these came close:
151 4 8 15 26 3 22 24
152 10 8 15 26 3 22 24
153 11 8 15 26 3 22 24
154 -------------------------------------------------------------------------------
155 */
156 #define final(a,b,c) \
157 { \
158 c ^= b; c -= rot(b,14); \
159 a ^= c; a -= rot(c,11); \
160 b ^= a; b -= rot(a,25); \
161 c ^= b; c -= rot(b,16); \
162 a ^= c; a -= rot(c,4); \
163 b ^= a; b -= rot(a,14); \
164 c ^= b; c -= rot(b,24); \
165 }
166
167 /*
168 --------------------------------------------------------------------
169 This works on all machines. To be useful, it requires
170 -- that the key be an array of uint32_t's, and
171 -- that the length be the number of uint32_t's in the key
172
173 The function hashword() is identical to hashlittle() on little-endian
174 machines, and identical to hashbig() on big-endian machines,
175 except that the length has to be measured in uint32_ts rather than in
176 bytes. hashlittle() is more complicated than hashword() only because
177 hashlittle() has to dance around fitting the key bytes into registers.
178 --------------------------------------------------------------------
179 */
my_hashword(const uint32_t * k,size_t length,uint32_t initval)180 uint32_t my_hashword(
181 const uint32_t *k, /* the key, an array of uint32_t values */
182 size_t length, /* the length of the key, in uint32_ts */
183 uint32_t initval) /* the previous hash, or an arbitrary value */
184 {
185 uint32_t a,b,c;
186
187 /* Set up the internal state */
188 a = b = c = 0xdeadbeef + (((uint32_t)length)<<2) + initval;
189
190 /*------------------------------------------------- handle most of the key */
191 while (length > 3)
192 {
193 a += k[0];
194 b += k[1];
195 c += k[2];
196 mix(a,b,c);
197 length -= 3;
198 k += 3;
199 }
200
201 /*------------------------------------------- handle the last 3 uint32_t's */
202 switch(length) /* all the case statements fall through */
203 {
204 case 3 : c+=k[2];
205 case 2 : b+=k[1];
206 case 1 : a+=k[0];
207 final(a,b,c);
208 case 0: /* case 0: nothing left to add */
209 break;
210 }
211 /*------------------------------------------------------ report the result */
212 return c;
213 }
214
215
216 /*
217 --------------------------------------------------------------------
218 hashword2() -- same as hashword(), but take two seeds and return two
219 32-bit values. pc and pb must both be nonnull, and *pc and *pb must
220 both be initialized with seeds. If you pass in (*pb)==0, the output
221 (*pc) will be the same as the return value from hashword().
222 --------------------------------------------------------------------
223 */
my_hashword2(const uint32_t * k,size_t length,uint32_t * pc,uint32_t * pb)224 void my_hashword2 (
225 const uint32_t *k, /* the key, an array of uint32_t values */
226 size_t length, /* the length of the key, in uint32_ts */
227 uint32_t *pc, /* IN: seed OUT: primary hash value */
228 uint32_t *pb) /* IN: more seed OUT: secondary hash value */
229 {
230 uint32_t a,b,c;
231
232 /* Set up the internal state */
233 a = b = c = 0xdeadbeef + ((uint32_t)(length<<2)) + *pc;
234 c += *pb;
235
236 /*------------------------------------------------- handle most of the key */
237 while (length > 3)
238 {
239 a += k[0];
240 b += k[1];
241 c += k[2];
242 mix(a,b,c);
243 length -= 3;
244 k += 3;
245 }
246
247 /*------------------------------------------- handle the last 3 uint32_t's */
248 switch(length) /* all the case statements fall through */
249 {
250 case 3 : c+=k[2];
251 case 2 : b+=k[1];
252 case 1 : a+=k[0];
253 final(a,b,c);
254 case 0: /* case 0: nothing left to add */
255 break;
256 }
257 /*------------------------------------------------------ report the result */
258 *pc=c; *pb=b;
259 }
260
261
262 /*
263 -------------------------------------------------------------------------------
264 hashlittle() -- hash a variable-length key into a 32-bit value
265 k : the key (the unaligned variable-length array of bytes)
266 length : the length of the key, counting by bytes
267 initval : can be any 4-byte value
268 Returns a 32-bit value. Every bit of the key affects every bit of
269 the return value. Two keys differing by one or two bits will have
270 totally different hash values.
271
272 The best hash table sizes are powers of 2. There is no need to do
273 mod a prime (mod is sooo slow!). If you need less than 32 bits,
274 use a bitmask. For example, if you need only 10 bits, do
275 h = (h & hashmask(10));
276 In which case, the hash table should have hashsize(10) elements.
277
278 If you are hashing n strings (uint8_t **)k, do it like this:
279 for (i=0, h=0; i<n; ++i) h = hashlittle( k[i], len[i], h);
280
281 By Bob Jenkins, 2006. bob_jenkins@burtleburtle.net. You may use this
282 code any way you wish, private, educational, or commercial. It's free.
283
284 Use for hash table lookup, or anything where one collision in 2^^32 is
285 acceptable. Do NOT use for cryptographic purposes.
286 -------------------------------------------------------------------------------
287 */
288
my_hashlittle(const void * key,size_t length,uint32_t initval)289 uint32_t my_hashlittle( const void *key, size_t length, uint32_t initval)
290 {
291 uint32_t a,b,c; /* internal state */
292 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
293
294 /* Set up the internal state */
295 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
296
297 u.ptr = key;
298 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
299 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
300 #ifdef VALGRIND
301 const uint8_t *k8;
302 #endif
303
304 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
305 while (length > 12)
306 {
307 a += k[0];
308 b += k[1];
309 c += k[2];
310 mix(a,b,c);
311 length -= 12;
312 k += 3;
313 }
314
315 /*----------------------------- handle the last (probably partial) block */
316 /*
317 * "k[2]&0xffffff" actually reads beyond the end of the string, but
318 * then masks off the part it's not allowed to read. Because the
319 * string is aligned, the masked-off tail is in the same word as the
320 * rest of the string. Every machine with memory protection I've seen
321 * does it on word boundaries, so is OK with this. But VALGRIND will
322 * still catch it and complain. The masking trick does make the hash
323 * noticably faster for short strings (like English words).
324 */
325 #ifndef VALGRIND
326
327 switch(length)
328 {
329 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
330 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
331 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
332 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
333 case 8 : b+=k[1]; a+=k[0]; break;
334 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
335 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
336 case 5 : b+=k[1]&0xff; a+=k[0]; break;
337 case 4 : a+=k[0]; break;
338 case 3 : a+=k[0]&0xffffff; break;
339 case 2 : a+=k[0]&0xffff; break;
340 case 1 : a+=k[0]&0xff; break;
341 case 0 : return c; /* zero length strings require no mixing */
342 }
343
344 #else /* make valgrind happy */
345
346 k8 = (const uint8_t *)k;
347 switch(length)
348 {
349 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
350 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
351 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
352 case 9 : c+=k8[8]; /* fall through */
353 case 8 : b+=k[1]; a+=k[0]; break;
354 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
355 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
356 case 5 : b+=k8[4]; /* fall through */
357 case 4 : a+=k[0]; break;
358 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
359 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
360 case 1 : a+=k8[0]; break;
361 case 0 : return c;
362 }
363
364 #endif /* !valgrind */
365
366 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
367 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
368 const uint8_t *k8;
369
370 /*--------------- all but last block: aligned reads and different mixing */
371 while (length > 12)
372 {
373 a += k[0] + (((uint32_t)k[1])<<16);
374 b += k[2] + (((uint32_t)k[3])<<16);
375 c += k[4] + (((uint32_t)k[5])<<16);
376 mix(a,b,c);
377 length -= 12;
378 k += 6;
379 }
380
381 /*----------------------------- handle the last (probably partial) block */
382 k8 = (const uint8_t *)k;
383 switch(length)
384 {
385 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
386 b+=k[2]+(((uint32_t)k[3])<<16);
387 a+=k[0]+(((uint32_t)k[1])<<16);
388 break;
389 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
390 case 10: c+=k[4];
391 b+=k[2]+(((uint32_t)k[3])<<16);
392 a+=k[0]+(((uint32_t)k[1])<<16);
393 break;
394 case 9 : c+=k8[8]; /* fall through */
395 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
396 a+=k[0]+(((uint32_t)k[1])<<16);
397 break;
398 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
399 case 6 : b+=k[2];
400 a+=k[0]+(((uint32_t)k[1])<<16);
401 break;
402 case 5 : b+=k8[4]; /* fall through */
403 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
404 break;
405 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
406 case 2 : a+=k[0];
407 break;
408 case 1 : a+=k8[0];
409 break;
410 case 0 : return c; /* zero length requires no mixing */
411 }
412
413 } else { /* need to read the key one byte at a time */
414 const uint8_t *k = (const uint8_t *)key;
415
416 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
417 while (length > 12)
418 {
419 a += k[0];
420 a += ((uint32_t)k[1])<<8;
421 a += ((uint32_t)k[2])<<16;
422 a += ((uint32_t)k[3])<<24;
423 b += k[4];
424 b += ((uint32_t)k[5])<<8;
425 b += ((uint32_t)k[6])<<16;
426 b += ((uint32_t)k[7])<<24;
427 c += k[8];
428 c += ((uint32_t)k[9])<<8;
429 c += ((uint32_t)k[10])<<16;
430 c += ((uint32_t)k[11])<<24;
431 mix(a,b,c);
432 length -= 12;
433 k += 12;
434 }
435
436 /*-------------------------------- last block: affect all 32 bits of (c) */
437 switch(length) /* all the case statements fall through */
438 {
439 case 12: c+=((uint32_t)k[11])<<24;
440 case 11: c+=((uint32_t)k[10])<<16;
441 case 10: c+=((uint32_t)k[9])<<8;
442 case 9 : c+=k[8];
443 case 8 : b+=((uint32_t)k[7])<<24;
444 case 7 : b+=((uint32_t)k[6])<<16;
445 case 6 : b+=((uint32_t)k[5])<<8;
446 case 5 : b+=k[4];
447 case 4 : a+=((uint32_t)k[3])<<24;
448 case 3 : a+=((uint32_t)k[2])<<16;
449 case 2 : a+=((uint32_t)k[1])<<8;
450 case 1 : a+=k[0];
451 break;
452 case 0 : return c;
453 }
454 }
455
456 final(a,b,c);
457 return c;
458 }
459
460
461 /*
462 * hashlittle2: return 2 32-bit hash values
463 *
464 * This is identical to hashlittle(), except it returns two 32-bit hash
465 * values instead of just one. This is good enough for hash table
466 * lookup with 2^^64 buckets, or if you want a second hash if you're not
467 * happy with the first, or if you want a probably-unique 64-bit ID for
468 * the key. *pc is better mixed than *pb, so use *pc first. If you want
469 * a 64-bit value do something like "*pc + (((uint64_t)*pb)<<32)".
470 */
my_hashlittle2(const void * key,size_t length,uint32_t * pc,uint32_t * pb)471 void my_hashlittle2(
472 const void *key, /* the key to hash */
473 size_t length, /* length of the key */
474 uint32_t *pc, /* IN: primary initval, OUT: primary hash */
475 uint32_t *pb) /* IN: secondary initval, OUT: secondary hash */
476 {
477 uint32_t a,b,c; /* internal state */
478 union { const void *ptr; size_t i; } u; /* needed for Mac Powerbook G4 */
479
480 /* Set up the internal state */
481 a = b = c = 0xdeadbeef + ((uint32_t)length) + *pc;
482 c += *pb;
483
484 u.ptr = key;
485 if (HASH_LITTLE_ENDIAN && ((u.i & 0x3) == 0)) {
486 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
487 #ifdef VALGRIND
488 const uint8_t *k8;
489 #endif
490
491 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
492 while (length > 12)
493 {
494 a += k[0];
495 b += k[1];
496 c += k[2];
497 mix(a,b,c);
498 length -= 12;
499 k += 3;
500 }
501
502 /*----------------------------- handle the last (probably partial) block */
503 /*
504 * "k[2]&0xffffff" actually reads beyond the end of the string, but
505 * then masks off the part it's not allowed to read. Because the
506 * string is aligned, the masked-off tail is in the same word as the
507 * rest of the string. Every machine with memory protection I've seen
508 * does it on word boundaries, so is OK with this. But VALGRIND will
509 * still catch it and complain. The masking trick does make the hash
510 * noticably faster for short strings (like English words).
511 */
512 #ifndef VALGRIND
513
514 switch(length)
515 {
516 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
517 case 11: c+=k[2]&0xffffff; b+=k[1]; a+=k[0]; break;
518 case 10: c+=k[2]&0xffff; b+=k[1]; a+=k[0]; break;
519 case 9 : c+=k[2]&0xff; b+=k[1]; a+=k[0]; break;
520 case 8 : b+=k[1]; a+=k[0]; break;
521 case 7 : b+=k[1]&0xffffff; a+=k[0]; break;
522 case 6 : b+=k[1]&0xffff; a+=k[0]; break;
523 case 5 : b+=k[1]&0xff; a+=k[0]; break;
524 case 4 : a+=k[0]; break;
525 case 3 : a+=k[0]&0xffffff; break;
526 case 2 : a+=k[0]&0xffff; break;
527 case 1 : a+=k[0]&0xff; break;
528 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
529 }
530
531 #else /* make valgrind happy */
532
533 k8 = (const uint8_t *)k;
534 switch(length)
535 {
536 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
537 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
538 case 10: c+=((uint32_t)k8[9])<<8; /* fall through */
539 case 9 : c+=k8[8]; /* fall through */
540 case 8 : b+=k[1]; a+=k[0]; break;
541 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
542 case 6 : b+=((uint32_t)k8[5])<<8; /* fall through */
543 case 5 : b+=k8[4]; /* fall through */
544 case 4 : a+=k[0]; break;
545 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
546 case 2 : a+=((uint32_t)k8[1])<<8; /* fall through */
547 case 1 : a+=k8[0]; break;
548 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
549 }
550
551 #endif /* !valgrind */
552
553 } else if (HASH_LITTLE_ENDIAN && ((u.i & 0x1) == 0)) {
554 const uint16_t *k = (const uint16_t *)key; /* read 16-bit chunks */
555 const uint8_t *k8;
556
557 /*--------------- all but last block: aligned reads and different mixing */
558 while (length > 12)
559 {
560 a += k[0] + (((uint32_t)k[1])<<16);
561 b += k[2] + (((uint32_t)k[3])<<16);
562 c += k[4] + (((uint32_t)k[5])<<16);
563 mix(a,b,c);
564 length -= 12;
565 k += 6;
566 }
567
568 /*----------------------------- handle the last (probably partial) block */
569 k8 = (const uint8_t *)k;
570 switch(length)
571 {
572 case 12: c+=k[4]+(((uint32_t)k[5])<<16);
573 b+=k[2]+(((uint32_t)k[3])<<16);
574 a+=k[0]+(((uint32_t)k[1])<<16);
575 break;
576 case 11: c+=((uint32_t)k8[10])<<16; /* fall through */
577 case 10: c+=k[4];
578 b+=k[2]+(((uint32_t)k[3])<<16);
579 a+=k[0]+(((uint32_t)k[1])<<16);
580 break;
581 case 9 : c+=k8[8]; /* fall through */
582 case 8 : b+=k[2]+(((uint32_t)k[3])<<16);
583 a+=k[0]+(((uint32_t)k[1])<<16);
584 break;
585 case 7 : b+=((uint32_t)k8[6])<<16; /* fall through */
586 case 6 : b+=k[2];
587 a+=k[0]+(((uint32_t)k[1])<<16);
588 break;
589 case 5 : b+=k8[4]; /* fall through */
590 case 4 : a+=k[0]+(((uint32_t)k[1])<<16);
591 break;
592 case 3 : a+=((uint32_t)k8[2])<<16; /* fall through */
593 case 2 : a+=k[0];
594 break;
595 case 1 : a+=k8[0];
596 break;
597 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
598 }
599
600 } else { /* need to read the key one byte at a time */
601 const uint8_t *k = (const uint8_t *)key;
602
603 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
604 while (length > 12)
605 {
606 a += k[0];
607 a += ((uint32_t)k[1])<<8;
608 a += ((uint32_t)k[2])<<16;
609 a += ((uint32_t)k[3])<<24;
610 b += k[4];
611 b += ((uint32_t)k[5])<<8;
612 b += ((uint32_t)k[6])<<16;
613 b += ((uint32_t)k[7])<<24;
614 c += k[8];
615 c += ((uint32_t)k[9])<<8;
616 c += ((uint32_t)k[10])<<16;
617 c += ((uint32_t)k[11])<<24;
618 mix(a,b,c);
619 length -= 12;
620 k += 12;
621 }
622
623 /*-------------------------------- last block: affect all 32 bits of (c) */
624 switch(length) /* all the case statements fall through */
625 {
626 case 12: c+=((uint32_t)k[11])<<24;
627 case 11: c+=((uint32_t)k[10])<<16;
628 case 10: c+=((uint32_t)k[9])<<8;
629 case 9 : c+=k[8];
630 case 8 : b+=((uint32_t)k[7])<<24;
631 case 7 : b+=((uint32_t)k[6])<<16;
632 case 6 : b+=((uint32_t)k[5])<<8;
633 case 5 : b+=k[4];
634 case 4 : a+=((uint32_t)k[3])<<24;
635 case 3 : a+=((uint32_t)k[2])<<16;
636 case 2 : a+=((uint32_t)k[1])<<8;
637 case 1 : a+=k[0];
638 break;
639 case 0 : *pc=c; *pb=b; return; /* zero length strings require no mixing */
640 }
641 }
642
643 final(a,b,c);
644 *pc=c; *pb=b;
645 }
646
647
648
649 /*
650 * hashbig():
651 * This is the same as hashword() on big-endian machines. It is different
652 * from hashlittle() on all machines. hashbig() takes advantage of
653 * big-endian byte ordering.
654 */
my_hashbig(const void * key,size_t length,uint32_t initval)655 uint32_t my_hashbig( const void *key, size_t length, uint32_t initval)
656 {
657 uint32_t a,b,c;
658 union { const void *ptr; size_t i; } u; /* to cast key to (size_t) happily */
659
660 /* Set up the internal state */
661 a = b = c = 0xdeadbeef + ((uint32_t)length) + initval;
662
663 u.ptr = key;
664 if (HASH_BIG_ENDIAN && ((u.i & 0x3) == 0)) {
665 const uint32_t *k = (const uint32_t *)key; /* read 32-bit chunks */
666 #ifdef VALGRIND
667 const uint8_t *k8;
668 #endif
669
670 /*------ all but last block: aligned reads and affect 32 bits of (a,b,c) */
671 while (length > 12)
672 {
673 a += k[0];
674 b += k[1];
675 c += k[2];
676 mix(a,b,c);
677 length -= 12;
678 k += 3;
679 }
680
681 /*----------------------------- handle the last (probably partial) block */
682 /*
683 * "k[2]<<8" actually reads beyond the end of the string, but
684 * then shifts out the part it's not allowed to read. Because the
685 * string is aligned, the illegal read is in the same word as the
686 * rest of the string. Every machine with memory protection I've seen
687 * does it on word boundaries, so is OK with this. But VALGRIND will
688 * still catch it and complain. The masking trick does make the hash
689 * noticably faster for short strings (like English words).
690 */
691 #ifndef VALGRIND
692
693 switch(length)
694 {
695 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
696 case 11: c+=k[2]&0xffffff00; b+=k[1]; a+=k[0]; break;
697 case 10: c+=k[2]&0xffff0000; b+=k[1]; a+=k[0]; break;
698 case 9 : c+=k[2]&0xff000000; b+=k[1]; a+=k[0]; break;
699 case 8 : b+=k[1]; a+=k[0]; break;
700 case 7 : b+=k[1]&0xffffff00; a+=k[0]; break;
701 case 6 : b+=k[1]&0xffff0000; a+=k[0]; break;
702 case 5 : b+=k[1]&0xff000000; a+=k[0]; break;
703 case 4 : a+=k[0]; break;
704 case 3 : a+=k[0]&0xffffff00; break;
705 case 2 : a+=k[0]&0xffff0000; break;
706 case 1 : a+=k[0]&0xff000000; break;
707 case 0 : return c; /* zero length strings require no mixing */
708 }
709
710 #else /* make valgrind happy */
711
712 k8 = (const uint8_t *)k;
713 switch(length) /* all the case statements fall through */
714 {
715 case 12: c+=k[2]; b+=k[1]; a+=k[0]; break;
716 case 11: c+=((uint32_t)k8[10])<<8; /* fall through */
717 case 10: c+=((uint32_t)k8[9])<<16; /* fall through */
718 case 9 : c+=((uint32_t)k8[8])<<24; /* fall through */
719 case 8 : b+=k[1]; a+=k[0]; break;
720 case 7 : b+=((uint32_t)k8[6])<<8; /* fall through */
721 case 6 : b+=((uint32_t)k8[5])<<16; /* fall through */
722 case 5 : b+=((uint32_t)k8[4])<<24; /* fall through */
723 case 4 : a+=k[0]; break;
724 case 3 : a+=((uint32_t)k8[2])<<8; /* fall through */
725 case 2 : a+=((uint32_t)k8[1])<<16; /* fall through */
726 case 1 : a+=((uint32_t)k8[0])<<24; break;
727 case 0 : return c;
728 }
729
730 #endif /* !VALGRIND */
731
732 } else { /* need to read the key one byte at a time */
733 const uint8_t *k = (const uint8_t *)key;
734
735 /*--------------- all but the last block: affect some 32 bits of (a,b,c) */
736 while (length > 12)
737 {
738 a += ((uint32_t)k[0])<<24;
739 a += ((uint32_t)k[1])<<16;
740 a += ((uint32_t)k[2])<<8;
741 a += ((uint32_t)k[3]);
742 b += ((uint32_t)k[4])<<24;
743 b += ((uint32_t)k[5])<<16;
744 b += ((uint32_t)k[6])<<8;
745 b += ((uint32_t)k[7]);
746 c += ((uint32_t)k[8])<<24;
747 c += ((uint32_t)k[9])<<16;
748 c += ((uint32_t)k[10])<<8;
749 c += ((uint32_t)k[11]);
750 mix(a,b,c);
751 length -= 12;
752 k += 12;
753 }
754
755 /*-------------------------------- last block: affect all 32 bits of (c) */
756 switch(length) /* all the case statements fall through */
757 {
758 case 12: c+=k[11];
759 case 11: c+=((uint32_t)k[10])<<8;
760 case 10: c+=((uint32_t)k[9])<<16;
761 case 9 : c+=((uint32_t)k[8])<<24;
762 case 8 : b+=k[7];
763 case 7 : b+=((uint32_t)k[6])<<8;
764 case 6 : b+=((uint32_t)k[5])<<16;
765 case 5 : b+=((uint32_t)k[4])<<24;
766 case 4 : a+=k[3];
767 case 3 : a+=((uint32_t)k[2])<<8;
768 case 2 : a+=((uint32_t)k[1])<<16;
769 case 1 : a+=((uint32_t)k[0])<<24;
770 break;
771 case 0 : return c;
772 }
773 }
774
775 final(a,b,c);
776 return c;
777 }
778
779
780 #ifdef SELF_TEST
781
782 /* used for timings */
driver1()783 void driver1()
784 {
785 uint8_t buf[256];
786 uint32_t i;
787 uint32_t h=0;
788 time_t a,z;
789
790 time(&a);
791 for (i=0; i<256; ++i) buf[i] = 'x';
792 for (i=0; i<1; ++i)
793 {
794 h = hashlittle(&buf[0],1,h);
795 }
796 time(&z);
797 if (z-a > 0) printf("time %d %.8x\n", z-a, h);
798 }
799
800 /* check that every input bit changes every output bit half the time */
801 #define HASHSTATE 1
802 #define HASHLEN 1
803 #define MAXPAIR 60
804 #define MAXLEN 70
driver2()805 void driver2()
806 {
807 uint8_t qa[MAXLEN+1], qb[MAXLEN+2], *a = &qa[0], *b = &qb[1];
808 uint32_t c[HASHSTATE], d[HASHSTATE], i=0, j=0, k, l, m=0, z;
809 uint32_t e[HASHSTATE],f[HASHSTATE],g[HASHSTATE],h[HASHSTATE];
810 uint32_t x[HASHSTATE],y[HASHSTATE];
811 uint32_t hlen;
812
813 printf("No more than %d trials should ever be needed \n",MAXPAIR/2);
814 for (hlen=0; hlen < MAXLEN; ++hlen)
815 {
816 z=0;
817 for (i=0; i<hlen; ++i) /*----------------------- for each input byte, */
818 {
819 for (j=0; j<8; ++j) /*------------------------ for each input bit, */
820 {
821 for (m=1; m<8; ++m) /*------------ for serveral possible initvals, */
822 {
823 for (l=0; l<HASHSTATE; ++l)
824 e[l]=f[l]=g[l]=h[l]=x[l]=y[l]=~((uint32_t)0);
825
826 /*---- check that every output bit is affected by that input bit */
827 for (k=0; k<MAXPAIR; k+=2)
828 {
829 uint32_t finished=1;
830 /* keys have one bit different */
831 for (l=0; l<hlen+1; ++l) {a[l] = b[l] = (uint8_t)0;}
832 /* have a and b be two keys differing in only one bit */
833 a[i] ^= (k<<j);
834 a[i] ^= (k>>(8-j));
835 c[0] = hashlittle(a, hlen, m);
836 b[i] ^= ((k+1)<<j);
837 b[i] ^= ((k+1)>>(8-j));
838 d[0] = hashlittle(b, hlen, m);
839 /* check every bit is 1, 0, set, and not set at least once */
840 for (l=0; l<HASHSTATE; ++l)
841 {
842 e[l] &= (c[l]^d[l]);
843 f[l] &= ~(c[l]^d[l]);
844 g[l] &= c[l];
845 h[l] &= ~c[l];
846 x[l] &= d[l];
847 y[l] &= ~d[l];
848 if (e[l]|f[l]|g[l]|h[l]|x[l]|y[l]) finished=0;
849 }
850 if (finished) break;
851 }
852 if (k>z) z=k;
853 if (k==MAXPAIR)
854 {
855 printf("Some bit didn't change: ");
856 printf("%.8x %.8x %.8x %.8x %.8x %.8x ",
857 e[0],f[0],g[0],h[0],x[0],y[0]);
858 printf("i %d j %d m %d len %d\n", i, j, m, hlen);
859 }
860 if (z==MAXPAIR) goto done;
861 }
862 }
863 }
864 done:
865 if (z < MAXPAIR)
866 {
867 printf("Mix success %2d bytes %2d initvals ",i,m);
868 printf("required %d trials\n", z/2);
869 }
870 }
871 printf("\n");
872 }
873
874 /* Check for reading beyond the end of the buffer and alignment problems */
driver3()875 void driver3()
876 {
877 uint8_t buf[MAXLEN+20], *b;
878 uint32_t len;
879 uint8_t q[] = "This is the time for all good men to come to the aid of their country...";
880 uint32_t h;
881 uint8_t qq[] = "xThis is the time for all good men to come to the aid of their country...";
882 uint32_t i;
883 uint8_t qqq[] = "xxThis is the time for all good men to come to the aid of their country...";
884 uint32_t j;
885 uint8_t qqqq[] = "xxxThis is the time for all good men to come to the aid of their country...";
886 uint32_t ref,x,y;
887 uint8_t *p;
888
889 printf("Endianness. These lines should all be the same (for values filled in):\n");
890 printf("%.8x %.8x %.8x\n",
891 hashword((const uint32_t *)q, (sizeof(q)-1)/4, 13),
892 hashword((const uint32_t *)q, (sizeof(q)-5)/4, 13),
893 hashword((const uint32_t *)q, (sizeof(q)-9)/4, 13));
894 p = q;
895 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
896 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
897 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
898 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
899 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
900 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
901 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
902 p = &qq[1];
903 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
904 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
905 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
906 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
907 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
908 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
909 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
910 p = &qqq[2];
911 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
912 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
913 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
914 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
915 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
916 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
917 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
918 p = &qqqq[3];
919 printf("%.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x %.8x\n",
920 hashlittle(p, sizeof(q)-1, 13), hashlittle(p, sizeof(q)-2, 13),
921 hashlittle(p, sizeof(q)-3, 13), hashlittle(p, sizeof(q)-4, 13),
922 hashlittle(p, sizeof(q)-5, 13), hashlittle(p, sizeof(q)-6, 13),
923 hashlittle(p, sizeof(q)-7, 13), hashlittle(p, sizeof(q)-8, 13),
924 hashlittle(p, sizeof(q)-9, 13), hashlittle(p, sizeof(q)-10, 13),
925 hashlittle(p, sizeof(q)-11, 13), hashlittle(p, sizeof(q)-12, 13));
926 printf("\n");
927
928 /* check that hashlittle2 and hashlittle produce the same results */
929 i=47; j=0;
930 hashlittle2(q, sizeof(q), &i, &j);
931 if (hashlittle(q, sizeof(q), 47) != i)
932 printf("hashlittle2 and hashlittle mismatch\n");
933
934 /* check that hashword2 and hashword produce the same results */
935 len = 0xdeadbeef;
936 i=47, j=0;
937 hashword2(&len, 1, &i, &j);
938 if (hashword(&len, 1, 47) != i)
939 printf("hashword2 and hashword mismatch %x %x\n",
940 i, hashword(&len, 1, 47));
941
942 /* check hashlittle doesn't read before or after the ends of the string */
943 for (h=0, b=buf+1; h<8; ++h, ++b)
944 {
945 for (i=0; i<MAXLEN; ++i)
946 {
947 len = i;
948 for (j=0; j<i; ++j) *(b+j)=0;
949
950 /* these should all be equal */
951 ref = hashlittle(b, len, (uint32_t)1);
952 *(b+i)=(uint8_t)~0;
953 *(b-1)=(uint8_t)~0;
954 x = hashlittle(b, len, (uint32_t)1);
955 y = hashlittle(b, len, (uint32_t)1);
956 if ((ref != x) || (ref != y))
957 {
958 printf("alignment error: %.8x %.8x %.8x %d %d\n",ref,x,y,
959 h, i);
960 }
961 }
962 }
963 }
964
965 /* check for problems with nulls */
driver4()966 void driver4()
967 {
968 uint8_t buf[1];
969 uint32_t h,i,state[HASHSTATE];
970
971
972 buf[0] = ~0;
973 for (i=0; i<HASHSTATE; ++i) state[i] = 1;
974 printf("These should all be different\n");
975 for (i=0, h=0; i<8; ++i)
976 {
977 h = hashlittle(buf, 0, h);
978 printf("%2ld 0-byte strings, hash is %.8x\n", i, h);
979 }
980 }
981
driver5()982 void driver5()
983 {
984 uint32_t b,c;
985 b=0, c=0, hashlittle2("", 0, &c, &b);
986 printf("hash is %.8lx %.8lx\n", c, b); /* deadbeef deadbeef */
987 b=0xdeadbeef, c=0, hashlittle2("", 0, &c, &b);
988 printf("hash is %.8lx %.8lx\n", c, b); /* bd5b7dde deadbeef */
989 b=0xdeadbeef, c=0xdeadbeef, hashlittle2("", 0, &c, &b);
990 printf("hash is %.8lx %.8lx\n", c, b); /* 9c093ccd bd5b7dde */
991 b=0, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
992 printf("hash is %.8lx %.8lx\n", c, b); /* 17770551 ce7226e6 */
993 b=1, c=0, hashlittle2("Four score and seven years ago", 30, &c, &b);
994 printf("hash is %.8lx %.8lx\n", c, b); /* e3607cae bd371de4 */
995 b=0, c=1, hashlittle2("Four score and seven years ago", 30, &c, &b);
996 printf("hash is %.8lx %.8lx\n", c, b); /* cd628161 6cbea4b3 */
997 c = hashlittle("Four score and seven years ago", 30, 0);
998 printf("hash is %.8lx\n", c); /* 17770551 */
999 c = hashlittle("Four score and seven years ago", 30, 1);
1000 printf("hash is %.8lx\n", c); /* cd628161 */
1001 }
1002
1003
main()1004 int main()
1005 {
1006 driver1(); /* test that the key is hashed: used for timings */
1007 driver2(); /* test that whole key is hashed thoroughly */
1008 driver3(); /* test that nothing but the key is hashed */
1009 driver4(); /* test hashing multiple buffers (all buffers are null) */
1010 driver5(); /* test the hash against known vectors */
1011 return 1;
1012 }
1013
1014 #endif /* SELF_TEST */
1015