1 /*-------------------------------------------------------------------------
2 *
3 * hashfunc.c
4 * Support functions for hash access method.
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
12 *
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
16 *
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
19 *
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
25 */
26
27 #include "postgres.h"
28
29 #ifdef _MSC_VER
30 #include <float.h> /* for _isnan */
31 #endif
32 #include <math.h>
33
34 #include "access/hash.h"
35 #include "utils/builtins.h"
36
37 /*
38 * Datatype-specific hash functions.
39 *
40 * These support both hash indexes and hash joins.
41 *
42 * NOTE: some of these are also used by catcache operations, without
43 * any direct connection to hash indexes. Also, the common hash_any
44 * routine is also used by dynahash tables.
45 */
46
47 /* Note: this is used for both "char" and boolean datatypes */
48 Datum
hashchar(PG_FUNCTION_ARGS)49 hashchar(PG_FUNCTION_ARGS)
50 {
51 return hash_uint32((int32) PG_GETARG_CHAR(0));
52 }
53
54 Datum
hashcharextended(PG_FUNCTION_ARGS)55 hashcharextended(PG_FUNCTION_ARGS)
56 {
57 return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
58 }
59
60 Datum
hashint2(PG_FUNCTION_ARGS)61 hashint2(PG_FUNCTION_ARGS)
62 {
63 return hash_uint32((int32) PG_GETARG_INT16(0));
64 }
65
66 Datum
hashint2extended(PG_FUNCTION_ARGS)67 hashint2extended(PG_FUNCTION_ARGS)
68 {
69 return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
70 }
71
72 Datum
hashint4(PG_FUNCTION_ARGS)73 hashint4(PG_FUNCTION_ARGS)
74 {
75 return hash_uint32(PG_GETARG_INT32(0));
76 }
77
78 Datum
hashint4extended(PG_FUNCTION_ARGS)79 hashint4extended(PG_FUNCTION_ARGS)
80 {
81 return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
82 }
83
84 Datum
hashint8(PG_FUNCTION_ARGS)85 hashint8(PG_FUNCTION_ARGS)
86 {
87 /*
88 * The idea here is to produce a hash value compatible with the values
89 * produced by hashint4 and hashint2 for logically equal inputs; this is
90 * necessary to support cross-type hash joins across these input types.
91 * Since all three types are signed, we can xor the high half of the int8
92 * value if the sign is positive, or the complement of the high half when
93 * the sign is negative.
94 */
95 int64 val = PG_GETARG_INT64(0);
96 uint32 lohalf = (uint32) val;
97 uint32 hihalf = (uint32) (val >> 32);
98
99 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
100
101 return hash_uint32(lohalf);
102 }
103
104 Datum
hashint8extended(PG_FUNCTION_ARGS)105 hashint8extended(PG_FUNCTION_ARGS)
106 {
107 /* Same approach as hashint8 */
108 int64 val = PG_GETARG_INT64(0);
109 uint32 lohalf = (uint32) val;
110 uint32 hihalf = (uint32) (val >> 32);
111
112 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
113
114 return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
115 }
116
117 Datum
hashoid(PG_FUNCTION_ARGS)118 hashoid(PG_FUNCTION_ARGS)
119 {
120 return hash_uint32((uint32) PG_GETARG_OID(0));
121 }
122
123 Datum
hashoidextended(PG_FUNCTION_ARGS)124 hashoidextended(PG_FUNCTION_ARGS)
125 {
126 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
127 }
128
129 Datum
hashenum(PG_FUNCTION_ARGS)130 hashenum(PG_FUNCTION_ARGS)
131 {
132 return hash_uint32((uint32) PG_GETARG_OID(0));
133 }
134
135 Datum
hashenumextended(PG_FUNCTION_ARGS)136 hashenumextended(PG_FUNCTION_ARGS)
137 {
138 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
139 }
140
141 Datum
hashfloat4(PG_FUNCTION_ARGS)142 hashfloat4(PG_FUNCTION_ARGS)
143 {
144 float4 key = PG_GETARG_FLOAT4(0);
145 float8 key8;
146
147 /*
148 * On IEEE-float machines, minus zero and zero have different bit patterns
149 * but should compare as equal. We must ensure that they have the same
150 * hash value, which is most reliably done this way:
151 */
152 if (key == (float4) 0)
153 PG_RETURN_UINT32(0);
154
155 /*
156 * To support cross-type hashing of float8 and float4, we want to return
157 * the same hash value hashfloat8 would produce for an equal float8 value.
158 * So, widen the value to float8 and hash that. (We must do this rather
159 * than have hashfloat8 try to narrow its value to float4; that could fail
160 * on overflow.)
161 */
162 key8 = key;
163
164 /*
165 * Similarly, NaNs can have different bit patterns but they should all
166 * compare as equal. For backwards-compatibility reasons we force them to
167 * have the hash value of a standard float8 NaN. (You'd think we could
168 * replace key with a float4 NaN and then widen it; but on some old
169 * platforms, that way produces a different bit pattern.)
170 */
171 if (isnan(key8))
172 key8 = get_float8_nan();
173
174 return hash_any((unsigned char *) &key8, sizeof(key8));
175 }
176
177 Datum
hashfloat4extended(PG_FUNCTION_ARGS)178 hashfloat4extended(PG_FUNCTION_ARGS)
179 {
180 float4 key = PG_GETARG_FLOAT4(0);
181 uint64 seed = PG_GETARG_INT64(1);
182 float8 key8;
183
184 /* Same approach as hashfloat4 */
185 if (key == (float4) 0)
186 PG_RETURN_UINT64(seed);
187 key8 = key;
188 if (isnan(key8))
189 key8 = get_float8_nan();
190
191 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
192 }
193
194 Datum
hashfloat8(PG_FUNCTION_ARGS)195 hashfloat8(PG_FUNCTION_ARGS)
196 {
197 float8 key = PG_GETARG_FLOAT8(0);
198
199 /*
200 * On IEEE-float machines, minus zero and zero have different bit patterns
201 * but should compare as equal. We must ensure that they have the same
202 * hash value, which is most reliably done this way:
203 */
204 if (key == (float8) 0)
205 PG_RETURN_UINT32(0);
206
207 /*
208 * Similarly, NaNs can have different bit patterns but they should all
209 * compare as equal. For backwards-compatibility reasons we force them to
210 * have the hash value of a standard NaN.
211 */
212 if (isnan(key))
213 key = get_float8_nan();
214
215 return hash_any((unsigned char *) &key, sizeof(key));
216 }
217
218 Datum
hashfloat8extended(PG_FUNCTION_ARGS)219 hashfloat8extended(PG_FUNCTION_ARGS)
220 {
221 float8 key = PG_GETARG_FLOAT8(0);
222 uint64 seed = PG_GETARG_INT64(1);
223
224 /* Same approach as hashfloat8 */
225 if (key == (float8) 0)
226 PG_RETURN_UINT64(seed);
227 if (isnan(key))
228 key = get_float8_nan();
229
230 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
231 }
232
233 Datum
hashoidvector(PG_FUNCTION_ARGS)234 hashoidvector(PG_FUNCTION_ARGS)
235 {
236 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
237
238 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
239 }
240
241 Datum
hashoidvectorextended(PG_FUNCTION_ARGS)242 hashoidvectorextended(PG_FUNCTION_ARGS)
243 {
244 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
245
246 return hash_any_extended((unsigned char *) key->values,
247 key->dim1 * sizeof(Oid),
248 PG_GETARG_INT64(1));
249 }
250
251 Datum
hashname(PG_FUNCTION_ARGS)252 hashname(PG_FUNCTION_ARGS)
253 {
254 char *key = NameStr(*PG_GETARG_NAME(0));
255
256 return hash_any((unsigned char *) key, strlen(key));
257 }
258
259 Datum
hashnameextended(PG_FUNCTION_ARGS)260 hashnameextended(PG_FUNCTION_ARGS)
261 {
262 char *key = NameStr(*PG_GETARG_NAME(0));
263
264 return hash_any_extended((unsigned char *) key, strlen(key),
265 PG_GETARG_INT64(1));
266 }
267
268 Datum
hashtext(PG_FUNCTION_ARGS)269 hashtext(PG_FUNCTION_ARGS)
270 {
271 text *key = PG_GETARG_TEXT_PP(0);
272 Datum result;
273
274 /*
275 * Note: this is currently identical in behavior to hashvarlena, but keep
276 * it as a separate function in case we someday want to do something
277 * different in non-C locales. (See also hashbpchar, if so.)
278 */
279 result = hash_any((unsigned char *) VARDATA_ANY(key),
280 VARSIZE_ANY_EXHDR(key));
281
282 /* Avoid leaking memory for toasted inputs */
283 PG_FREE_IF_COPY(key, 0);
284
285 return result;
286 }
287
288 Datum
hashtextextended(PG_FUNCTION_ARGS)289 hashtextextended(PG_FUNCTION_ARGS)
290 {
291 text *key = PG_GETARG_TEXT_PP(0);
292 Datum result;
293
294 /* Same approach as hashtext */
295 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
296 VARSIZE_ANY_EXHDR(key),
297 PG_GETARG_INT64(1));
298
299 PG_FREE_IF_COPY(key, 0);
300
301 return result;
302 }
303
304 /*
305 * hashvarlena() can be used for any varlena datatype in which there are
306 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
307 */
308 Datum
hashvarlena(PG_FUNCTION_ARGS)309 hashvarlena(PG_FUNCTION_ARGS)
310 {
311 struct varlena *key = PG_GETARG_VARLENA_PP(0);
312 Datum result;
313
314 result = hash_any((unsigned char *) VARDATA_ANY(key),
315 VARSIZE_ANY_EXHDR(key));
316
317 /* Avoid leaking memory for toasted inputs */
318 PG_FREE_IF_COPY(key, 0);
319
320 return result;
321 }
322
323 Datum
hashvarlenaextended(PG_FUNCTION_ARGS)324 hashvarlenaextended(PG_FUNCTION_ARGS)
325 {
326 struct varlena *key = PG_GETARG_VARLENA_PP(0);
327 Datum result;
328
329 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
330 VARSIZE_ANY_EXHDR(key),
331 PG_GETARG_INT64(1));
332
333 PG_FREE_IF_COPY(key, 0);
334
335 return result;
336 }
337
338 /*
339 * This hash function was written by Bob Jenkins
340 * (bob_jenkins@burtleburtle.net), and superficially adapted
341 * for PostgreSQL by Neil Conway. For more information on this
342 * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
343 * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
344 *
345 * In the current code, we have adopted Bob's 2006 update of his hash
346 * function to fetch the data a word at a time when it is suitably aligned.
347 * This makes for a useful speedup, at the cost of having to maintain
348 * four code paths (aligned vs unaligned, and little-endian vs big-endian).
349 * It also uses two separate mixing functions mix() and final(), instead
350 * of a slower multi-purpose function.
351 */
352
/* Get a bit mask of the bits set in non-uint32 aligned addresses */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

/* Rotate a uint32 value left by k bits - note multiple evaluation! */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))

/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *
 * NOTE: a, b and c are each evaluated multiple times; pass only simple
 * lvalues, never expressions with side effects.
 *----------
 */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);	c += b; \
  b -= a;  b ^= rot(a, 6);	a += c; \
  c -= b;  c ^= rot(b, 8);	b += a; \
  a -= c;  a ^= rot(c,16);	c += b; \
  b -= a;  b ^= rot(a,19);	a += c; \
  c -= b;  c ^= rot(b, 4);	b += a; \
}

/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allows for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but it does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  The original hash function had
 * a single mixing operation that had to satisfy both sets of requirements
 * and was slower as a result.
 *----------
 */
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c, 4); \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}
435
436 /*
437 * hash_any() -- hash a variable-length key into a 32-bit value
438 * k : the key (the unaligned variable-length array of bytes)
439 * len : the length of the key, counting by bytes
440 *
441 * Returns a uint32 value. Every bit of the key affects every bit of
442 * the return value. Every 1-bit and 2-bit delta achieves avalanche.
443 * About 6*len+35 instructions. The best hash table sizes are powers
444 * of 2. There is no need to do mod a prime (mod is sooo slow!).
445 * If you need less than 32 bits, use a bitmask.
446 *
447 * This procedure must never throw elog(ERROR); the ResourceOwner code
448 * relies on this not to fail.
449 *
450 * Note: we could easily change this function to return a 64-bit hash value
451 * by using the final values of both b and c. b is perhaps a little less
452 * well mixed than c, however.
453 */
Datum
hash_any(register const unsigned char *k, register int keylen)
{
	register uint32 a,
				b,
				c,
				len;

	/* Set up the internal state */
	len = keylen;
	/* 0x9e3779b9 is the golden-ratio constant from Bob Jenkins' design */
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		register const uint32 *ka = (const uint32 *) k;

		/* handle most of the key, 12 bytes (three words) per round */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/* handle the last 11 bytes; switch cases fall through deliberately */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/*
		 * Code path for non-aligned source data: fetch one byte at a time
		 * to avoid misaligned word reads.
		 */

		/* handle most of the key */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes; switch cases fall through deliberately */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	/* final mixing of the three state words into c */
	final(a, b, c);

	/* report the result */
	return UInt32GetDatum(c);
}
671
672 /*
673 * hash_any_extended() -- hash into a 64-bit value, using an optional seed
674 * k : the key (the unaligned variable-length array of bytes)
675 * len : the length of the key, counting by bytes
676 * seed : a 64-bit seed (0 means no seed)
677 *
678 * Returns a uint64 value. Otherwise similar to hash_any.
679 */
Datum
hash_any_extended(register const unsigned char *k, register int keylen,
				  uint64 seed)
{
	register uint32 a,
				b,
				c,
				len;

	/* Set up the internal state */
	len = keylen;
	/* 0x9e3779b9 is the golden-ratio constant from Bob Jenkins' design */
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the seed is non-zero, use it to perturb the internal state. */
	if (seed != 0)
	{
		/*
		 * In essence, the seed is treated as part of the data being hashed,
		 * but for simplicity, we pretend that it's padded with four bytes of
		 * zeroes so that the seed constitutes a 12-byte chunk.
		 */
		a += (uint32) (seed >> 32);
		b += (uint32) seed;
		mix(a, b, c);
	}

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		register const uint32 *ka = (const uint32 *) k;

		/* handle most of the key, 12 bytes (three words) per round */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/* handle the last 11 bytes; switch cases fall through deliberately */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/*
		 * Code path for non-aligned source data: fetch one byte at a time
		 * to avoid misaligned word reads.
		 */

		/* handle most of the key */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes; switch cases fall through deliberately */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	/* final mixing of the three state words */
	final(a, b, c);

	/* report the result: b and c together form the 64-bit hash */
	PG_RETURN_UINT64(((uint64) b << 32) | c);
}
911
912 /*
913 * hash_uint32() -- hash a 32-bit value to a 32-bit value
914 *
915 * This has the same result as
916 * hash_any(&k, sizeof(uint32))
917 * but is faster and doesn't force the caller to store k into memory.
918 */
919 Datum
hash_uint32(uint32 k)920 hash_uint32(uint32 k)
921 {
922 register uint32 a,
923 b,
924 c;
925
926 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
927 a += k;
928
929 final(a, b, c);
930
931 /* report the result */
932 return UInt32GetDatum(c);
933 }
934
935 /*
936 * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
937 *
938 * Like hash_uint32, this is a convenience function.
939 */
940 Datum
hash_uint32_extended(uint32 k,uint64 seed)941 hash_uint32_extended(uint32 k, uint64 seed)
942 {
943 register uint32 a,
944 b,
945 c;
946
947 a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
948
949 if (seed != 0)
950 {
951 a += (uint32) (seed >> 32);
952 b += (uint32) seed;
953 mix(a, b, c);
954 }
955
956 a += k;
957
958 final(a, b, c);
959
960 /* report the result */
961 PG_RETURN_UINT64(((uint64) b << 32) | c);
962 }
963