1 /*-------------------------------------------------------------------------
2  *
3  * hashfunc.c
4  *	  Support functions for hash access method.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/access/hash/hashfunc.c
12  *
13  * NOTES
14  *	  These functions are stored in pg_amproc.  For each operator class
15  *	  defined for hash indexes, they compute the hash value of the argument.
16  *
17  *	  Additional hash functions appear in /utils/adt/ files for various
18  *	  specialized datatypes.
19  *
20  *	  It is expected that every bit of a hash function's 32-bit result is
21  *	  as random as every other; failure to ensure this is likely to lead
22  *	  to poor performance of hash joins, for example.  In most cases a hash
23  *	  function should use hash_any() or its variant hash_uint32().
24  *-------------------------------------------------------------------------
25  */
26 
27 #include "postgres.h"
28 
29 #ifdef _MSC_VER
30 #include <float.h>				/* for _isnan */
31 #endif
32 #include <math.h>
33 
34 #include "access/hash.h"
35 #include "utils/builtins.h"
36 
37 /*
38  * Datatype-specific hash functions.
39  *
40  * These support both hash indexes and hash joins.
41  *
42  * NOTE: some of these are also used by catcache operations, without
43  * any direct connection to hash indexes.  Also, the common hash_any
44  * routine is also used by dynahash tables.
45  */
46 
47 /* Note: this is used for both "char" and boolean datatypes */
48 Datum
hashchar(PG_FUNCTION_ARGS)49 hashchar(PG_FUNCTION_ARGS)
50 {
51 	return hash_uint32((int32) PG_GETARG_CHAR(0));
52 }
53 
54 Datum
hashcharextended(PG_FUNCTION_ARGS)55 hashcharextended(PG_FUNCTION_ARGS)
56 {
57 	return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
58 }
59 
60 Datum
hashint2(PG_FUNCTION_ARGS)61 hashint2(PG_FUNCTION_ARGS)
62 {
63 	return hash_uint32((int32) PG_GETARG_INT16(0));
64 }
65 
66 Datum
hashint2extended(PG_FUNCTION_ARGS)67 hashint2extended(PG_FUNCTION_ARGS)
68 {
69 	return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
70 }
71 
72 Datum
hashint4(PG_FUNCTION_ARGS)73 hashint4(PG_FUNCTION_ARGS)
74 {
75 	return hash_uint32(PG_GETARG_INT32(0));
76 }
77 
78 Datum
hashint4extended(PG_FUNCTION_ARGS)79 hashint4extended(PG_FUNCTION_ARGS)
80 {
81 	return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
82 }
83 
84 Datum
hashint8(PG_FUNCTION_ARGS)85 hashint8(PG_FUNCTION_ARGS)
86 {
87 	/*
88 	 * The idea here is to produce a hash value compatible with the values
89 	 * produced by hashint4 and hashint2 for logically equal inputs; this is
90 	 * necessary to support cross-type hash joins across these input types.
91 	 * Since all three types are signed, we can xor the high half of the int8
92 	 * value if the sign is positive, or the complement of the high half when
93 	 * the sign is negative.
94 	 */
95 	int64		val = PG_GETARG_INT64(0);
96 	uint32		lohalf = (uint32) val;
97 	uint32		hihalf = (uint32) (val >> 32);
98 
99 	lohalf ^= (val >= 0) ? hihalf : ~hihalf;
100 
101 	return hash_uint32(lohalf);
102 }
103 
104 Datum
hashint8extended(PG_FUNCTION_ARGS)105 hashint8extended(PG_FUNCTION_ARGS)
106 {
107 	/* Same approach as hashint8 */
108 	int64		val = PG_GETARG_INT64(0);
109 	uint32		lohalf = (uint32) val;
110 	uint32		hihalf = (uint32) (val >> 32);
111 
112 	lohalf ^= (val >= 0) ? hihalf : ~hihalf;
113 
114 	return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
115 }
116 
117 Datum
hashoid(PG_FUNCTION_ARGS)118 hashoid(PG_FUNCTION_ARGS)
119 {
120 	return hash_uint32((uint32) PG_GETARG_OID(0));
121 }
122 
123 Datum
hashoidextended(PG_FUNCTION_ARGS)124 hashoidextended(PG_FUNCTION_ARGS)
125 {
126 	return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
127 }
128 
129 Datum
hashenum(PG_FUNCTION_ARGS)130 hashenum(PG_FUNCTION_ARGS)
131 {
132 	return hash_uint32((uint32) PG_GETARG_OID(0));
133 }
134 
135 Datum
hashenumextended(PG_FUNCTION_ARGS)136 hashenumextended(PG_FUNCTION_ARGS)
137 {
138 	return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
139 }
140 
141 Datum
hashfloat4(PG_FUNCTION_ARGS)142 hashfloat4(PG_FUNCTION_ARGS)
143 {
144 	float4		key = PG_GETARG_FLOAT4(0);
145 	float8		key8;
146 
147 	/*
148 	 * On IEEE-float machines, minus zero and zero have different bit patterns
149 	 * but should compare as equal.  We must ensure that they have the same
150 	 * hash value, which is most reliably done this way:
151 	 */
152 	if (key == (float4) 0)
153 		PG_RETURN_UINT32(0);
154 
155 	/*
156 	 * To support cross-type hashing of float8 and float4, we want to return
157 	 * the same hash value hashfloat8 would produce for an equal float8 value.
158 	 * So, widen the value to float8 and hash that.  (We must do this rather
159 	 * than have hashfloat8 try to narrow its value to float4; that could fail
160 	 * on overflow.)
161 	 */
162 	key8 = key;
163 
164 	/*
165 	 * Similarly, NaNs can have different bit patterns but they should all
166 	 * compare as equal.  For backwards-compatibility reasons we force them to
167 	 * have the hash value of a standard float8 NaN.  (You'd think we could
168 	 * replace key with a float4 NaN and then widen it; but on some old
169 	 * platforms, that way produces a different bit pattern.)
170 	 */
171 	if (isnan(key8))
172 		key8 = get_float8_nan();
173 
174 	return hash_any((unsigned char *) &key8, sizeof(key8));
175 }
176 
177 Datum
hashfloat4extended(PG_FUNCTION_ARGS)178 hashfloat4extended(PG_FUNCTION_ARGS)
179 {
180 	float4		key = PG_GETARG_FLOAT4(0);
181 	uint64		seed = PG_GETARG_INT64(1);
182 	float8		key8;
183 
184 	/* Same approach as hashfloat4 */
185 	if (key == (float4) 0)
186 		PG_RETURN_UINT64(seed);
187 	key8 = key;
188 	if (isnan(key8))
189 		key8 = get_float8_nan();
190 
191 	return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
192 }
193 
194 Datum
hashfloat8(PG_FUNCTION_ARGS)195 hashfloat8(PG_FUNCTION_ARGS)
196 {
197 	float8		key = PG_GETARG_FLOAT8(0);
198 
199 	/*
200 	 * On IEEE-float machines, minus zero and zero have different bit patterns
201 	 * but should compare as equal.  We must ensure that they have the same
202 	 * hash value, which is most reliably done this way:
203 	 */
204 	if (key == (float8) 0)
205 		PG_RETURN_UINT32(0);
206 
207 	/*
208 	 * Similarly, NaNs can have different bit patterns but they should all
209 	 * compare as equal.  For backwards-compatibility reasons we force them to
210 	 * have the hash value of a standard NaN.
211 	 */
212 	if (isnan(key))
213 		key = get_float8_nan();
214 
215 	return hash_any((unsigned char *) &key, sizeof(key));
216 }
217 
218 Datum
hashfloat8extended(PG_FUNCTION_ARGS)219 hashfloat8extended(PG_FUNCTION_ARGS)
220 {
221 	float8		key = PG_GETARG_FLOAT8(0);
222 	uint64		seed = PG_GETARG_INT64(1);
223 
224 	/* Same approach as hashfloat8 */
225 	if (key == (float8) 0)
226 		PG_RETURN_UINT64(seed);
227 	if (isnan(key))
228 		key = get_float8_nan();
229 
230 	return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
231 }
232 
233 Datum
hashoidvector(PG_FUNCTION_ARGS)234 hashoidvector(PG_FUNCTION_ARGS)
235 {
236 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
237 
238 	return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
239 }
240 
241 Datum
hashoidvectorextended(PG_FUNCTION_ARGS)242 hashoidvectorextended(PG_FUNCTION_ARGS)
243 {
244 	oidvector  *key = (oidvector *) PG_GETARG_POINTER(0);
245 
246 	return hash_any_extended((unsigned char *) key->values,
247 							 key->dim1 * sizeof(Oid),
248 							 PG_GETARG_INT64(1));
249 }
250 
251 Datum
hashname(PG_FUNCTION_ARGS)252 hashname(PG_FUNCTION_ARGS)
253 {
254 	char	   *key = NameStr(*PG_GETARG_NAME(0));
255 
256 	return hash_any((unsigned char *) key, strlen(key));
257 }
258 
259 Datum
hashnameextended(PG_FUNCTION_ARGS)260 hashnameextended(PG_FUNCTION_ARGS)
261 {
262 	char	   *key = NameStr(*PG_GETARG_NAME(0));
263 
264 	return hash_any_extended((unsigned char *) key, strlen(key),
265 							 PG_GETARG_INT64(1));
266 }
267 
268 Datum
hashtext(PG_FUNCTION_ARGS)269 hashtext(PG_FUNCTION_ARGS)
270 {
271 	text	   *key = PG_GETARG_TEXT_PP(0);
272 	Datum		result;
273 
274 	/*
275 	 * Note: this is currently identical in behavior to hashvarlena, but keep
276 	 * it as a separate function in case we someday want to do something
277 	 * different in non-C locales.  (See also hashbpchar, if so.)
278 	 */
279 	result = hash_any((unsigned char *) VARDATA_ANY(key),
280 					  VARSIZE_ANY_EXHDR(key));
281 
282 	/* Avoid leaking memory for toasted inputs */
283 	PG_FREE_IF_COPY(key, 0);
284 
285 	return result;
286 }
287 
288 Datum
hashtextextended(PG_FUNCTION_ARGS)289 hashtextextended(PG_FUNCTION_ARGS)
290 {
291 	text	   *key = PG_GETARG_TEXT_PP(0);
292 	Datum		result;
293 
294 	/* Same approach as hashtext */
295 	result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
296 							   VARSIZE_ANY_EXHDR(key),
297 							   PG_GETARG_INT64(1));
298 
299 	PG_FREE_IF_COPY(key, 0);
300 
301 	return result;
302 }
303 
304 /*
305  * hashvarlena() can be used for any varlena datatype in which there are
306  * no non-significant bits, ie, distinct bitpatterns never compare as equal.
307  */
308 Datum
hashvarlena(PG_FUNCTION_ARGS)309 hashvarlena(PG_FUNCTION_ARGS)
310 {
311 	struct varlena *key = PG_GETARG_VARLENA_PP(0);
312 	Datum		result;
313 
314 	result = hash_any((unsigned char *) VARDATA_ANY(key),
315 					  VARSIZE_ANY_EXHDR(key));
316 
317 	/* Avoid leaking memory for toasted inputs */
318 	PG_FREE_IF_COPY(key, 0);
319 
320 	return result;
321 }
322 
323 Datum
hashvarlenaextended(PG_FUNCTION_ARGS)324 hashvarlenaextended(PG_FUNCTION_ARGS)
325 {
326 	struct varlena *key = PG_GETARG_VARLENA_PP(0);
327 	Datum		result;
328 
329 	result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
330 							   VARSIZE_ANY_EXHDR(key),
331 							   PG_GETARG_INT64(1));
332 
333 	PG_FREE_IF_COPY(key, 0);
334 
335 	return result;
336 }
337 
338 /*
339  * This hash function was written by Bob Jenkins
340  * (bob_jenkins@burtleburtle.net), and superficially adapted
341  * for PostgreSQL by Neil Conway. For more information on this
342  * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
343  * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
344  *
345  * In the current code, we have adopted Bob's 2006 update of his hash
346  * function to fetch the data a word at a time when it is suitably aligned.
347  * This makes for a useful speedup, at the cost of having to maintain
348  * four code paths (aligned vs unaligned, and little-endian vs big-endian).
349  * It also uses two separate mixing functions mix() and final(), instead
350  * of a slower multi-purpose function.
351  */

/* Get a bit mask of the bits set in non-uint32 aligned addresses */
#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)

/*
 * Rotate a uint32 value left by k bits - note multiple evaluation!
 * NOTE(review): k must be in 1..31 — k == 0 would shift right by 32,
 * which is undefined behavior in C; all call sites below use constants
 * in that range.
 */
#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
358 
/*----------
 * mix -- mix 3 32-bit values reversibly.
 *
 * This is reversible, so any information in (a,b,c) before mix() is
 * still in (a,b,c) after mix().
 *
 * If four pairs of (a,b,c) inputs are run through mix(), or through
 * mix() in reverse, there are at least 32 bits of the output that
 * are sometimes the same for one pair and different for another pair.
 * This was tested for:
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * This does not achieve avalanche.  There are input bits of (a,b,c)
 * that fail to affect some output bits of (a,b,c), especially of a.  The
 * most thoroughly mixed value is c, but it doesn't really even achieve
 * avalanche in c.
 *
 * This allows some parallelism.  Read-after-writes are good at doubling
 * the number of bits affected, so the goal of mixing pulls in the opposite
 * direction from the goal of parallelism.  I did what I could.  Rotates
 * seem to cost as much as shifts on every machine I could lay my hands on,
 * and rotates are much kinder to the top and bottom bits, so I used rotates.
 *
 * Used by hash_any(), hash_any_extended(), and hash_uint32_extended()
 * below.  Being a macro, it updates a, b, and c in place and evaluates
 * its arguments multiple times.
 *----------
 */
#define mix(a,b,c) \
{ \
  a -= c;  a ^= rot(c, 4);	c += b; \
  b -= a;  b ^= rot(a, 6);	a += c; \
  c -= b;  c ^= rot(b, 8);	b += a; \
  a -= c;  a ^= rot(c,16);	c += b; \
  b -= a;  b ^= rot(a,19);	a += c; \
  c -= b;  c ^= rot(b, 4);	b += a; \
}
400 
/*----------
 * final -- final mixing of 3 32-bit values (a,b,c) into c
 *
 * Pairs of (a,b,c) values differing in only a few bits will usually
 * produce values of c that look totally different.  This was tested for
 * * pairs that differed by one bit, by two bits, in any combination
 *	 of top bits of (a,b,c), or in any combination of bottom bits of
 *	 (a,b,c).
 * * "differ" is defined as +, -, ^, or ~^.  For + and -, I transformed
 *	 the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
 *	 is commonly produced by subtraction) look like a single 1-bit
 *	 difference.
 * * the base values were pseudorandom, all zero but one bit set, or
 *	 all zero plus a counter that starts at zero.
 *
 * The use of separate functions for mix() and final() allows for a
 * substantial performance increase since final() does not need to
 * do well in reverse, but it does need to affect all output bits.
 * mix(), on the other hand, does not need to affect all output
 * bits (affecting 32 bits is enough).  The original hash function had
 * a single mixing operation that had to satisfy both sets of requirements
 * and was slower as a result.
 *
 * Like mix(), this macro updates its arguments in place and evaluates
 * them multiple times.
 *----------
 */
#define final(a,b,c) \
{ \
  c ^= b; c -= rot(b,14); \
  a ^= c; a -= rot(c,11); \
  b ^= a; b -= rot(a,25); \
  c ^= b; c -= rot(b,16); \
  a ^= c; a -= rot(c, 4); \
  b ^= a; b -= rot(a,14); \
  c ^= b; c -= rot(b,24); \
}
435 
/*
 * hash_any() -- hash a variable-length key into a 32-bit value
 *		k		: the key (the unaligned variable-length array of bytes)
 *		len		: the length of the key, counting by bytes
 *
 * Returns a uint32 value.  Every bit of the key affects every bit of
 * the return value.  Every 1-bit and 2-bit delta achieves avalanche.
 * About 6*len+35 instructions. The best hash table sizes are powers
 * of 2.  There is no need to do mod a prime (mod is sooo slow!).
 * If you need less than 32 bits, use a bitmask.
 *
 * This procedure must never throw elog(ERROR); the ResourceOwner code
 * relies on this not to fail.
 *
 * Note: we could easily change this function to return a 64-bit hash value
 * by using the final values of both b and c.  b is perhaps a little less
 * well mixed than c, however.
 */
Datum
hash_any(register const unsigned char *k, register int keylen)
{
	register uint32 a,
				b,
				c,
				len;

	/* Set up the internal state; the initializer folds in the key length */
	len = keylen;
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		register const uint32 *ka = (const uint32 *) k;

		/* handle most of the key, 12 bytes (three words) per round */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/*
		 * Handle the last 11 bytes.  Trailing bytes are placed in the same
		 * positions within a/b/c that a full word fetch would have put
		 * them, so short and long keys mix consistently; hence the two
		 * endian-specific variants below.
		 */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/* Code path for non-aligned source data: byte-at-a-time fetches */

		/* handle most of the key, 12 bytes per round */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes (same byte placement as the aligned path) */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	final(a, b, c);

	/* report the result */
	return UInt32GetDatum(c);
}
671 
/*
 * hash_any_extended() -- hash into a 64-bit value, using an optional seed
 *		k		: the key (the unaligned variable-length array of bytes)
 *		len		: the length of the key, counting by bytes
 *		seed	: a 64-bit seed (0 means no seed)
 *
 * Returns a uint64 value.  Otherwise similar to hash_any.
 * NOTE: with seed == 0 the internal state evolves exactly as in hash_any,
 * so the low 32 bits of the result match hash_any's result for the same
 * key; the seed perturbation below is skipped entirely in that case.
 */
Datum
hash_any_extended(register const unsigned char *k, register int keylen,
				  uint64 seed)
{
	register uint32 a,
				b,
				c,
				len;

	/* Set up the internal state; the initializer folds in the key length */
	len = keylen;
	a = b = c = 0x9e3779b9 + len + 3923095;

	/* If the seed is non-zero, use it to perturb the internal state. */
	if (seed != 0)
	{
		/*
		 * In essence, the seed is treated as part of the data being hashed,
		 * but for simplicity, we pretend that it's padded with four bytes of
		 * zeroes so that the seed constitutes a 12-byte chunk.
		 */
		a += (uint32) (seed >> 32);
		b += (uint32) seed;
		mix(a, b, c);
	}

	/* If the source pointer is word-aligned, we use word-wide fetches */
	if (((uintptr_t) k & UINT32_ALIGN_MASK) == 0)
	{
		/* Code path for aligned source data */
		register const uint32 *ka = (const uint32 *) k;

		/* handle most of the key, 12 bytes (three words) per round */
		while (len >= 12)
		{
			a += ka[0];
			b += ka[1];
			c += ka[2];
			mix(a, b, c);
			ka += 3;
			len -= 12;
		}

		/*
		 * Handle the last 11 bytes.  Trailing bytes are placed in the same
		 * positions within a/b/c that a full word fetch would have put
		 * them; hence the two endian-specific variants below.
		 */
		k = (const unsigned char *) ka;
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ka[1];
				a += ka[0];
				break;
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ka[0];
				break;
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}
	else
	{
		/* Code path for non-aligned source data: byte-at-a-time fetches */

		/* handle most of the key, 12 bytes per round */
		while (len >= 12)
		{
#ifdef WORDS_BIGENDIAN
			a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
			b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
			c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
#else							/* !WORDS_BIGENDIAN */
			a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
			b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
			c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
#endif							/* WORDS_BIGENDIAN */
			mix(a, b, c);
			k += 12;
			len -= 12;
		}

		/* handle the last 11 bytes (same byte placement as the aligned path) */
#ifdef WORDS_BIGENDIAN
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 8);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 24);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += k[7];
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 8);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 16);
				/* fall through */
			case 5:
				b += ((uint32) k[4] << 24);
				/* fall through */
			case 4:
				a += k[3];
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 8);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 16);
				/* fall through */
			case 1:
				a += ((uint32) k[0] << 24);
				/* case 0: nothing left to add */
		}
#else							/* !WORDS_BIGENDIAN */
		switch (len)
		{
			case 11:
				c += ((uint32) k[10] << 24);
				/* fall through */
			case 10:
				c += ((uint32) k[9] << 16);
				/* fall through */
			case 9:
				c += ((uint32) k[8] << 8);
				/* fall through */
			case 8:
				/* the lowest byte of c is reserved for the length */
				b += ((uint32) k[7] << 24);
				/* fall through */
			case 7:
				b += ((uint32) k[6] << 16);
				/* fall through */
			case 6:
				b += ((uint32) k[5] << 8);
				/* fall through */
			case 5:
				b += k[4];
				/* fall through */
			case 4:
				a += ((uint32) k[3] << 24);
				/* fall through */
			case 3:
				a += ((uint32) k[2] << 16);
				/* fall through */
			case 2:
				a += ((uint32) k[1] << 8);
				/* fall through */
			case 1:
				a += k[0];
				/* case 0: nothing left to add */
		}
#endif							/* WORDS_BIGENDIAN */
	}

	final(a, b, c);

	/* report the result: b supplies the high 32 bits, c the low 32 bits */
	PG_RETURN_UINT64(((uint64) b << 32) | c);
}
911 
/*
 * hash_uint32() -- hash a 32-bit value to a 32-bit value
 *
 * This has the same result as
 *		hash_any(&k, sizeof(uint32))
 * but is faster and doesn't force the caller to store k into memory.
 */
Datum
hash_uint32(uint32 k)
{
	register uint32 a,
				b,
				c;

	/*
	 * Initialize exactly as hash_any does for a 4-byte key (the constant
	 * folds in the key length), then add the key into 'a' just as hash_any's
	 * case-4 tail code does.
	 */
	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;
	a += k;

	final(a, b, c);

	/* report the result */
	return UInt32GetDatum(c);
}
934 
/*
 * hash_uint32_extended() -- hash a 32-bit value to a 64-bit value, with a seed
 *
 * Like hash_uint32, this is a convenience function.  With seed == 0 the
 * internal state evolves as in hash_uint32, so the low 32 bits of the
 * result match hash_uint32's result for the same key.
 */
Datum
hash_uint32_extended(uint32 k, uint64 seed)
{
	register uint32 a,
				b,
				c;

	/* Initialize exactly as hash_any_extended does for a 4-byte key */
	a = b = c = 0x9e3779b9 + (uint32) sizeof(uint32) + 3923095;

	/* Perturb the state with a non-zero seed, as hash_any_extended does */
	if (seed != 0)
	{
		a += (uint32) (seed >> 32);
		b += (uint32) seed;
		mix(a, b, c);
	}

	a += k;

	final(a, b, c);

	/* report the result: b supplies the high 32 bits, c the low 32 bits */
	PG_RETURN_UINT64(((uint64) b << 32) | c);
}
963