1 /*-------------------------------------------------------------------------
2 *
3 * hashfunc.c
4 * Support functions for hash access method.
5 *
6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/hash/hashfunc.c
12 *
13 * NOTES
14 * These functions are stored in pg_amproc. For each operator class
15 * defined for hash indexes, they compute the hash value of the argument.
16 *
17 * Additional hash functions appear in /utils/adt/ files for various
18 * specialized datatypes.
19 *
20 * It is expected that every bit of a hash function's 32-bit result is
21 * as random as every other; failure to ensure this is likely to lead
22 * to poor performance of hash joins, for example. In most cases a hash
23 * function should use hash_any() or its variant hash_uint32().
24 *-------------------------------------------------------------------------
25 */
26
27 #include "postgres.h"
28
29 #include "access/hash.h"
30 #include "catalog/pg_collation.h"
31 #include "common/hashfn.h"
32 #include "utils/builtins.h"
33 #include "utils/float.h"
34 #include "utils/pg_locale.h"
35
36 /*
37 * Datatype-specific hash functions.
38 *
39 * These support both hash indexes and hash joins.
40 *
41 * NOTE: some of these are also used by catcache operations, without
42 * any direct connection to hash indexes. Also, the common hash_any
43 * routine is also used by dynahash tables.
44 */
45
46 /* Note: this is used for both "char" and boolean datatypes */
47 Datum
hashchar(PG_FUNCTION_ARGS)48 hashchar(PG_FUNCTION_ARGS)
49 {
50 return hash_uint32((int32) PG_GETARG_CHAR(0));
51 }
52
53 Datum
hashcharextended(PG_FUNCTION_ARGS)54 hashcharextended(PG_FUNCTION_ARGS)
55 {
56 return hash_uint32_extended((int32) PG_GETARG_CHAR(0), PG_GETARG_INT64(1));
57 }
58
59 Datum
hashint2(PG_FUNCTION_ARGS)60 hashint2(PG_FUNCTION_ARGS)
61 {
62 return hash_uint32((int32) PG_GETARG_INT16(0));
63 }
64
65 Datum
hashint2extended(PG_FUNCTION_ARGS)66 hashint2extended(PG_FUNCTION_ARGS)
67 {
68 return hash_uint32_extended((int32) PG_GETARG_INT16(0), PG_GETARG_INT64(1));
69 }
70
71 Datum
hashint4(PG_FUNCTION_ARGS)72 hashint4(PG_FUNCTION_ARGS)
73 {
74 return hash_uint32(PG_GETARG_INT32(0));
75 }
76
77 Datum
hashint4extended(PG_FUNCTION_ARGS)78 hashint4extended(PG_FUNCTION_ARGS)
79 {
80 return hash_uint32_extended(PG_GETARG_INT32(0), PG_GETARG_INT64(1));
81 }
82
83 Datum
hashint8(PG_FUNCTION_ARGS)84 hashint8(PG_FUNCTION_ARGS)
85 {
86 /*
87 * The idea here is to produce a hash value compatible with the values
88 * produced by hashint4 and hashint2 for logically equal inputs; this is
89 * necessary to support cross-type hash joins across these input types.
90 * Since all three types are signed, we can xor the high half of the int8
91 * value if the sign is positive, or the complement of the high half when
92 * the sign is negative.
93 */
94 int64 val = PG_GETARG_INT64(0);
95 uint32 lohalf = (uint32) val;
96 uint32 hihalf = (uint32) (val >> 32);
97
98 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
99
100 return hash_uint32(lohalf);
101 }
102
103 Datum
hashint8extended(PG_FUNCTION_ARGS)104 hashint8extended(PG_FUNCTION_ARGS)
105 {
106 /* Same approach as hashint8 */
107 int64 val = PG_GETARG_INT64(0);
108 uint32 lohalf = (uint32) val;
109 uint32 hihalf = (uint32) (val >> 32);
110
111 lohalf ^= (val >= 0) ? hihalf : ~hihalf;
112
113 return hash_uint32_extended(lohalf, PG_GETARG_INT64(1));
114 }
115
116 Datum
hashoid(PG_FUNCTION_ARGS)117 hashoid(PG_FUNCTION_ARGS)
118 {
119 return hash_uint32((uint32) PG_GETARG_OID(0));
120 }
121
122 Datum
hashoidextended(PG_FUNCTION_ARGS)123 hashoidextended(PG_FUNCTION_ARGS)
124 {
125 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
126 }
127
128 Datum
hashenum(PG_FUNCTION_ARGS)129 hashenum(PG_FUNCTION_ARGS)
130 {
131 return hash_uint32((uint32) PG_GETARG_OID(0));
132 }
133
134 Datum
hashenumextended(PG_FUNCTION_ARGS)135 hashenumextended(PG_FUNCTION_ARGS)
136 {
137 return hash_uint32_extended((uint32) PG_GETARG_OID(0), PG_GETARG_INT64(1));
138 }
139
140 Datum
hashfloat4(PG_FUNCTION_ARGS)141 hashfloat4(PG_FUNCTION_ARGS)
142 {
143 float4 key = PG_GETARG_FLOAT4(0);
144 float8 key8;
145
146 /*
147 * On IEEE-float machines, minus zero and zero have different bit patterns
148 * but should compare as equal. We must ensure that they have the same
149 * hash value, which is most reliably done this way:
150 */
151 if (key == (float4) 0)
152 PG_RETURN_UINT32(0);
153
154 /*
155 * To support cross-type hashing of float8 and float4, we want to return
156 * the same hash value hashfloat8 would produce for an equal float8 value.
157 * So, widen the value to float8 and hash that. (We must do this rather
158 * than have hashfloat8 try to narrow its value to float4; that could fail
159 * on overflow.)
160 */
161 key8 = key;
162
163 /*
164 * Similarly, NaNs can have different bit patterns but they should all
165 * compare as equal. For backwards-compatibility reasons we force them to
166 * have the hash value of a standard float8 NaN. (You'd think we could
167 * replace key with a float4 NaN and then widen it; but on some old
168 * platforms, that way produces a different bit pattern.)
169 */
170 if (isnan(key8))
171 key8 = get_float8_nan();
172
173 return hash_any((unsigned char *) &key8, sizeof(key8));
174 }
175
176 Datum
hashfloat4extended(PG_FUNCTION_ARGS)177 hashfloat4extended(PG_FUNCTION_ARGS)
178 {
179 float4 key = PG_GETARG_FLOAT4(0);
180 uint64 seed = PG_GETARG_INT64(1);
181 float8 key8;
182
183 /* Same approach as hashfloat4 */
184 if (key == (float4) 0)
185 PG_RETURN_UINT64(seed);
186 key8 = key;
187 if (isnan(key8))
188 key8 = get_float8_nan();
189
190 return hash_any_extended((unsigned char *) &key8, sizeof(key8), seed);
191 }
192
193 Datum
hashfloat8(PG_FUNCTION_ARGS)194 hashfloat8(PG_FUNCTION_ARGS)
195 {
196 float8 key = PG_GETARG_FLOAT8(0);
197
198 /*
199 * On IEEE-float machines, minus zero and zero have different bit patterns
200 * but should compare as equal. We must ensure that they have the same
201 * hash value, which is most reliably done this way:
202 */
203 if (key == (float8) 0)
204 PG_RETURN_UINT32(0);
205
206 /*
207 * Similarly, NaNs can have different bit patterns but they should all
208 * compare as equal. For backwards-compatibility reasons we force them to
209 * have the hash value of a standard NaN.
210 */
211 if (isnan(key))
212 key = get_float8_nan();
213
214 return hash_any((unsigned char *) &key, sizeof(key));
215 }
216
217 Datum
hashfloat8extended(PG_FUNCTION_ARGS)218 hashfloat8extended(PG_FUNCTION_ARGS)
219 {
220 float8 key = PG_GETARG_FLOAT8(0);
221 uint64 seed = PG_GETARG_INT64(1);
222
223 /* Same approach as hashfloat8 */
224 if (key == (float8) 0)
225 PG_RETURN_UINT64(seed);
226 if (isnan(key))
227 key = get_float8_nan();
228
229 return hash_any_extended((unsigned char *) &key, sizeof(key), seed);
230 }
231
232 Datum
hashoidvector(PG_FUNCTION_ARGS)233 hashoidvector(PG_FUNCTION_ARGS)
234 {
235 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
236
237 return hash_any((unsigned char *) key->values, key->dim1 * sizeof(Oid));
238 }
239
240 Datum
hashoidvectorextended(PG_FUNCTION_ARGS)241 hashoidvectorextended(PG_FUNCTION_ARGS)
242 {
243 oidvector *key = (oidvector *) PG_GETARG_POINTER(0);
244
245 return hash_any_extended((unsigned char *) key->values,
246 key->dim1 * sizeof(Oid),
247 PG_GETARG_INT64(1));
248 }
249
250 Datum
hashname(PG_FUNCTION_ARGS)251 hashname(PG_FUNCTION_ARGS)
252 {
253 char *key = NameStr(*PG_GETARG_NAME(0));
254
255 return hash_any((unsigned char *) key, strlen(key));
256 }
257
258 Datum
hashnameextended(PG_FUNCTION_ARGS)259 hashnameextended(PG_FUNCTION_ARGS)
260 {
261 char *key = NameStr(*PG_GETARG_NAME(0));
262
263 return hash_any_extended((unsigned char *) key, strlen(key),
264 PG_GETARG_INT64(1));
265 }
266
267 Datum
hashtext(PG_FUNCTION_ARGS)268 hashtext(PG_FUNCTION_ARGS)
269 {
270 text *key = PG_GETARG_TEXT_PP(0);
271 Oid collid = PG_GET_COLLATION();
272 pg_locale_t mylocale = 0;
273 Datum result;
274
275 if (!collid)
276 ereport(ERROR,
277 (errcode(ERRCODE_INDETERMINATE_COLLATION),
278 errmsg("could not determine which collation to use for string hashing"),
279 errhint("Use the COLLATE clause to set the collation explicitly.")));
280
281 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
282 mylocale = pg_newlocale_from_collation(collid);
283
284 if (!mylocale || mylocale->deterministic)
285 {
286 result = hash_any((unsigned char *) VARDATA_ANY(key),
287 VARSIZE_ANY_EXHDR(key));
288 }
289 else
290 {
291 #ifdef USE_ICU
292 if (mylocale->provider == COLLPROVIDER_ICU)
293 {
294 int32_t ulen = -1;
295 UChar *uchar = NULL;
296 Size bsize;
297 uint8_t *buf;
298
299 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
300
301 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
302 uchar, ulen, NULL, 0);
303 buf = palloc(bsize);
304 ucol_getSortKey(mylocale->info.icu.ucol,
305 uchar, ulen, buf, bsize);
306
307 result = hash_any(buf, bsize);
308
309 pfree(buf);
310 }
311 else
312 #endif
313 /* shouldn't happen */
314 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
315 }
316
317 /* Avoid leaking memory for toasted inputs */
318 PG_FREE_IF_COPY(key, 0);
319
320 return result;
321 }
322
323 Datum
hashtextextended(PG_FUNCTION_ARGS)324 hashtextextended(PG_FUNCTION_ARGS)
325 {
326 text *key = PG_GETARG_TEXT_PP(0);
327 Oid collid = PG_GET_COLLATION();
328 pg_locale_t mylocale = 0;
329 Datum result;
330
331 if (!collid)
332 ereport(ERROR,
333 (errcode(ERRCODE_INDETERMINATE_COLLATION),
334 errmsg("could not determine which collation to use for string hashing"),
335 errhint("Use the COLLATE clause to set the collation explicitly.")));
336
337 if (!lc_collate_is_c(collid) && collid != DEFAULT_COLLATION_OID)
338 mylocale = pg_newlocale_from_collation(collid);
339
340 if (!mylocale || mylocale->deterministic)
341 {
342 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
343 VARSIZE_ANY_EXHDR(key),
344 PG_GETARG_INT64(1));
345 }
346 else
347 {
348 #ifdef USE_ICU
349 if (mylocale->provider == COLLPROVIDER_ICU)
350 {
351 int32_t ulen = -1;
352 UChar *uchar = NULL;
353 Size bsize;
354 uint8_t *buf;
355
356 ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
357
358 bsize = ucol_getSortKey(mylocale->info.icu.ucol,
359 uchar, ulen, NULL, 0);
360 buf = palloc(bsize);
361 ucol_getSortKey(mylocale->info.icu.ucol,
362 uchar, ulen, buf, bsize);
363
364 result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
365
366 pfree(buf);
367 }
368 else
369 #endif
370 /* shouldn't happen */
371 elog(ERROR, "unsupported collprovider: %c", mylocale->provider);
372 }
373
374 PG_FREE_IF_COPY(key, 0);
375
376 return result;
377 }
378
379 /*
380 * hashvarlena() can be used for any varlena datatype in which there are
381 * no non-significant bits, ie, distinct bitpatterns never compare as equal.
382 */
383 Datum
hashvarlena(PG_FUNCTION_ARGS)384 hashvarlena(PG_FUNCTION_ARGS)
385 {
386 struct varlena *key = PG_GETARG_VARLENA_PP(0);
387 Datum result;
388
389 result = hash_any((unsigned char *) VARDATA_ANY(key),
390 VARSIZE_ANY_EXHDR(key));
391
392 /* Avoid leaking memory for toasted inputs */
393 PG_FREE_IF_COPY(key, 0);
394
395 return result;
396 }
397
398 Datum
hashvarlenaextended(PG_FUNCTION_ARGS)399 hashvarlenaextended(PG_FUNCTION_ARGS)
400 {
401 struct varlena *key = PG_GETARG_VARLENA_PP(0);
402 Datum result;
403
404 result = hash_any_extended((unsigned char *) VARDATA_ANY(key),
405 VARSIZE_ANY_EXHDR(key),
406 PG_GETARG_INT64(1));
407
408 PG_FREE_IF_COPY(key, 0);
409
410 return result;
411 }
412