/*-------------------------------------------------------------------------
 *
 * execGrouping.c
 *	  executor utility routines for grouping, hashing, and aggregation
 *
 * Note: collation settings ARE passed through from callers here: the
 * prepare/build functions take collation arrays, and hashing uses
 * FunctionCall1Coll() with the table's per-column collations.  (An older
 * version of this comment claimed the opposite; that no longer matches
 * the code below.)
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execGrouping.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/parallel.h"
#include "executor/executor.h"
#include "miscadmin.h"
#include "utils/lsyscache.h"
#include "utils/hashutils.h"
#include "utils/memutils.h"

/* Callbacks the generated simplehash table invokes for hashing/matching */
static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);

/*
 * Define parameters for tuple hash table code generation. The interface is
 * *also* declared in execnodes.h (to generate the types, which are externally
 * visible).
35 */ 36 #define SH_PREFIX tuplehash 37 #define SH_ELEMENT_TYPE TupleHashEntryData 38 #define SH_KEY_TYPE MinimalTuple 39 #define SH_KEY firstTuple 40 #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key) 41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0 42 #define SH_SCOPE extern 43 #define SH_STORE_HASH 44 #define SH_GET_HASH(tb, a) a->hash 45 #define SH_DEFINE 46 #include "lib/simplehash.h" 47 48 49 /***************************************************************************** 50 * Utility routines for grouping tuples together 51 *****************************************************************************/ 52 53 /* 54 * execTuplesMatchPrepare 55 * Build expression that can be evaluated using ExecQual(), returning 56 * whether an ExprContext's inner/outer tuples are NOT DISTINCT 57 */ 58 ExprState * 59 execTuplesMatchPrepare(TupleDesc desc, 60 int numCols, 61 const AttrNumber *keyColIdx, 62 const Oid *eqOperators, 63 const Oid *collations, 64 PlanState *parent) 65 { 66 Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid)); 67 int i; 68 ExprState *expr; 69 70 if (numCols == 0) 71 return NULL; 72 73 /* lookup equality functions */ 74 for (i = 0; i < numCols; i++) 75 eqFunctions[i] = get_opcode(eqOperators[i]); 76 77 /* build actual expression */ 78 expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL, 79 numCols, keyColIdx, eqFunctions, collations, 80 parent); 81 82 return expr; 83 } 84 85 /* 86 * execTuplesHashPrepare 87 * Look up the equality and hashing functions needed for a TupleHashTable. 88 * 89 * This is similar to execTuplesMatchPrepare, but we also need to find the 90 * hash functions associated with the equality operators. *eqFunctions and 91 * *hashFunctions receive the palloc'd result arrays. 92 * 93 * Note: we expect that the given operators are not cross-type comparisons. 
 */
void
execTuplesHashPrepare(int numCols,
					  const Oid *eqOperators,
					  Oid **eqFuncOids,
					  FmgrInfo **hashFunctions)
{
	int			i;

	/* Result arrays are palloc'd in the caller's current memory context */
	*eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
	*hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));

	for (i = 0; i < numCols; i++)
	{
		Oid			eq_opr = eqOperators[i];
		Oid			eq_function;
		Oid			left_hash_function;
		Oid			right_hash_function;

		eq_function = get_opcode(eq_opr);
		/* Any operator used here must belong to a hash opclass */
		if (!get_op_hash_functions(eq_opr,
								   &left_hash_function, &right_hash_function))
			elog(ERROR, "could not find hash function for hash operator %u",
				 eq_opr);
		/* We're not supporting cross-type cases here */
		Assert(left_hash_function == right_hash_function);
		(*eqFuncOids)[i] = eq_function;
		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
	}
}


/*****************************************************************************
 *		Utility routines for all-in-memory hash tables
 *
 * These routines build hash tables for grouping tuples together (eg, for
 * hash aggregation).  There is one entry for each not-distinct set of tuples
 * presented.
 *****************************************************************************/

/*
 * Construct an empty TupleHashTable
 *
 *	numCols, keyColIdx: identify the tuple fields to use as lookup key
 *	eqfunctions: equality comparison functions to use
 *	hashfunctions: datatype-specific hashing functions to use
 *	nbuckets: initial estimate of hashtable size
 *	additionalsize: size of data stored in ->additional
 *	metacxt: memory context for long-lived allocation, but not per-entry data
 *	tablecxt: memory context in which to store table entries
 *	tempcxt: short-lived context for evaluating hash and comparison functions
 *
 * The function arrays may be made with execTuplesHashPrepare().
 * Note they are not cross-type functions, but expect to see the table
 * datatype(s) on both sides.
 *
 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
 * storage that will live as long as the hashtable does.
 */
TupleHashTable
BuildTupleHashTableExt(PlanState *parent,
					   TupleDesc inputDesc,
					   int numCols, AttrNumber *keyColIdx,
					   const Oid *eqfuncoids,
					   FmgrInfo *hashfunctions,
					   Oid *collations,
					   long nbuckets, Size additionalsize,
					   MemoryContext metacxt,
					   MemoryContext tablecxt,
					   MemoryContext tempcxt,
					   bool use_variable_hash_iv)
{
	TupleHashTable hashtable;
	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
	MemoryContext oldcontext;
	bool		allow_jit;

	Assert(nbuckets > 0);

	/*
	 * Limit initial table size request to not more than work_mem.
	 *
	 * NOTE(review): when entrysize exceeds work_mem * 1024 this clamps
	 * nbuckets to 0; presumably tuplehash_create() enforces a sane minimum
	 * size — verify against simplehash.h's SH_CREATE.
	 */
	nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));

	/* All long-lived metadata is allocated in metacxt */
	oldcontext = MemoryContextSwitchTo(metacxt);

	hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));

	hashtable->numCols = numCols;
	hashtable->keyColIdx = keyColIdx;
	hashtable->tab_hash_funcs = hashfunctions;
	hashtable->tab_collations = collations;
	hashtable->tablecxt = tablecxt;
	hashtable->tempcxt = tempcxt;
	hashtable->entrysize = entrysize;
	hashtable->tableslot = NULL;	/* will be made on first lookup */
	hashtable->inputslot = NULL;
	hashtable->in_hash_funcs = NULL;
	hashtable->cur_eq_func = NULL;

	/*
	 * If parallelism is in use, even if the master backend is performing the
	 * scan itself, we don't want to create the hashtable exactly the same way
	 * in all workers. As hashtables are iterated over in keyspace-order,
	 * doing so in all processes in the same way is likely to lead to
	 * "unbalanced" hashtables when the table size initially is
	 * underestimated.
	 */
	if (use_variable_hash_iv)
		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
	else
		hashtable->hash_iv = 0;

	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);

	/*
	 * We copy the input tuple descriptor just for safety --- we assume all
	 * input tuples will have equivalent descriptors.
	 */
	hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
													&TTSOpsMinimalTuple);

	/*
	 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
	 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
	 * functions to a) live longer than the query b) be re-generated each time
	 * the table is being reset. Therefore prevent JIT from being used in that
	 * case, by not providing a parent node (which prevents accessing the
	 * JitContext in the EState).
	 */
	allow_jit = metacxt != tablecxt;

	/* build comparator for all columns */
	/* XXX: should we support non-minimal tuples for the inputslot? */
	hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
													&TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
													numCols,
													keyColIdx, eqfuncoids, collations,
													allow_jit ? parent : NULL);

	/*
	 * While not pretty, it's ok to not shut down this context, but instead
	 * rely on the containing memory context being reset, as
	 * ExecBuildGroupingEqual() only builds a very simple expression calling
	 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
	 */
	hashtable->exprcontext = CreateStandaloneExprContext();

	MemoryContextSwitchTo(oldcontext);

	return hashtable;
}

/*
 * BuildTupleHashTable is a backwards-compatibility wrapper for
 * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
 * tablecxt.  Note that hashtables created this way cannot be reset leak-free
 * with ResetTupleHashTable().
250 */ 251 TupleHashTable 252 BuildTupleHashTable(PlanState *parent, 253 TupleDesc inputDesc, 254 int numCols, AttrNumber *keyColIdx, 255 const Oid *eqfuncoids, 256 FmgrInfo *hashfunctions, 257 Oid *collations, 258 long nbuckets, Size additionalsize, 259 MemoryContext tablecxt, 260 MemoryContext tempcxt, 261 bool use_variable_hash_iv) 262 { 263 return BuildTupleHashTableExt(parent, 264 inputDesc, 265 numCols, keyColIdx, 266 eqfuncoids, 267 hashfunctions, 268 collations, 269 nbuckets, additionalsize, 270 tablecxt, 271 tablecxt, 272 tempcxt, 273 use_variable_hash_iv); 274 } 275 276 /* 277 * Reset contents of the hashtable to be empty, preserving all the non-content 278 * state. Note that the tablecxt passed to BuildTupleHashTableExt() should 279 * also be reset, otherwise there will be leaks. 280 */ 281 void 282 ResetTupleHashTable(TupleHashTable hashtable) 283 { 284 tuplehash_reset(hashtable->hashtab); 285 } 286 287 /* 288 * Find or create a hashtable entry for the tuple group containing the 289 * given tuple. The tuple must be the same type as the hashtable entries. 290 * 291 * If isnew is NULL, we do not create new entries; we return NULL if no 292 * match is found. 293 * 294 * If isnew isn't NULL, then a new entry is created if no existing entry 295 * matches. On return, *isnew is true if the entry is newly created, 296 * false if it existed already. ->additional_data in the new entry has 297 * been zeroed. 
 */
TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
					 bool *isnew)
{
	TupleHashEntryData *entry;
	MemoryContext oldContext;
	bool		found;
	MinimalTuple key;

	/* Need to run the hash functions in short-lived context */
	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);

	/*
	 * set up data needed by hash and match functions: the callbacks
	 * (TupleHashTableHash/TupleHashTableMatch) read these fields when they
	 * see a NULL key below.
	 */
	hashtable->inputslot = slot;
	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
	hashtable->cur_eq_func = hashtable->tab_eq_func;

	key = NULL;					/* flag to reference inputslot */

	if (isnew)
	{
		entry = tuplehash_insert(hashtable->hashtab, key, &found);

		if (found)
		{
			/* found pre-existing entry */
			*isnew = false;
		}
		else
		{
			/* created new entry */
			*isnew = true;
			/* zero caller data */
			entry->additional = NULL;
			MemoryContextSwitchTo(hashtable->tablecxt);
			/* Copy the first tuple into the table context */
			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
		}
	}
	else
	{
		/* probe only: no new entry is created on a miss */
		entry = tuplehash_lookup(hashtable->hashtab, key);
	}

	MemoryContextSwitchTo(oldContext);

	return entry;
}

/*
 * Search for a hashtable entry matching the given tuple.  No entry is
 * created if there's not a match.  This is similar to the non-creating
 * case of LookupTupleHashEntry, except that it supports cross-type
 * comparisons, in which the given tuple is not of the same type as the
 * table entries.  The caller must provide the hash functions to use for
 * the input tuple, as well as the equality functions, since these may be
 * different from the table's internal functions.
356 */ 357 TupleHashEntry 358 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, 359 ExprState *eqcomp, 360 FmgrInfo *hashfunctions) 361 { 362 TupleHashEntry entry; 363 MemoryContext oldContext; 364 MinimalTuple key; 365 366 /* Need to run the hash functions in short-lived context */ 367 oldContext = MemoryContextSwitchTo(hashtable->tempcxt); 368 369 /* Set up data needed by hash and match functions */ 370 hashtable->inputslot = slot; 371 hashtable->in_hash_funcs = hashfunctions; 372 hashtable->cur_eq_func = eqcomp; 373 374 /* Search the hash table */ 375 key = NULL; /* flag to reference inputslot */ 376 entry = tuplehash_lookup(hashtable->hashtab, key); 377 MemoryContextSwitchTo(oldContext); 378 379 return entry; 380 } 381 382 /* 383 * Compute the hash value for a tuple 384 * 385 * The passed-in key is a pointer to TupleHashEntryData. In an actual hash 386 * table entry, the firstTuple field points to a tuple (in MinimalTuple 387 * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a 388 * NULL firstTuple field --- that cues us to look at the inputslot instead. 389 * This convention avoids the need to materialize virtual input tuples unless 390 * they actually need to get copied into the table. 391 * 392 * Also, the caller must select an appropriate memory context for running 393 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.) 
 */
static uint32
TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
{
	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
	int			numCols = hashtable->numCols;
	AttrNumber *keyColIdx = hashtable->keyColIdx;
	uint32		hashkey = hashtable->hash_iv;
	TupleTableSlot *slot;
	FmgrInfo   *hashfunctions;
	int			i;

	if (tuple == NULL)
	{
		/* Process the current input tuple for the table */
		slot = hashtable->inputslot;
		hashfunctions = hashtable->in_hash_funcs;
	}
	else
	{
		/*
		 * Process a tuple already stored in the table.
		 *
		 * (this case never actually occurs due to the way simplehash.h is
		 * used, as the hash-value is stored in the entries)
		 */
		slot = hashtable->tableslot;
		ExecStoreMinimalTuple(tuple, slot, false);
		hashfunctions = hashtable->tab_hash_funcs;
	}

	for (i = 0; i < numCols; i++)
	{
		AttrNumber	att = keyColIdx[i];
		Datum		attr;
		bool		isNull;

		/* rotate hashkey left 1 bit at each step */
		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);

		attr = slot_getattr(slot, att, &isNull);

		if (!isNull)			/* treat nulls as having hash key 0 */
		{
			uint32		hkey;

			/* per-column hash, using the table's collation for this column */
			hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
													hashtable->tab_collations[i],
													attr));
			hashkey ^= hkey;
		}
	}

	/*
	 * The way hashes are combined above, among each other and with the IV,
	 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
	 * achieve that, perform a round of hashing of the combined hash -
	 * resulting in near perfect perturbation.
	 */
	return murmurhash32(hashkey);
}

/*
 * See whether two tuples (presumably of the same hash value) match
 *
 * As above, the passed pointers are MinimalTuples --- a NULL means "use
 * the current inputslot".
 */
static int
TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
{
	TupleTableSlot *slot1;
	TupleTableSlot *slot2;
	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
	ExprContext *econtext = hashtable->exprcontext;

	/*
	 * We assume that simplehash.h will only ever call us with the first
	 * argument being an actual table entry, and the second argument being
	 * the NULL key passed by LookupTupleHashEntry/FindTupleHashEntry (which
	 * means "use inputslot"). The other direction could be supported too,
	 * but is not currently required.
	 */
	Assert(tuple1 != NULL);
	slot1 = hashtable->tableslot;
	ExecStoreMinimalTuple(tuple1, slot1, false);
	Assert(tuple2 == NULL);
	slot2 = hashtable->inputslot;

	/* For crosstype comparisons, the inputslot must be first */
	econtext->ecxt_innertuple = slot2;
	econtext->ecxt_outertuple = slot1;
	/* cur_eq_func returns true on match; simplehash wants 0 for "equal" */
	return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
}