1 /*-------------------------------------------------------------------------
2 *
3 * execGrouping.c
4 * executor utility routines for grouping, hashing, and aggregation
5 *
6 * Note: we currently assume that equality and hashing functions are not
7 * collation-sensitive, so the code in this file has no support for passing
8 * collation settings through from callers. That may have to change someday.
9 *
10 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
12 *
13 *
14 * IDENTIFICATION
15 * src/backend/executor/execGrouping.c
16 *
17 *-------------------------------------------------------------------------
18 */
19 #include "postgres.h"
20
21 #include "access/hash.h"
22 #include "access/parallel.h"
23 #include "executor/executor.h"
24 #include "miscadmin.h"
25 #include "utils/lsyscache.h"
26 #include "utils/hashutils.h"
27 #include "utils/memutils.h"
28
29 static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
30 static int TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
31
32 /*
33 * Define parameters for tuple hash table code generation. The interface is
34 * *also* declared in execnodes.h (to generate the types, which are externally
35 * visible).
36 */
37 #define SH_PREFIX tuplehash
38 #define SH_ELEMENT_TYPE TupleHashEntryData
39 #define SH_KEY_TYPE MinimalTuple
40 #define SH_KEY firstTuple
41 #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
42 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
43 #define SH_SCOPE extern
44 #define SH_STORE_HASH
45 #define SH_GET_HASH(tb, a) a->hash
46 #define SH_DEFINE
47 #include "lib/simplehash.h"
48
49
50 /*****************************************************************************
51 * Utility routines for grouping tuples together
52 *****************************************************************************/
53
54 /*
55 * execTuplesMatchPrepare
56 * Build expression that can be evaluated using ExecQual(), returning
57 * whether an ExprContext's inner/outer tuples are NOT DISTINCT
58 */
59 ExprState *
execTuplesMatchPrepare(TupleDesc desc,int numCols,AttrNumber * keyColIdx,Oid * eqOperators,PlanState * parent)60 execTuplesMatchPrepare(TupleDesc desc,
61 int numCols,
62 AttrNumber *keyColIdx,
63 Oid *eqOperators,
64 PlanState *parent)
65 {
66 Oid *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 int i;
68 ExprState *expr;
69
70 if (numCols == 0)
71 return NULL;
72
73 /* lookup equality functions */
74 for (i = 0; i < numCols; i++)
75 eqFunctions[i] = get_opcode(eqOperators[i]);
76
77 /* build actual expression */
78 expr = ExecBuildGroupingEqual(desc, desc, numCols, keyColIdx, eqFunctions,
79 parent);
80
81 return expr;
82 }
83
84 /*
85 * execTuplesHashPrepare
86 * Look up the equality and hashing functions needed for a TupleHashTable.
87 *
88 * This is similar to execTuplesMatchPrepare, but we also need to find the
89 * hash functions associated with the equality operators. *eqFunctions and
90 * *hashFunctions receive the palloc'd result arrays.
91 *
92 * Note: we expect that the given operators are not cross-type comparisons.
93 */
94 void
execTuplesHashPrepare(int numCols,Oid * eqOperators,Oid ** eqFuncOids,FmgrInfo ** hashFunctions)95 execTuplesHashPrepare(int numCols,
96 Oid *eqOperators,
97 Oid **eqFuncOids,
98 FmgrInfo **hashFunctions)
99 {
100 int i;
101
102 *eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
103 *hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
104
105 for (i = 0; i < numCols; i++)
106 {
107 Oid eq_opr = eqOperators[i];
108 Oid eq_function;
109 Oid left_hash_function;
110 Oid right_hash_function;
111
112 eq_function = get_opcode(eq_opr);
113 if (!get_op_hash_functions(eq_opr,
114 &left_hash_function, &right_hash_function))
115 elog(ERROR, "could not find hash function for hash operator %u",
116 eq_opr);
117 /* We're not supporting cross-type cases here */
118 Assert(left_hash_function == right_hash_function);
119 (*eqFuncOids)[i] = eq_function;
120 fmgr_info(right_hash_function, &(*hashFunctions)[i]);
121 }
122 }
123
124
125 /*****************************************************************************
126 * Utility routines for all-in-memory hash tables
127 *
128 * These routines build hash tables for grouping tuples together (eg, for
129 * hash aggregation). There is one entry for each not-distinct set of tuples
130 * presented.
131 *****************************************************************************/
132
133 /*
134 * Construct an empty TupleHashTable
135 *
136 * numCols, keyColIdx: identify the tuple fields to use as lookup key
137 * eqfunctions: equality comparison functions to use
138 * hashfunctions: datatype-specific hashing functions to use
139 * nbuckets: initial estimate of hashtable size
140 * additionalsize: size of data stored in ->additional
141 * metacxt: memory context for long-lived allocation, but not per-entry data
142 * tablecxt: memory context in which to store table entries
143 * tempcxt: short-lived context for evaluation hash and comparison functions
144 *
145 * The function arrays may be made with execTuplesHashPrepare(). Note they
146 * are not cross-type functions, but expect to see the table datatype(s)
147 * on both sides.
148 *
149 * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
150 * storage that will live as long as the hashtable does.
151 */
152 TupleHashTable
BuildTupleHashTableExt(PlanState * parent,TupleDesc inputDesc,int numCols,AttrNumber * keyColIdx,Oid * eqfuncoids,FmgrInfo * hashfunctions,long nbuckets,Size additionalsize,MemoryContext metacxt,MemoryContext tablecxt,MemoryContext tempcxt,bool use_variable_hash_iv)153 BuildTupleHashTableExt(PlanState *parent,
154 TupleDesc inputDesc,
155 int numCols, AttrNumber *keyColIdx,
156 Oid *eqfuncoids,
157 FmgrInfo *hashfunctions,
158 long nbuckets, Size additionalsize,
159 MemoryContext metacxt,
160 MemoryContext tablecxt,
161 MemoryContext tempcxt,
162 bool use_variable_hash_iv)
163 {
164 TupleHashTable hashtable;
165 Size entrysize = sizeof(TupleHashEntryData) + additionalsize;
166 MemoryContext oldcontext;
167 bool allow_jit;
168
169 Assert(nbuckets > 0);
170
171 /* Limit initial table size request to not more than work_mem */
172 nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
173
174 oldcontext = MemoryContextSwitchTo(metacxt);
175
176 hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
177
178 hashtable->numCols = numCols;
179 hashtable->keyColIdx = keyColIdx;
180 hashtable->tab_hash_funcs = hashfunctions;
181 hashtable->tablecxt = tablecxt;
182 hashtable->tempcxt = tempcxt;
183 hashtable->entrysize = entrysize;
184 hashtable->tableslot = NULL; /* will be made on first lookup */
185 hashtable->inputslot = NULL;
186 hashtable->in_hash_funcs = NULL;
187 hashtable->cur_eq_func = NULL;
188
189 /*
190 * If parallelism is in use, even if the master backend is performing the
191 * scan itself, we don't want to create the hashtable exactly the same way
192 * in all workers. As hashtables are iterated over in keyspace-order,
193 * doing so in all processes in the same way is likely to lead to
194 * "unbalanced" hashtables when the table size initially is
195 * underestimated.
196 */
197 if (use_variable_hash_iv)
198 hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
199 else
200 hashtable->hash_iv = 0;
201
202 hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
203
204 /*
205 * We copy the input tuple descriptor just for safety --- we assume all
206 * input tuples will have equivalent descriptors.
207 */
208 hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc));
209
210 /*
211 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
212 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
213 * functions to a) live longer than the query b) be re-generated each time
214 * the table is being reset. Therefore prevent JIT from being used in that
215 * case, by not providing a parent node (which prevents accessing the
216 * JitContext in the EState).
217 */
218 allow_jit = metacxt != tablecxt;
219
220 /* build comparator for all columns */
221 hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
222 numCols,
223 keyColIdx, eqfuncoids,
224 allow_jit ? parent : NULL);
225
226 /*
227 * While not pretty, it's ok to not shut down this context, but instead
228 * rely on the containing memory context being reset, as
229 * ExecBuildGroupingEqual() only builds a very simple expression calling
230 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
231 */
232 hashtable->exprcontext = CreateStandaloneExprContext();
233
234 MemoryContextSwitchTo(oldcontext);
235
236 return hashtable;
237 }
238
239 /*
240 * BuildTupleHashTable is a backwards-compatibilty wrapper for
241 * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
242 * tablecxt. Note that hashtables created this way cannot be reset leak-free
243 * with ResetTupleHashTable().
244 */
245 TupleHashTable
BuildTupleHashTable(PlanState * parent,TupleDesc inputDesc,int numCols,AttrNumber * keyColIdx,Oid * eqfuncoids,FmgrInfo * hashfunctions,long nbuckets,Size additionalsize,MemoryContext tablecxt,MemoryContext tempcxt,bool use_variable_hash_iv)246 BuildTupleHashTable(PlanState *parent,
247 TupleDesc inputDesc,
248 int numCols, AttrNumber *keyColIdx,
249 Oid *eqfuncoids,
250 FmgrInfo *hashfunctions,
251 long nbuckets, Size additionalsize,
252 MemoryContext tablecxt,
253 MemoryContext tempcxt,
254 bool use_variable_hash_iv)
255 {
256 return BuildTupleHashTableExt(parent,
257 inputDesc,
258 numCols, keyColIdx,
259 eqfuncoids,
260 hashfunctions,
261 nbuckets, additionalsize,
262 tablecxt,
263 tablecxt,
264 tempcxt,
265 use_variable_hash_iv);
266 }
267
268 /*
269 * Reset contents of the hashtable to be empty, preserving all the non-content
270 * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
271 * also be reset, otherwise there will be leaks.
272 */
273 void
ResetTupleHashTable(TupleHashTable hashtable)274 ResetTupleHashTable(TupleHashTable hashtable)
275 {
276 tuplehash_reset(hashtable->hashtab);
277 }
278
279 /*
280 * Find or create a hashtable entry for the tuple group containing the
281 * given tuple. The tuple must be the same type as the hashtable entries.
282 *
283 * If isnew is NULL, we do not create new entries; we return NULL if no
284 * match is found.
285 *
286 * If isnew isn't NULL, then a new entry is created if no existing entry
287 * matches. On return, *isnew is true if the entry is newly created,
288 * false if it existed already. ->additional_data in the new entry has
289 * been zeroed.
290 */
291 TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,bool * isnew)292 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
293 bool *isnew)
294 {
295 TupleHashEntryData *entry;
296 MemoryContext oldContext;
297 bool found;
298 MinimalTuple key;
299
300 /* Need to run the hash functions in short-lived context */
301 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
302
303 /* set up data needed by hash and match functions */
304 hashtable->inputslot = slot;
305 hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
306 hashtable->cur_eq_func = hashtable->tab_eq_func;
307
308 key = NULL; /* flag to reference inputslot */
309
310 if (isnew)
311 {
312 entry = tuplehash_insert(hashtable->hashtab, key, &found);
313
314 if (found)
315 {
316 /* found pre-existing entry */
317 *isnew = false;
318 }
319 else
320 {
321 /* created new entry */
322 *isnew = true;
323 /* zero caller data */
324 entry->additional = NULL;
325 MemoryContextSwitchTo(hashtable->tablecxt);
326 /* Copy the first tuple into the table context */
327 entry->firstTuple = ExecCopySlotMinimalTuple(slot);
328 }
329 }
330 else
331 {
332 entry = tuplehash_lookup(hashtable->hashtab, key);
333 }
334
335 MemoryContextSwitchTo(oldContext);
336
337 return entry;
338 }
339
340 /*
341 * Search for a hashtable entry matching the given tuple. No entry is
342 * created if there's not a match. This is similar to the non-creating
343 * case of LookupTupleHashEntry, except that it supports cross-type
344 * comparisons, in which the given tuple is not of the same type as the
345 * table entries. The caller must provide the hash functions to use for
346 * the input tuple, as well as the equality functions, since these may be
347 * different from the table's internal functions.
348 */
349 TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,ExprState * eqcomp,FmgrInfo * hashfunctions)350 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
351 ExprState *eqcomp,
352 FmgrInfo *hashfunctions)
353 {
354 TupleHashEntry entry;
355 MemoryContext oldContext;
356 MinimalTuple key;
357
358 /* Need to run the hash functions in short-lived context */
359 oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
360
361 /* Set up data needed by hash and match functions */
362 hashtable->inputslot = slot;
363 hashtable->in_hash_funcs = hashfunctions;
364 hashtable->cur_eq_func = eqcomp;
365
366 /* Search the hash table */
367 key = NULL; /* flag to reference inputslot */
368 entry = tuplehash_lookup(hashtable->hashtab, key);
369 MemoryContextSwitchTo(oldContext);
370
371 return entry;
372 }
373
374 /*
375 * Compute the hash value for a tuple
376 *
377 * The passed-in key is a pointer to TupleHashEntryData. In an actual hash
378 * table entry, the firstTuple field points to a tuple (in MinimalTuple
379 * format). LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
380 * NULL firstTuple field --- that cues us to look at the inputslot instead.
381 * This convention avoids the need to materialize virtual input tuples unless
382 * they actually need to get copied into the table.
383 *
384 * Also, the caller must select an appropriate memory context for running
385 * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
386 */
387 static uint32
TupleHashTableHash(struct tuplehash_hash * tb,const MinimalTuple tuple)388 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
389 {
390 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
391 int numCols = hashtable->numCols;
392 AttrNumber *keyColIdx = hashtable->keyColIdx;
393 uint32 hashkey = hashtable->hash_iv;
394 TupleTableSlot *slot;
395 FmgrInfo *hashfunctions;
396 int i;
397
398 if (tuple == NULL)
399 {
400 /* Process the current input tuple for the table */
401 slot = hashtable->inputslot;
402 hashfunctions = hashtable->in_hash_funcs;
403 }
404 else
405 {
406 /*
407 * Process a tuple already stored in the table.
408 *
409 * (this case never actually occurs due to the way simplehash.h is
410 * used, as the hash-value is stored in the entries)
411 */
412 slot = hashtable->tableslot;
413 ExecStoreMinimalTuple(tuple, slot, false);
414 hashfunctions = hashtable->tab_hash_funcs;
415 }
416
417 for (i = 0; i < numCols; i++)
418 {
419 AttrNumber att = keyColIdx[i];
420 Datum attr;
421 bool isNull;
422
423 /* rotate hashkey left 1 bit at each step */
424 hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
425
426 attr = slot_getattr(slot, att, &isNull);
427
428 if (!isNull) /* treat nulls as having hash key 0 */
429 {
430 uint32 hkey;
431
432 hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
433 attr));
434 hashkey ^= hkey;
435 }
436 }
437
438 /*
439 * The way hashes are combined above, among each other and with the IV,
440 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
441 * achieve that, perform a round of hashing of the combined hash -
442 * resulting in near perfect perturbation.
443 */
444 return murmurhash32(hashkey);
445 }
446
447 /*
448 * See whether two tuples (presumably of the same hash value) match
449 *
450 * As above, the passed pointers are pointers to TupleHashEntryData.
451 */
452 static int
TupleHashTableMatch(struct tuplehash_hash * tb,const MinimalTuple tuple1,const MinimalTuple tuple2)453 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
454 {
455 TupleTableSlot *slot1;
456 TupleTableSlot *slot2;
457 TupleHashTable hashtable = (TupleHashTable) tb->private_data;
458 ExprContext *econtext = hashtable->exprcontext;
459
460 /*
461 * We assume that simplehash.h will only ever call us with the first
462 * argument being an actual table entry, and the second argument being
463 * LookupTupleHashEntry's dummy TupleHashEntryData. The other direction
464 * could be supported too, but is not currently required.
465 */
466 Assert(tuple1 != NULL);
467 slot1 = hashtable->tableslot;
468 ExecStoreMinimalTuple(tuple1, slot1, false);
469 Assert(tuple2 == NULL);
470 slot2 = hashtable->inputslot;
471
472 /* For crosstype comparisons, the inputslot must be first */
473 econtext->ecxt_innertuple = slot2;
474 econtext->ecxt_outertuple = slot1;
475 return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
476 }
477