1 /*-------------------------------------------------------------------------
2  *
3  * execGrouping.c
4  *	  executor utility routines for grouping, hashing, and aggregation
5  *
6  * Note: we currently assume that equality and hashing functions are not
7  * collation-sensitive, so the code in this file has no support for passing
8  * collation settings through from callers.  That may have to change someday.
9  *
10  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  *
14  * IDENTIFICATION
15  *	  src/backend/executor/execGrouping.c
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres.h"
20 
21 #include "access/parallel.h"
22 #include "executor/executor.h"
23 #include "miscadmin.h"
24 #include "utils/lsyscache.h"
25 #include "utils/hashutils.h"
26 #include "utils/memutils.h"
27 
28 static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
29 static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
30 
31 /*
32  * Define parameters for tuple hash table code generation. The interface is
33  * *also* declared in execnodes.h (to generate the types, which are externally
34  * visible).
35  */
36 #define SH_PREFIX tuplehash
37 #define SH_ELEMENT_TYPE TupleHashEntryData
38 #define SH_KEY_TYPE MinimalTuple
39 #define SH_KEY firstTuple
40 #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
41 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
42 #define SH_SCOPE extern
43 #define SH_STORE_HASH
44 #define SH_GET_HASH(tb, a) a->hash
45 #define SH_DEFINE
46 #include "lib/simplehash.h"
47 
48 
49 /*****************************************************************************
50  *		Utility routines for grouping tuples together
51  *****************************************************************************/
52 
53 /*
54  * execTuplesMatchPrepare
55  *		Build expression that can be evaluated using ExecQual(), returning
56  *		whether an ExprContext's inner/outer tuples are NOT DISTINCT
57  */
58 ExprState *
execTuplesMatchPrepare(TupleDesc desc,int numCols,const AttrNumber * keyColIdx,const Oid * eqOperators,const Oid * collations,PlanState * parent)59 execTuplesMatchPrepare(TupleDesc desc,
60 					   int numCols,
61 					   const AttrNumber *keyColIdx,
62 					   const Oid *eqOperators,
63 					   const Oid *collations,
64 					   PlanState *parent)
65 {
66 	Oid		   *eqFunctions = (Oid *) palloc(numCols * sizeof(Oid));
67 	int			i;
68 	ExprState  *expr;
69 
70 	if (numCols == 0)
71 		return NULL;
72 
73 	/* lookup equality functions */
74 	for (i = 0; i < numCols; i++)
75 		eqFunctions[i] = get_opcode(eqOperators[i]);
76 
77 	/* build actual expression */
78 	expr = ExecBuildGroupingEqual(desc, desc, NULL, NULL,
79 								  numCols, keyColIdx, eqFunctions, collations,
80 								  parent);
81 
82 	return expr;
83 }
84 
85 /*
86  * execTuplesHashPrepare
87  *		Look up the equality and hashing functions needed for a TupleHashTable.
88  *
89  * This is similar to execTuplesMatchPrepare, but we also need to find the
90  * hash functions associated with the equality operators.  *eqFunctions and
91  * *hashFunctions receive the palloc'd result arrays.
92  *
93  * Note: we expect that the given operators are not cross-type comparisons.
94  */
95 void
execTuplesHashPrepare(int numCols,const Oid * eqOperators,Oid ** eqFuncOids,FmgrInfo ** hashFunctions)96 execTuplesHashPrepare(int numCols,
97 					  const Oid *eqOperators,
98 					  Oid **eqFuncOids,
99 					  FmgrInfo **hashFunctions)
100 {
101 	int			i;
102 
103 	*eqFuncOids = (Oid *) palloc(numCols * sizeof(Oid));
104 	*hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
105 
106 	for (i = 0; i < numCols; i++)
107 	{
108 		Oid			eq_opr = eqOperators[i];
109 		Oid			eq_function;
110 		Oid			left_hash_function;
111 		Oid			right_hash_function;
112 
113 		eq_function = get_opcode(eq_opr);
114 		if (!get_op_hash_functions(eq_opr,
115 								   &left_hash_function, &right_hash_function))
116 			elog(ERROR, "could not find hash function for hash operator %u",
117 				 eq_opr);
118 		/* We're not supporting cross-type cases here */
119 		Assert(left_hash_function == right_hash_function);
120 		(*eqFuncOids)[i] = eq_function;
121 		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
122 	}
123 }
124 
125 
126 /*****************************************************************************
127  *		Utility routines for all-in-memory hash tables
128  *
129  * These routines build hash tables for grouping tuples together (eg, for
130  * hash aggregation).  There is one entry for each not-distinct set of tuples
131  * presented.
132  *****************************************************************************/
133 
134 /*
135  * Construct an empty TupleHashTable
136  *
137  *	numCols, keyColIdx: identify the tuple fields to use as lookup key
138  *	eqfunctions: equality comparison functions to use
139  *	hashfunctions: datatype-specific hashing functions to use
140  *	nbuckets: initial estimate of hashtable size
141  *	additionalsize: size of data stored in ->additional
142  *	metacxt: memory context for long-lived allocation, but not per-entry data
143  *	tablecxt: memory context in which to store table entries
144  *	tempcxt: short-lived context for evaluation hash and comparison functions
145  *
146  * The function arrays may be made with execTuplesHashPrepare().  Note they
147  * are not cross-type functions, but expect to see the table datatype(s)
148  * on both sides.
149  *
150  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
151  * storage that will live as long as the hashtable does.
152  */
153 TupleHashTable
BuildTupleHashTableExt(PlanState * parent,TupleDesc inputDesc,int numCols,AttrNumber * keyColIdx,const Oid * eqfuncoids,FmgrInfo * hashfunctions,Oid * collations,long nbuckets,Size additionalsize,MemoryContext metacxt,MemoryContext tablecxt,MemoryContext tempcxt,bool use_variable_hash_iv)154 BuildTupleHashTableExt(PlanState *parent,
155 					   TupleDesc inputDesc,
156 					   int numCols, AttrNumber *keyColIdx,
157 					   const Oid *eqfuncoids,
158 					   FmgrInfo *hashfunctions,
159 					   Oid *collations,
160 					   long nbuckets, Size additionalsize,
161 					   MemoryContext metacxt,
162 					   MemoryContext tablecxt,
163 					   MemoryContext tempcxt,
164 					   bool use_variable_hash_iv)
165 {
166 	TupleHashTable hashtable;
167 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
168 	MemoryContext oldcontext;
169 	bool		allow_jit;
170 
171 	Assert(nbuckets > 0);
172 
173 	/* Limit initial table size request to not more than work_mem */
174 	nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
175 
176 	oldcontext = MemoryContextSwitchTo(metacxt);
177 
178 	hashtable = (TupleHashTable) palloc(sizeof(TupleHashTableData));
179 
180 	hashtable->numCols = numCols;
181 	hashtable->keyColIdx = keyColIdx;
182 	hashtable->tab_hash_funcs = hashfunctions;
183 	hashtable->tab_collations = collations;
184 	hashtable->tablecxt = tablecxt;
185 	hashtable->tempcxt = tempcxt;
186 	hashtable->entrysize = entrysize;
187 	hashtable->tableslot = NULL;	/* will be made on first lookup */
188 	hashtable->inputslot = NULL;
189 	hashtable->in_hash_funcs = NULL;
190 	hashtable->cur_eq_func = NULL;
191 
192 	/*
193 	 * If parallelism is in use, even if the master backend is performing the
194 	 * scan itself, we don't want to create the hashtable exactly the same way
195 	 * in all workers. As hashtables are iterated over in keyspace-order,
196 	 * doing so in all processes in the same way is likely to lead to
197 	 * "unbalanced" hashtables when the table size initially is
198 	 * underestimated.
199 	 */
200 	if (use_variable_hash_iv)
201 		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
202 	else
203 		hashtable->hash_iv = 0;
204 
205 	hashtable->hashtab = tuplehash_create(metacxt, nbuckets, hashtable);
206 
207 	/*
208 	 * We copy the input tuple descriptor just for safety --- we assume all
209 	 * input tuples will have equivalent descriptors.
210 	 */
211 	hashtable->tableslot = MakeSingleTupleTableSlot(CreateTupleDescCopy(inputDesc),
212 													&TTSOpsMinimalTuple);
213 
214 	/*
215 	 * If the old reset interface is used (i.e. BuildTupleHashTable, rather
216 	 * than BuildTupleHashTableExt), allowing JIT would lead to the generated
217 	 * functions to a) live longer than the query b) be re-generated each time
218 	 * the table is being reset. Therefore prevent JIT from being used in that
219 	 * case, by not providing a parent node (which prevents accessing the
220 	 * JitContext in the EState).
221 	 */
222 	allow_jit = metacxt != tablecxt;
223 
224 	/* build comparator for all columns */
225 	/* XXX: should we support non-minimal tuples for the inputslot? */
226 	hashtable->tab_eq_func = ExecBuildGroupingEqual(inputDesc, inputDesc,
227 													&TTSOpsMinimalTuple, &TTSOpsMinimalTuple,
228 													numCols,
229 													keyColIdx, eqfuncoids, collations,
230 													allow_jit ? parent : NULL);
231 
232 	/*
233 	 * While not pretty, it's ok to not shut down this context, but instead
234 	 * rely on the containing memory context being reset, as
235 	 * ExecBuildGroupingEqual() only builds a very simple expression calling
236 	 * functions (i.e. nothing that'd employ RegisterExprContextCallback()).
237 	 */
238 	hashtable->exprcontext = CreateStandaloneExprContext();
239 
240 	MemoryContextSwitchTo(oldcontext);
241 
242 	return hashtable;
243 }
244 
245 /*
246  * BuildTupleHashTable is a backwards-compatibilty wrapper for
247  * BuildTupleHashTableExt(), that allocates the hashtable's metadata in
248  * tablecxt. Note that hashtables created this way cannot be reset leak-free
249  * with ResetTupleHashTable().
250  */
251 TupleHashTable
BuildTupleHashTable(PlanState * parent,TupleDesc inputDesc,int numCols,AttrNumber * keyColIdx,const Oid * eqfuncoids,FmgrInfo * hashfunctions,Oid * collations,long nbuckets,Size additionalsize,MemoryContext tablecxt,MemoryContext tempcxt,bool use_variable_hash_iv)252 BuildTupleHashTable(PlanState *parent,
253 					TupleDesc inputDesc,
254 					int numCols, AttrNumber *keyColIdx,
255 					const Oid *eqfuncoids,
256 					FmgrInfo *hashfunctions,
257 					Oid *collations,
258 					long nbuckets, Size additionalsize,
259 					MemoryContext tablecxt,
260 					MemoryContext tempcxt,
261 					bool use_variable_hash_iv)
262 {
263 	return BuildTupleHashTableExt(parent,
264 								  inputDesc,
265 								  numCols, keyColIdx,
266 								  eqfuncoids,
267 								  hashfunctions,
268 								  collations,
269 								  nbuckets, additionalsize,
270 								  tablecxt,
271 								  tablecxt,
272 								  tempcxt,
273 								  use_variable_hash_iv);
274 }
275 
276 /*
277  * Reset contents of the hashtable to be empty, preserving all the non-content
278  * state. Note that the tablecxt passed to BuildTupleHashTableExt() should
279  * also be reset, otherwise there will be leaks.
280  */
281 void
ResetTupleHashTable(TupleHashTable hashtable)282 ResetTupleHashTable(TupleHashTable hashtable)
283 {
284 	tuplehash_reset(hashtable->hashtab);
285 }
286 
287 /*
288  * Find or create a hashtable entry for the tuple group containing the
289  * given tuple.  The tuple must be the same type as the hashtable entries.
290  *
291  * If isnew is NULL, we do not create new entries; we return NULL if no
292  * match is found.
293  *
294  * If isnew isn't NULL, then a new entry is created if no existing entry
295  * matches.  On return, *isnew is true if the entry is newly created,
296  * false if it existed already.  ->additional_data in the new entry has
297  * been zeroed.
298  */
299 TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,bool * isnew)300 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
301 					 bool *isnew)
302 {
303 	TupleHashEntryData *entry;
304 	MemoryContext oldContext;
305 	bool		found;
306 	MinimalTuple key;
307 
308 	/* Need to run the hash functions in short-lived context */
309 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
310 
311 	/* set up data needed by hash and match functions */
312 	hashtable->inputslot = slot;
313 	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
314 	hashtable->cur_eq_func = hashtable->tab_eq_func;
315 
316 	key = NULL;					/* flag to reference inputslot */
317 
318 	if (isnew)
319 	{
320 		entry = tuplehash_insert(hashtable->hashtab, key, &found);
321 
322 		if (found)
323 		{
324 			/* found pre-existing entry */
325 			*isnew = false;
326 		}
327 		else
328 		{
329 			/* created new entry */
330 			*isnew = true;
331 			/* zero caller data */
332 			entry->additional = NULL;
333 			MemoryContextSwitchTo(hashtable->tablecxt);
334 			/* Copy the first tuple into the table context */
335 			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
336 		}
337 	}
338 	else
339 	{
340 		entry = tuplehash_lookup(hashtable->hashtab, key);
341 	}
342 
343 	MemoryContextSwitchTo(oldContext);
344 
345 	return entry;
346 }
347 
348 /*
349  * Search for a hashtable entry matching the given tuple.  No entry is
350  * created if there's not a match.  This is similar to the non-creating
351  * case of LookupTupleHashEntry, except that it supports cross-type
352  * comparisons, in which the given tuple is not of the same type as the
353  * table entries.  The caller must provide the hash functions to use for
354  * the input tuple, as well as the equality functions, since these may be
355  * different from the table's internal functions.
356  */
357 TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,ExprState * eqcomp,FmgrInfo * hashfunctions)358 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
359 				   ExprState *eqcomp,
360 				   FmgrInfo *hashfunctions)
361 {
362 	TupleHashEntry entry;
363 	MemoryContext oldContext;
364 	MinimalTuple key;
365 
366 	/* Need to run the hash functions in short-lived context */
367 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
368 
369 	/* Set up data needed by hash and match functions */
370 	hashtable->inputslot = slot;
371 	hashtable->in_hash_funcs = hashfunctions;
372 	hashtable->cur_eq_func = eqcomp;
373 
374 	/* Search the hash table */
375 	key = NULL;					/* flag to reference inputslot */
376 	entry = tuplehash_lookup(hashtable->hashtab, key);
377 	MemoryContextSwitchTo(oldContext);
378 
379 	return entry;
380 }
381 
382 /*
383  * Compute the hash value for a tuple
384  *
385  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
386  * table entry, the firstTuple field points to a tuple (in MinimalTuple
387  * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
388  * NULL firstTuple field --- that cues us to look at the inputslot instead.
389  * This convention avoids the need to materialize virtual input tuples unless
390  * they actually need to get copied into the table.
391  *
392  * Also, the caller must select an appropriate memory context for running
393  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
394  */
395 static uint32
TupleHashTableHash(struct tuplehash_hash * tb,const MinimalTuple tuple)396 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
397 {
398 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
399 	int			numCols = hashtable->numCols;
400 	AttrNumber *keyColIdx = hashtable->keyColIdx;
401 	uint32		hashkey = hashtable->hash_iv;
402 	TupleTableSlot *slot;
403 	FmgrInfo   *hashfunctions;
404 	int			i;
405 
406 	if (tuple == NULL)
407 	{
408 		/* Process the current input tuple for the table */
409 		slot = hashtable->inputslot;
410 		hashfunctions = hashtable->in_hash_funcs;
411 	}
412 	else
413 	{
414 		/*
415 		 * Process a tuple already stored in the table.
416 		 *
417 		 * (this case never actually occurs due to the way simplehash.h is
418 		 * used, as the hash-value is stored in the entries)
419 		 */
420 		slot = hashtable->tableslot;
421 		ExecStoreMinimalTuple(tuple, slot, false);
422 		hashfunctions = hashtable->tab_hash_funcs;
423 	}
424 
425 	for (i = 0; i < numCols; i++)
426 	{
427 		AttrNumber	att = keyColIdx[i];
428 		Datum		attr;
429 		bool		isNull;
430 
431 		/* rotate hashkey left 1 bit at each step */
432 		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
433 
434 		attr = slot_getattr(slot, att, &isNull);
435 
436 		if (!isNull)			/* treat nulls as having hash key 0 */
437 		{
438 			uint32		hkey;
439 
440 			hkey = DatumGetUInt32(FunctionCall1Coll(&hashfunctions[i],
441 													hashtable->tab_collations[i],
442 													attr));
443 			hashkey ^= hkey;
444 		}
445 	}
446 
447 	/*
448 	 * The way hashes are combined above, among each other and with the IV,
449 	 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
450 	 * achieve that, perform a round of hashing of the combined hash -
451 	 * resulting in near perfect perturbation.
452 	 */
453 	return murmurhash32(hashkey);
454 }
455 
456 /*
457  * See whether two tuples (presumably of the same hash value) match
458  *
459  * As above, the passed pointers are pointers to TupleHashEntryData.
460  */
461 static int
TupleHashTableMatch(struct tuplehash_hash * tb,const MinimalTuple tuple1,const MinimalTuple tuple2)462 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
463 {
464 	TupleTableSlot *slot1;
465 	TupleTableSlot *slot2;
466 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
467 	ExprContext *econtext = hashtable->exprcontext;
468 
469 	/*
470 	 * We assume that simplehash.h will only ever call us with the first
471 	 * argument being an actual table entry, and the second argument being
472 	 * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
473 	 * could be supported too, but is not currently required.
474 	 */
475 	Assert(tuple1 != NULL);
476 	slot1 = hashtable->tableslot;
477 	ExecStoreMinimalTuple(tuple1, slot1, false);
478 	Assert(tuple2 == NULL);
479 	slot2 = hashtable->inputslot;
480 
481 	/* For crosstype comparisons, the inputslot must be first */
482 	econtext->ecxt_innertuple = slot2;
483 	econtext->ecxt_outertuple = slot1;
484 	return !ExecQualAndReset(hashtable->cur_eq_func, econtext);
485 }
486