1 /*-------------------------------------------------------------------------
2  *
3  * execGrouping.c
4  *	  executor utility routines for grouping, hashing, and aggregation
5  *
6  * Note: we currently assume that equality and hashing functions are not
7  * collation-sensitive, so the code in this file has no support for passing
8  * collation settings through from callers.  That may have to change someday.
9  *
10  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
11  * Portions Copyright (c) 1994, Regents of the University of California
12  *
13  *
14  * IDENTIFICATION
15  *	  src/backend/executor/execGrouping.c
16  *
17  *-------------------------------------------------------------------------
18  */
19 #include "postgres.h"
20 
21 #include "access/hash.h"
22 #include "access/parallel.h"
23 #include "executor/executor.h"
24 #include "miscadmin.h"
25 #include "utils/lsyscache.h"
26 #include "utils/hashutils.h"
27 #include "utils/memutils.h"
28 
29 static uint32 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple);
30 static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
31 
32 /*
33  * Define parameters for tuple hash table code generation. The interface is
34  * *also* declared in execnodes.h (to generate the types, which are externally
35  * visible).
36  */
37 #define SH_PREFIX tuplehash
38 #define SH_ELEMENT_TYPE TupleHashEntryData
39 #define SH_KEY_TYPE MinimalTuple
40 #define SH_KEY firstTuple
41 #define SH_HASH_KEY(tb, key) TupleHashTableHash(tb, key)
42 #define SH_EQUAL(tb, a, b) TupleHashTableMatch(tb, a, b) == 0
43 #define SH_SCOPE extern
44 #define SH_STORE_HASH
45 #define SH_GET_HASH(tb, a) a->hash
46 #define SH_DEFINE
47 #include "lib/simplehash.h"
48 
49 
50 /*****************************************************************************
51  *		Utility routines for grouping tuples together
52  *****************************************************************************/
53 
54 /*
55  * execTuplesMatch
56  *		Return true if two tuples match in all the indicated fields.
57  *
58  * This actually implements SQL's notion of "not distinct".  Two nulls
59  * match, a null and a not-null don't match.
60  *
61  * slot1, slot2: the tuples to compare (must have same columns!)
62  * numCols: the number of attributes to be examined
63  * matchColIdx: array of attribute column numbers
64  * eqFunctions: array of fmgr lookup info for the equality functions to use
65  * evalContext: short-term memory context for executing the functions
66  *
67  * NB: evalContext is reset each time!
68  */
69 bool
execTuplesMatch(TupleTableSlot * slot1,TupleTableSlot * slot2,int numCols,AttrNumber * matchColIdx,FmgrInfo * eqfunctions,MemoryContext evalContext)70 execTuplesMatch(TupleTableSlot *slot1,
71 				TupleTableSlot *slot2,
72 				int numCols,
73 				AttrNumber *matchColIdx,
74 				FmgrInfo *eqfunctions,
75 				MemoryContext evalContext)
76 {
77 	MemoryContext oldContext;
78 	bool		result;
79 	int			i;
80 
81 	/* Reset and switch into the temp context. */
82 	MemoryContextReset(evalContext);
83 	oldContext = MemoryContextSwitchTo(evalContext);
84 
85 	/*
86 	 * We cannot report a match without checking all the fields, but we can
87 	 * report a non-match as soon as we find unequal fields.  So, start
88 	 * comparing at the last field (least significant sort key). That's the
89 	 * most likely to be different if we are dealing with sorted input.
90 	 */
91 	result = true;
92 
93 	for (i = numCols; --i >= 0;)
94 	{
95 		AttrNumber	att = matchColIdx[i];
96 		Datum		attr1,
97 					attr2;
98 		bool		isNull1,
99 					isNull2;
100 
101 		attr1 = slot_getattr(slot1, att, &isNull1);
102 
103 		attr2 = slot_getattr(slot2, att, &isNull2);
104 
105 		if (isNull1 != isNull2)
106 		{
107 			result = false;		/* one null and one not; they aren't equal */
108 			break;
109 		}
110 
111 		if (isNull1)
112 			continue;			/* both are null, treat as equal */
113 
114 		/* Apply the type-specific equality function */
115 
116 		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
117 										attr1, attr2)))
118 		{
119 			result = false;		/* they aren't equal */
120 			break;
121 		}
122 	}
123 
124 	MemoryContextSwitchTo(oldContext);
125 
126 	return result;
127 }
128 
129 /*
130  * execTuplesUnequal
131  *		Return true if two tuples are definitely unequal in the indicated
132  *		fields.
133  *
134  * Nulls are neither equal nor unequal to anything else.  A true result
135  * is obtained only if there are non-null fields that compare not-equal.
136  *
137  * Parameters are identical to execTuplesMatch.
138  */
139 bool
execTuplesUnequal(TupleTableSlot * slot1,TupleTableSlot * slot2,int numCols,AttrNumber * matchColIdx,FmgrInfo * eqfunctions,MemoryContext evalContext)140 execTuplesUnequal(TupleTableSlot *slot1,
141 				  TupleTableSlot *slot2,
142 				  int numCols,
143 				  AttrNumber *matchColIdx,
144 				  FmgrInfo *eqfunctions,
145 				  MemoryContext evalContext)
146 {
147 	MemoryContext oldContext;
148 	bool		result;
149 	int			i;
150 
151 	/* Reset and switch into the temp context. */
152 	MemoryContextReset(evalContext);
153 	oldContext = MemoryContextSwitchTo(evalContext);
154 
155 	/*
156 	 * We cannot report a match without checking all the fields, but we can
157 	 * report a non-match as soon as we find unequal fields.  So, start
158 	 * comparing at the last field (least significant sort key). That's the
159 	 * most likely to be different if we are dealing with sorted input.
160 	 */
161 	result = false;
162 
163 	for (i = numCols; --i >= 0;)
164 	{
165 		AttrNumber	att = matchColIdx[i];
166 		Datum		attr1,
167 					attr2;
168 		bool		isNull1,
169 					isNull2;
170 
171 		attr1 = slot_getattr(slot1, att, &isNull1);
172 
173 		if (isNull1)
174 			continue;			/* can't prove anything here */
175 
176 		attr2 = slot_getattr(slot2, att, &isNull2);
177 
178 		if (isNull2)
179 			continue;			/* can't prove anything here */
180 
181 		/* Apply the type-specific equality function */
182 
183 		if (!DatumGetBool(FunctionCall2(&eqfunctions[i],
184 										attr1, attr2)))
185 		{
186 			result = true;		/* they are unequal */
187 			break;
188 		}
189 	}
190 
191 	MemoryContextSwitchTo(oldContext);
192 
193 	return result;
194 }
195 
196 
197 /*
198  * execTuplesMatchPrepare
199  *		Look up the equality functions needed for execTuplesMatch or
200  *		execTuplesUnequal, given an array of equality operator OIDs.
201  *
202  * The result is a palloc'd array.
203  */
204 FmgrInfo *
execTuplesMatchPrepare(int numCols,Oid * eqOperators)205 execTuplesMatchPrepare(int numCols,
206 					   Oid *eqOperators)
207 {
208 	FmgrInfo   *eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
209 	int			i;
210 
211 	for (i = 0; i < numCols; i++)
212 	{
213 		Oid			eq_opr = eqOperators[i];
214 		Oid			eq_function;
215 
216 		eq_function = get_opcode(eq_opr);
217 		fmgr_info(eq_function, &eqFunctions[i]);
218 	}
219 
220 	return eqFunctions;
221 }
222 
223 /*
224  * execTuplesHashPrepare
225  *		Look up the equality and hashing functions needed for a TupleHashTable.
226  *
227  * This is similar to execTuplesMatchPrepare, but we also need to find the
228  * hash functions associated with the equality operators.  *eqFunctions and
229  * *hashFunctions receive the palloc'd result arrays.
230  *
231  * Note: we expect that the given operators are not cross-type comparisons.
232  */
233 void
execTuplesHashPrepare(int numCols,Oid * eqOperators,FmgrInfo ** eqFunctions,FmgrInfo ** hashFunctions)234 execTuplesHashPrepare(int numCols,
235 					  Oid *eqOperators,
236 					  FmgrInfo **eqFunctions,
237 					  FmgrInfo **hashFunctions)
238 {
239 	int			i;
240 
241 	*eqFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
242 	*hashFunctions = (FmgrInfo *) palloc(numCols * sizeof(FmgrInfo));
243 
244 	for (i = 0; i < numCols; i++)
245 	{
246 		Oid			eq_opr = eqOperators[i];
247 		Oid			eq_function;
248 		Oid			left_hash_function;
249 		Oid			right_hash_function;
250 
251 		eq_function = get_opcode(eq_opr);
252 		if (!get_op_hash_functions(eq_opr,
253 								   &left_hash_function, &right_hash_function))
254 			elog(ERROR, "could not find hash function for hash operator %u",
255 				 eq_opr);
256 		/* We're not supporting cross-type cases here */
257 		Assert(left_hash_function == right_hash_function);
258 		fmgr_info(eq_function, &(*eqFunctions)[i]);
259 		fmgr_info(right_hash_function, &(*hashFunctions)[i]);
260 	}
261 }
262 
263 
264 /*****************************************************************************
265  *		Utility routines for all-in-memory hash tables
266  *
267  * These routines build hash tables for grouping tuples together (eg, for
268  * hash aggregation).  There is one entry for each not-distinct set of tuples
269  * presented.
270  *****************************************************************************/
271 
272 /*
273  * Construct an empty TupleHashTable
274  *
275  *	numCols, keyColIdx: identify the tuple fields to use as lookup key
276  *	eqfunctions: equality comparison functions to use
277  *	hashfunctions: datatype-specific hashing functions to use
278  *	nbuckets: initial estimate of hashtable size
279  *	additionalsize: size of data stored in ->additional
280  *	tablecxt: memory context in which to store table and table entries
281  *	tempcxt: short-lived context for evaluation hash and comparison functions
282  *
283  * The function arrays may be made with execTuplesHashPrepare().  Note they
284  * are not cross-type functions, but expect to see the table datatype(s)
285  * on both sides.
286  *
287  * Note that keyColIdx, eqfunctions, and hashfunctions must be allocated in
288  * storage that will live as long as the hashtable does.
289  */
290 TupleHashTable
BuildTupleHashTable(int numCols,AttrNumber * keyColIdx,FmgrInfo * eqfunctions,FmgrInfo * hashfunctions,long nbuckets,Size additionalsize,MemoryContext tablecxt,MemoryContext tempcxt,bool use_variable_hash_iv)291 BuildTupleHashTable(int numCols, AttrNumber *keyColIdx,
292 					FmgrInfo *eqfunctions,
293 					FmgrInfo *hashfunctions,
294 					long nbuckets, Size additionalsize,
295 					MemoryContext tablecxt, MemoryContext tempcxt,
296 					bool use_variable_hash_iv)
297 {
298 	TupleHashTable hashtable;
299 	Size		entrysize = sizeof(TupleHashEntryData) + additionalsize;
300 
301 	Assert(nbuckets > 0);
302 
303 	/* Limit initial table size request to not more than work_mem */
304 	nbuckets = Min(nbuckets, (long) ((work_mem * 1024L) / entrysize));
305 
306 	hashtable = (TupleHashTable)
307 		MemoryContextAlloc(tablecxt, sizeof(TupleHashTableData));
308 
309 	hashtable->numCols = numCols;
310 	hashtable->keyColIdx = keyColIdx;
311 	hashtable->tab_hash_funcs = hashfunctions;
312 	hashtable->tab_eq_funcs = eqfunctions;
313 	hashtable->tablecxt = tablecxt;
314 	hashtable->tempcxt = tempcxt;
315 	hashtable->entrysize = entrysize;
316 	hashtable->tableslot = NULL;	/* will be made on first lookup */
317 	hashtable->inputslot = NULL;
318 	hashtable->in_hash_funcs = NULL;
319 	hashtable->cur_eq_funcs = NULL;
320 
321 	/*
322 	 * If parallelism is in use, even if the master backend is performing the
323 	 * scan itself, we don't want to create the hashtable exactly the same way
324 	 * in all workers. As hashtables are iterated over in keyspace-order,
325 	 * doing so in all processes in the same way is likely to lead to
326 	 * "unbalanced" hashtables when the table size initially is
327 	 * underestimated.
328 	 */
329 	if (use_variable_hash_iv)
330 		hashtable->hash_iv = murmurhash32(ParallelWorkerNumber);
331 	else
332 		hashtable->hash_iv = 0;
333 
334 	hashtable->hashtab = tuplehash_create(tablecxt, nbuckets, hashtable);
335 
336 	return hashtable;
337 }
338 
339 /*
340  * Find or create a hashtable entry for the tuple group containing the
341  * given tuple.  The tuple must be the same type as the hashtable entries.
342  *
343  * If isnew is NULL, we do not create new entries; we return NULL if no
344  * match is found.
345  *
346  * If isnew isn't NULL, then a new entry is created if no existing entry
347  * matches.  On return, *isnew is true if the entry is newly created,
348  * false if it existed already.  ->additional_data in the new entry has
349  * been zeroed.
350  */
351 TupleHashEntry
LookupTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,bool * isnew)352 LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
353 					 bool *isnew)
354 {
355 	TupleHashEntryData *entry;
356 	MemoryContext oldContext;
357 	bool		found;
358 	MinimalTuple key;
359 
360 	/* If first time through, clone the input slot to make table slot */
361 	if (hashtable->tableslot == NULL)
362 	{
363 		TupleDesc	tupdesc;
364 
365 		oldContext = MemoryContextSwitchTo(hashtable->tablecxt);
366 
367 		/*
368 		 * We copy the input tuple descriptor just for safety --- we assume
369 		 * all input tuples will have equivalent descriptors.
370 		 */
371 		tupdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
372 		hashtable->tableslot = MakeSingleTupleTableSlot(tupdesc);
373 		MemoryContextSwitchTo(oldContext);
374 	}
375 
376 	/* Need to run the hash functions in short-lived context */
377 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
378 
379 	/* set up data needed by hash and match functions */
380 	hashtable->inputslot = slot;
381 	hashtable->in_hash_funcs = hashtable->tab_hash_funcs;
382 	hashtable->cur_eq_funcs = hashtable->tab_eq_funcs;
383 
384 	key = NULL;					/* flag to reference inputslot */
385 
386 	if (isnew)
387 	{
388 		entry = tuplehash_insert(hashtable->hashtab, key, &found);
389 
390 		if (found)
391 		{
392 			/* found pre-existing entry */
393 			*isnew = false;
394 		}
395 		else
396 		{
397 			/* created new entry */
398 			*isnew = true;
399 			/* zero caller data */
400 			entry->additional = NULL;
401 			MemoryContextSwitchTo(hashtable->tablecxt);
402 			/* Copy the first tuple into the table context */
403 			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
404 		}
405 	}
406 	else
407 	{
408 		entry = tuplehash_lookup(hashtable->hashtab, key);
409 	}
410 
411 	MemoryContextSwitchTo(oldContext);
412 
413 	return entry;
414 }
415 
416 /*
417  * Search for a hashtable entry matching the given tuple.  No entry is
418  * created if there's not a match.  This is similar to the non-creating
419  * case of LookupTupleHashEntry, except that it supports cross-type
420  * comparisons, in which the given tuple is not of the same type as the
421  * table entries.  The caller must provide the hash functions to use for
422  * the input tuple, as well as the equality functions, since these may be
423  * different from the table's internal functions.
424  */
425 TupleHashEntry
FindTupleHashEntry(TupleHashTable hashtable,TupleTableSlot * slot,FmgrInfo * eqfunctions,FmgrInfo * hashfunctions)426 FindTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot,
427 				   FmgrInfo *eqfunctions,
428 				   FmgrInfo *hashfunctions)
429 {
430 	TupleHashEntry entry;
431 	MemoryContext oldContext;
432 	MinimalTuple key;
433 
434 	/* Need to run the hash functions in short-lived context */
435 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt);
436 
437 	/* Set up data needed by hash and match functions */
438 	hashtable->inputslot = slot;
439 	hashtable->in_hash_funcs = hashfunctions;
440 	hashtable->cur_eq_funcs = eqfunctions;
441 
442 	/* Search the hash table */
443 	key = NULL;					/* flag to reference inputslot */
444 	entry = tuplehash_lookup(hashtable->hashtab, key);
445 	MemoryContextSwitchTo(oldContext);
446 
447 	return entry;
448 }
449 
450 /*
451  * Compute the hash value for a tuple
452  *
453  * The passed-in key is a pointer to TupleHashEntryData.  In an actual hash
454  * table entry, the firstTuple field points to a tuple (in MinimalTuple
455  * format).  LookupTupleHashEntry sets up a dummy TupleHashEntryData with a
456  * NULL firstTuple field --- that cues us to look at the inputslot instead.
457  * This convention avoids the need to materialize virtual input tuples unless
458  * they actually need to get copied into the table.
459  *
460  * Also, the caller must select an appropriate memory context for running
461  * the hash functions. (dynahash.c doesn't change CurrentMemoryContext.)
462  */
463 static uint32
TupleHashTableHash(struct tuplehash_hash * tb,const MinimalTuple tuple)464 TupleHashTableHash(struct tuplehash_hash *tb, const MinimalTuple tuple)
465 {
466 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
467 	int			numCols = hashtable->numCols;
468 	AttrNumber *keyColIdx = hashtable->keyColIdx;
469 	uint32		hashkey = hashtable->hash_iv;
470 	TupleTableSlot *slot;
471 	FmgrInfo   *hashfunctions;
472 	int			i;
473 
474 	if (tuple == NULL)
475 	{
476 		/* Process the current input tuple for the table */
477 		slot = hashtable->inputslot;
478 		hashfunctions = hashtable->in_hash_funcs;
479 	}
480 	else
481 	{
482 		/*
483 		 * Process a tuple already stored in the table.
484 		 *
485 		 * (this case never actually occurs due to the way simplehash.h is
486 		 * used, as the hash-value is stored in the entries)
487 		 */
488 		slot = hashtable->tableslot;
489 		ExecStoreMinimalTuple(tuple, slot, false);
490 		hashfunctions = hashtable->tab_hash_funcs;
491 	}
492 
493 	for (i = 0; i < numCols; i++)
494 	{
495 		AttrNumber	att = keyColIdx[i];
496 		Datum		attr;
497 		bool		isNull;
498 
499 		/* rotate hashkey left 1 bit at each step */
500 		hashkey = (hashkey << 1) | ((hashkey & 0x80000000) ? 1 : 0);
501 
502 		attr = slot_getattr(slot, att, &isNull);
503 
504 		if (!isNull)			/* treat nulls as having hash key 0 */
505 		{
506 			uint32		hkey;
507 
508 			hkey = DatumGetUInt32(FunctionCall1(&hashfunctions[i],
509 												attr));
510 			hashkey ^= hkey;
511 		}
512 	}
513 
514 	/*
515 	 * The way hashes are combined above, among each other and with the IV,
516 	 * doesn't lead to good bit perturbation. As the IV's goal is to lead to
517 	 * achieve that, perform a round of hashing of the combined hash -
518 	 * resulting in near perfect perturbation.
519 	 */
520 	return murmurhash32(hashkey);
521 }
522 
523 /*
524  * See whether two tuples (presumably of the same hash value) match
525  *
526  * As above, the passed pointers are pointers to TupleHashEntryData.
527  *
528  * Also, the caller must select an appropriate memory context for running
529  * the compare functions.  (dynahash.c doesn't change CurrentMemoryContext.)
530  */
531 static int
TupleHashTableMatch(struct tuplehash_hash * tb,const MinimalTuple tuple1,const MinimalTuple tuple2)532 TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2)
533 {
534 	TupleTableSlot *slot1;
535 	TupleTableSlot *slot2;
536 	TupleHashTable hashtable = (TupleHashTable) tb->private_data;
537 
538 	/*
539 	 * We assume that simplehash.h will only ever call us with the first
540 	 * argument being an actual table entry, and the second argument being
541 	 * LookupTupleHashEntry's dummy TupleHashEntryData.  The other direction
542 	 * could be supported too, but is not currently required.
543 	 */
544 	Assert(tuple1 != NULL);
545 	slot1 = hashtable->tableslot;
546 	ExecStoreMinimalTuple(tuple1, slot1, false);
547 	Assert(tuple2 == NULL);
548 	slot2 = hashtable->inputslot;
549 
550 	/* For crosstype comparisons, the inputslot must be first */
551 	if (execTuplesMatch(slot2,
552 						slot1,
553 						hashtable->numCols,
554 						hashtable->keyColIdx,
555 						hashtable->cur_eq_funcs,
556 						hashtable->tempcxt))
557 		return 0;
558 	else
559 		return 1;
560 }
561