/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
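 *
 * For illustration, a sorted INTERSECT ALL input with one grouping column
 * might look like
 *
 *		x	flag
 *		1	 0		<- left input
 *		1	 0
 *		1	 1		<- right input
 *		2	 0
 *
 * where the "1" group has numLeft = 2 and numRight = 1 and so emits
 * min(2, 1) = 1 copy of its row, while the "2" group emits none.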
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
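 *
 * (For example, "SELECT a FROM t1 INTERSECT SELECT a FROM t2" is typically
 * planned as a SetOp above an Append of the two sub-selects, with the flag
 * column added to each side's targetlist.)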
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;

/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries */
	SetOpStatePerGroupData pergroup;
} SetOpHashEntryData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTable(node->numCols,
												node->dupColIdx,
												setopstate->eqfunctions,
												setopstate->hashfunctions,
												node->numGroups,
												sizeof(SetOpHashEntryData),
												setopstate->tableContext,
												setopstate->tempContext);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
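 *
 * For example, a group with numLeft = 3 and numRight = 2 yields 1 copy
 * for INTERSECT, min(3, 2) = 2 for INTERSECT ALL, 0 for EXCEPT, and
 * 3 - 2 = 1 for EXCEPT ALL.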
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
TupleTableSlot *				/* return: a tuple or NULL */
ExecSetOp(SetOpState *node)
{
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;		/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
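	/* (the right input may be delivered first only for INTERSECT cases) */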
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	SetOpHashEntry entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * Find the next entry in the hash table
		 */
		entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, &entry->pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->shared.firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_SIZES);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality
	 * and hashing functions when hashing, but only equality otherwise.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * If chgParam of the subnode is not null, then the plan will be
	 * re-scanned by the first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}