/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
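 *
 * For example (illustrative; the exact plan shape is the planner's choice),
 * a query such as
 *		SELECT a, b FROM t1 INTERSECT SELECT a, b FROM t2
 * is typically planned as a SetOp whose outer child is an Append of the
 * two input subqueries (with a Sort above the Append in SETOP_SORTED
 * mode).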
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
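 *
 * For instance, "SELECT c FROM t1 EXCEPT SELECT c FROM t2" typically
 * produces a plan like this (illustrative EXPLAIN output; details vary):
 *		HashSetOp Except
 *		  ->  Append
 *				->  Subquery Scan on "*SELECT* 1"
 *				->  Subquery Scan on "*SELECT* 2"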
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking or projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
}			SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;
	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
												   desc,
												   node->numCols,
												   node->dupColIdx,
												   setopstate->eqfuncoids,
												   setopstate->hashfunctions,
												   node->dupCollations,
												   node->numGroups,
												   0,
												   setopstate->ps.state->es_query_cxt,
												   setopstate->tableContext,
												   econtext->ecxt_per_tuple_memory,
												   false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
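 *
 * For example, a group whose row occurs numLeft = 3 times in the left
 * input and numRight = 2 times in the right emits 1 copy for INTERSECT,
 * min(3, 2) = 2 copies for INTERSECT ALL, 0 copies for EXCEPT, and
 * 3 - 2 = 1 copy for EXCEPT ALL.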
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *			/* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
	SetOpState *node = castNode(SetOpState, pstate);
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	CHECK_FOR_INTERRUPTS();

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
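 *
 * Illustrative trace (hypothetical data): for INTERSECT ALL, sorted input
 * (val, flag) = (1,0) (1,0) (1,1) (2,1) forms two groups.  The val = 1
 * group counts numLeft = 2, numRight = 1 and emits min(2, 1) = 1 copy;
 * the val = 2 group counts numLeft = 0, numRight = 1 and emits nothing.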
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreHeapTuple(setopstate->grp_firstTuple,
						   resultTupleSlot,
						   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			econtext->ecxt_outertuple = resultTupleSlot;
			econtext->ecxt_innertuple = outerslot;

			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;

	/*
	 * Verify the planner didn't mess up: the right-hand relation may be
	 * delivered first (firstFlag == 1) only for INTERSECT cases.
	 */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew, NULL);

			/* If new tuple group, initialize counts */
			if (isnew)
			{
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset expression context after each hashtable lookup */
		ResetExprContext(econtext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	TupleHashEntryData *entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		CHECK_FOR_INTERRUPTS();

		/*
		 * Find the next entry in the hash table
		 */
		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;
	TupleDesc	outerDesc;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;
	setopstate->ps.ExecProcNode = ExecSetOp;

	setopstate->eqfuncoids = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * create expression context
	 */
	ExecAssignExprContext(estate, &setopstate->ps);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
	outerDesc = ExecGetResultType(outerPlanState(setopstate));

	/*
	 * Initialize result slot and type.  Setop nodes do no projections, so
	 * initialize projection info for this node appropriately.
	 */
	ExecInitResultTupleSlotTL(&setopstate->ps,
							  node->strategy == SETOP_HASHED ?
							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfuncoids,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunction =
			execTuplesMatchPrepare(outerDesc,
								   node->numCols,
								   node->dupColIdx,
								   node->dupOperators,
								   node->dupCollations,
								   &setopstate->ps);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);
	ExecFreeExprContext(&node->ps);

	ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo.  If
		 * subnode's chgParam is not NULL then it will be re-scanned by
		 * ExecProcNode, else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash
		 * table; no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		ResetTupleHashTable(node->hashtable);
		node->table_filled = false;
	}

	/*
	 * If chgParam of the subnode is not null then the plan will be
	 * re-scanned by the first ExecProcNode, so we need to rescan it
	 * explicitly here only when chgParam is null.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}