/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
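 *
 * For example, given "SELECT a FROM t1 INTERSECT SELECT a FROM t2", the
 * junk attribute is 0 for tuples coming from t1 and 1 for tuples coming
 * from t2, and a group is emitted once if it contains at least one tuple
 * from each input.  (An illustrative sketch; see set_output_count for the
 * exact per-command rules.)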
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
}			SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

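	/*
	 * Note: additionalsize is passed as zero because the per-group count
	 * state is allocated separately (see setop_fill_hash_table) rather than
	 * stored inline in the hash entries.
	 */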
	setopstate->hashtable = BuildTupleHashTable(node->numCols,
												node->dupColIdx,
												setopstate->eqfunctions,
												setopstate->hashfunctions,
												node->numGroups,
												0,
												setopstate->tableContext,
												setopstate->tempContext,
												false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
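 *
 * For example (illustrative counts): a group with numLeft = 3 and
 * numRight = 2 yields one output row for INTERSECT, min(3, 2) = 2 rows
 * for INTERSECT ALL, no rows for EXCEPT, and 3 - 2 = 1 row for EXCEPT ALL.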
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *			/* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
	SetOpState *node = castNode(SetOpState, pstate);
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	CHECK_FOR_INTERRUPTS();

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
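
	/*
	 * firstFlag is the flag value (0 or 1) of whichever input relation the
	 * planner placed first.  It can be 1 only when the planner swapped the
	 * inputs to put the smaller relation first, which it does only for
	 * INTERSECT (see the file header comment).
	 */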
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
			{
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	TupleHashEntryData *entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		CHECK_FOR_INTERRUPTS();

		/*
		 * Find the next entry in the hash table
		 */
		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
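			/* shouldFree = false: the tuple is owned by the hash table */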
			return ExecStoreMinimalTuple(entry->firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;
	setopstate->ps.ExecProcNode = ExecSetOp;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_SIZES);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop. We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
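		/* the table starts empty and is filled lazily by ExecSetOp */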
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	ExecEndNode(outerPlanState(node));
}


void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}