/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */
44
45 #include "postgres.h"
46
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "miscadmin.h"
51 #include "utils/memutils.h"
52
53
/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group (attached via the entry's "additional"
 * pointer; see setop_fill_hash_table).
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;
69
70
71 static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
72 static void setop_fill_hash_table(SetOpState *setopstate);
73 static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
74
75
76 /*
77 * Initialize state for a new group of input values.
78 */
79 static inline void
initialize_counts(SetOpStatePerGroup pergroup)80 initialize_counts(SetOpStatePerGroup pergroup)
81 {
82 pergroup->numLeft = pergroup->numRight = 0;
83 }
84
85 /*
86 * Advance the appropriate counter for one input tuple.
87 */
88 static inline void
advance_counts(SetOpStatePerGroup pergroup,int flag)89 advance_counts(SetOpStatePerGroup pergroup, int flag)
90 {
91 if (flag)
92 pergroup->numRight++;
93 else
94 pergroup->numLeft++;
95 }
96
97 /*
98 * Fetch the "flag" column from an input tuple.
99 * This is an integer column with value 0 for left side, 1 for right side.
100 */
101 static int
fetch_tuple_flag(SetOpState * setopstate,TupleTableSlot * inputslot)102 fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
103 {
104 SetOp *node = (SetOp *) setopstate->ps.plan;
105 int flag;
106 bool isNull;
107
108 flag = DatumGetInt32(slot_getattr(inputslot,
109 node->flagColIdx,
110 &isNull));
111 Assert(!isNull);
112 Assert(flag == 0 || flag == 1);
113 return flag;
114 }
115
/*
 * Initialize the hash table to empty.
 *
 * Only called in SETOP_HASHED mode.  Grouping is on the non-junk columns
 * (numCols/dupColIdx); entry storage goes into setopstate->tableContext so
 * it can be discarded wholesale at rescan (see ExecReScanSetOp), while
 * transient lookup work uses the per-tuple expression context memory.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;
	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
												   desc,
												   node->numCols,
												   node->dupColIdx,
												   setopstate->eqfuncoids,
												   setopstate->hashfunctions,
												   node->numGroups,
												   0,
												   setopstate->ps.state->es_query_cxt,
												   setopstate->tableContext,
												   econtext->ecxt_per_tuple_memory,
												   false);
}
142
143 /*
144 * We've completed processing a tuple group. Decide how many copies (if any)
145 * of its representative row to emit, and store the count into numOutput.
146 * This logic is straight from the SQL92 specification.
147 */
148 static void
set_output_count(SetOpState * setopstate,SetOpStatePerGroup pergroup)149 set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
150 {
151 SetOp *plannode = (SetOp *) setopstate->ps.plan;
152
153 switch (plannode->cmd)
154 {
155 case SETOPCMD_INTERSECT:
156 if (pergroup->numLeft > 0 && pergroup->numRight > 0)
157 setopstate->numOutput = 1;
158 else
159 setopstate->numOutput = 0;
160 break;
161 case SETOPCMD_INTERSECT_ALL:
162 setopstate->numOutput =
163 (pergroup->numLeft < pergroup->numRight) ?
164 pergroup->numLeft : pergroup->numRight;
165 break;
166 case SETOPCMD_EXCEPT:
167 if (pergroup->numLeft > 0 && pergroup->numRight == 0)
168 setopstate->numOutput = 1;
169 else
170 setopstate->numOutput = 0;
171 break;
172 case SETOPCMD_EXCEPT_ALL:
173 setopstate->numOutput =
174 (pergroup->numLeft < pergroup->numRight) ?
175 0 : (pergroup->numLeft - pergroup->numRight);
176 break;
177 default:
178 elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
179 break;
180 }
181 }
182
183
184 /* ----------------------------------------------------------------
185 * ExecSetOp
186 * ----------------------------------------------------------------
187 */
188 static TupleTableSlot * /* return: a tuple or NULL */
ExecSetOp(PlanState * pstate)189 ExecSetOp(PlanState *pstate)
190 {
191 SetOpState *node = castNode(SetOpState, pstate);
192 SetOp *plannode = (SetOp *) node->ps.plan;
193 TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
194
195 CHECK_FOR_INTERRUPTS();
196
197 /*
198 * If the previously-returned tuple needs to be returned more than once,
199 * keep returning it.
200 */
201 if (node->numOutput > 0)
202 {
203 node->numOutput--;
204 return resultTupleSlot;
205 }
206
207 /* Otherwise, we're done if we are out of groups */
208 if (node->setop_done)
209 return NULL;
210
211 /* Fetch the next tuple group according to the correct strategy */
212 if (plannode->strategy == SETOP_HASHED)
213 {
214 if (!node->table_filled)
215 setop_fill_hash_table(node);
216 return setop_retrieve_hash_table(node);
217 }
218 else
219 return setop_retrieve_direct(node);
220 }
221
/*
 * ExecSetOp for non-hashed case
 *
 * Relies on the input being sorted on all grouping columns, so that all
 * duplicates of a group arrive consecutively.  Each outer iteration
 * consumes one complete group, counts its left/right members, and returns
 * the group's representative tuple if set_output_count says it should be
 * emitted; otherwise it moves on to the next group.  Returns NULL when the
 * input is exhausted.
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.  (grp_firstTuple may hold a tuple saved by the
		 * previous iteration when it detected a group boundary.)
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot (shouldFree = true transfers ownership to the slot).
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary by comparing the
			 * new tuple against the group's representative tuple.
			 */
			econtext->ecxt_outertuple = resultTupleSlot;
			econtext->ecxt_innertuple = outerslot;

			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.  (ExecSetOp
		 * returns the remaining numOutput copies on subsequent calls.)
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}
334
/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 *
 * Consumes the entire outer plan.  Tuples from the first input relation
 * create (or update) hash entries; tuples from the second relation only
 * update entries that already exist, since a group appearing solely in the
 * second input can never produce output (see file header comment).
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/*
	 * verify planner didn't mess up: the right input (flag 1) may come
	 * first only for INTERSECT variants (planner puts the smaller input
	 * first there; EXCEPT requires the left input first)
	 */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew);

			/*
			 * If new tuple group, allocate and zero its counters in the
			 * hash table's own memory context so they live as long as the
			 * entry does.
			 */
			if (isnew)
			{
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset expression context after each hashtable lookup */
		ResetExprContext(econtext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
420
421 /*
422 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
423 */
424 static TupleTableSlot *
setop_retrieve_hash_table(SetOpState * setopstate)425 setop_retrieve_hash_table(SetOpState *setopstate)
426 {
427 TupleHashEntryData *entry;
428 TupleTableSlot *resultTupleSlot;
429
430 /*
431 * get state info from node
432 */
433 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
434
435 /*
436 * We loop retrieving groups until we find one we should return
437 */
438 while (!setopstate->setop_done)
439 {
440 CHECK_FOR_INTERRUPTS();
441
442 /*
443 * Find the next entry in the hash table
444 */
445 entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
446 if (entry == NULL)
447 {
448 /* No more entries in hashtable, so done */
449 setopstate->setop_done = true;
450 return NULL;
451 }
452
453 /*
454 * See if we should emit any copies of this tuple, and if so return
455 * the first copy.
456 */
457 set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
458
459 if (setopstate->numOutput > 0)
460 {
461 setopstate->numOutput--;
462 return ExecStoreMinimalTuple(entry->firstTuple,
463 resultTupleSlot,
464 false);
465 }
466 }
467
468 /* No more groups */
469 ExecClearTuple(resultTupleSlot);
470 return NULL;
471 }
472
/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;
	TupleDesc	outerDesc;

	/* check for unsupported flags: SetOp cannot scan backward or mark */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;
	setopstate->ps.ExecProcNode = ExecSetOp;

	/* start with all per-run state empty */
	setopstate->eqfuncoids = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * create expression context
	 */
	ExecAssignExprContext(estate, &setopstate->ps);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
	outerDesc = ExecGetResultType(outerPlanState(setopstate));

	/*
	 * Initialize result slot and type.  Setop nodes do no projections, so
	 * initialize projection info for this node appropriately.
	 */
	ExecInitResultTupleSlotTL(estate, &setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfuncoids,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunction =
			execTuplesMatchPrepare(outerDesc,
								   node->numCols,
								   node->dupColIdx,
								   node->dupOperators,
								   &setopstate->ps);

	if (node->strategy == SETOP_HASHED)
	{
		/* hashed mode: create the (empty) hash table up front */
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		/* sorted mode: a single zeroed per-group counter struct suffices */
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}
571
572 /* ----------------------------------------------------------------
573 * ExecEndSetOp
574 *
575 * This shuts down the subplan and frees resources allocated
576 * to this node.
577 * ----------------------------------------------------------------
578 */
579 void
ExecEndSetOp(SetOpState * node)580 ExecEndSetOp(SetOpState *node)
581 {
582 /* clean up tuple table */
583 ExecClearTuple(node->ps.ps_ResultTupleSlot);
584
585 /* free subsidiary stuff including hashtable */
586 if (node->tableContext)
587 MemoryContextDelete(node->tableContext);
588 ExecFreeExprContext(&node->ps);
589
590 ExecEndNode(outerPlanState(node));
591 }
592
593
/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so the next ExecProcNode call restarts the
 *		set operation from the beginning.
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage (entry tuples and per-group counts) */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		ResetTupleHashTable(node->hashtable);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}
649