1 /*-------------------------------------------------------------------------
2 *
3 * nodeSetOp.c
4 * Routines to handle INTERSECT and EXCEPT selection
5 *
6 * The input of a SetOp node consists of tuples from two relations,
7 * which have been combined into one dataset, with a junk attribute added
8 * that shows which relation each tuple came from. In SETOP_SORTED mode,
9 * the input has furthermore been sorted according to all the grouping
10 * columns (ie, all the non-junk attributes). The SetOp node scans each
11 * group of identical tuples to determine how many came from each input
12 * relation. Then it is a simple matter to emit the output demanded by the
13 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
14 *
15 * In SETOP_HASHED mode, the input is delivered in no particular order,
16 * except that we know all the tuples from one input relation will come before
17 * all the tuples of the other. The planner guarantees that the first input
18 * relation is the left-hand one for EXCEPT, and tries to make the smaller
19 * input relation come first for INTERSECT. We build a hash table in memory
20 * with one entry for each group of identical tuples, and count the number of
21 * tuples in the group from each relation. After seeing all the input, we
22 * scan the hashtable and generate the correct output using those counts.
23 * We can avoid making hashtable entries for any tuples appearing only in the
24 * second input relation, since they cannot result in any output.
25 *
26 * This node type is not used for UNION or UNION ALL, since those can be
27 * implemented more cheaply (there's no need for the junk attribute to
28 * identify the source relation).
29 *
30 * Note that SetOp does no qual checking nor projection. The delivered
31 * output tuples are just copies of the first-to-arrive tuple in each
32 * input group.
33 *
34 *
35 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
36 * Portions Copyright (c) 1994, Regents of the University of California
37 *
38 *
39 * IDENTIFICATION
40 * src/backend/executor/nodeSetOp.c
41 *
42 *-------------------------------------------------------------------------
43 */
44
45 #include "postgres.h"
46
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "miscadmin.h"
51 #include "utils/memutils.h"
52
53
54 /*
55 * SetOpStatePerGroupData - per-group working state
56 *
57 * These values are working state that is initialized at the start of
58 * an input tuple group and updated for each input tuple.
59 *
60 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
61 * the plan state node. In SETOP_HASHED mode, the hash table contains one
62 * of these for each tuple group.
63 */
64 typedef struct SetOpStatePerGroupData
65 {
66 long numLeft; /* number of left-input dups in group */
67 long numRight; /* number of right-input dups in group */
68 } SetOpStatePerGroupData;
69
70
71 static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
72 static void setop_fill_hash_table(SetOpState *setopstate);
73 static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
74
75
76 /*
77 * Initialize state for a new group of input values.
78 */
79 static inline void
initialize_counts(SetOpStatePerGroup pergroup)80 initialize_counts(SetOpStatePerGroup pergroup)
81 {
82 pergroup->numLeft = pergroup->numRight = 0;
83 }
84
85 /*
86 * Advance the appropriate counter for one input tuple.
87 */
88 static inline void
advance_counts(SetOpStatePerGroup pergroup,int flag)89 advance_counts(SetOpStatePerGroup pergroup, int flag)
90 {
91 if (flag)
92 pergroup->numRight++;
93 else
94 pergroup->numLeft++;
95 }
96
97 /*
98 * Fetch the "flag" column from an input tuple.
99 * This is an integer column with value 0 for left side, 1 for right side.
100 */
101 static int
fetch_tuple_flag(SetOpState * setopstate,TupleTableSlot * inputslot)102 fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
103 {
104 SetOp *node = (SetOp *) setopstate->ps.plan;
105 int flag;
106 bool isNull;
107
108 flag = DatumGetInt32(slot_getattr(inputslot,
109 node->flagColIdx,
110 &isNull));
111 Assert(!isNull);
112 Assert(flag == 0 || flag == 1);
113 return flag;
114 }
115
116 /*
117 * Initialize the hash table to empty.
118 */
119 static void
build_hash_table(SetOpState * setopstate)120 build_hash_table(SetOpState *setopstate)
121 {
122 SetOp *node = (SetOp *) setopstate->ps.plan;
123
124 Assert(node->strategy == SETOP_HASHED);
125 Assert(node->numGroups > 0);
126
127 setopstate->hashtable = BuildTupleHashTable(node->numCols,
128 node->dupColIdx,
129 setopstate->eqfunctions,
130 setopstate->hashfunctions,
131 node->numGroups,
132 0,
133 setopstate->tableContext,
134 setopstate->tempContext,
135 false);
136 }
137
138 /*
139 * We've completed processing a tuple group. Decide how many copies (if any)
140 * of its representative row to emit, and store the count into numOutput.
141 * This logic is straight from the SQL92 specification.
142 */
143 static void
set_output_count(SetOpState * setopstate,SetOpStatePerGroup pergroup)144 set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
145 {
146 SetOp *plannode = (SetOp *) setopstate->ps.plan;
147
148 switch (plannode->cmd)
149 {
150 case SETOPCMD_INTERSECT:
151 if (pergroup->numLeft > 0 && pergroup->numRight > 0)
152 setopstate->numOutput = 1;
153 else
154 setopstate->numOutput = 0;
155 break;
156 case SETOPCMD_INTERSECT_ALL:
157 setopstate->numOutput =
158 (pergroup->numLeft < pergroup->numRight) ?
159 pergroup->numLeft : pergroup->numRight;
160 break;
161 case SETOPCMD_EXCEPT:
162 if (pergroup->numLeft > 0 && pergroup->numRight == 0)
163 setopstate->numOutput = 1;
164 else
165 setopstate->numOutput = 0;
166 break;
167 case SETOPCMD_EXCEPT_ALL:
168 setopstate->numOutput =
169 (pergroup->numLeft < pergroup->numRight) ?
170 0 : (pergroup->numLeft - pergroup->numRight);
171 break;
172 default:
173 elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
174 break;
175 }
176 }
177
178
179 /* ----------------------------------------------------------------
180 * ExecSetOp
181 * ----------------------------------------------------------------
182 */
183 static TupleTableSlot * /* return: a tuple or NULL */
ExecSetOp(PlanState * pstate)184 ExecSetOp(PlanState *pstate)
185 {
186 SetOpState *node = castNode(SetOpState, pstate);
187 SetOp *plannode = (SetOp *) node->ps.plan;
188 TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
189
190 CHECK_FOR_INTERRUPTS();
191
192 /*
193 * If the previously-returned tuple needs to be returned more than once,
194 * keep returning it.
195 */
196 if (node->numOutput > 0)
197 {
198 node->numOutput--;
199 return resultTupleSlot;
200 }
201
202 /* Otherwise, we're done if we are out of groups */
203 if (node->setop_done)
204 return NULL;
205
206 /* Fetch the next tuple group according to the correct strategy */
207 if (plannode->strategy == SETOP_HASHED)
208 {
209 if (!node->table_filled)
210 setop_fill_hash_table(node);
211 return setop_retrieve_hash_table(node);
212 }
213 else
214 return setop_retrieve_direct(node);
215 }
216
217 /*
218 * ExecSetOp for non-hashed case
219 */
220 static TupleTableSlot *
setop_retrieve_direct(SetOpState * setopstate)221 setop_retrieve_direct(SetOpState *setopstate)
222 {
223 SetOp *node = (SetOp *) setopstate->ps.plan;
224 PlanState *outerPlan;
225 SetOpStatePerGroup pergroup;
226 TupleTableSlot *outerslot;
227 TupleTableSlot *resultTupleSlot;
228
229 /*
230 * get state info from node
231 */
232 outerPlan = outerPlanState(setopstate);
233 pergroup = (SetOpStatePerGroup) setopstate->pergroup;
234 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
235
236 /*
237 * We loop retrieving groups until we find one we should return
238 */
239 while (!setopstate->setop_done)
240 {
241 /*
242 * If we don't already have the first tuple of the new group, fetch it
243 * from the outer plan.
244 */
245 if (setopstate->grp_firstTuple == NULL)
246 {
247 outerslot = ExecProcNode(outerPlan);
248 if (!TupIsNull(outerslot))
249 {
250 /* Make a copy of the first input tuple */
251 setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
252 }
253 else
254 {
255 /* outer plan produced no tuples at all */
256 setopstate->setop_done = true;
257 return NULL;
258 }
259 }
260
261 /*
262 * Store the copied first input tuple in the tuple table slot reserved
263 * for it. The tuple will be deleted when it is cleared from the
264 * slot.
265 */
266 ExecStoreTuple(setopstate->grp_firstTuple,
267 resultTupleSlot,
268 InvalidBuffer,
269 true);
270 setopstate->grp_firstTuple = NULL; /* don't keep two pointers */
271
272 /* Initialize working state for a new input tuple group */
273 initialize_counts(pergroup);
274
275 /* Count the first input tuple */
276 advance_counts(pergroup,
277 fetch_tuple_flag(setopstate, resultTupleSlot));
278
279 /*
280 * Scan the outer plan until we exhaust it or cross a group boundary.
281 */
282 for (;;)
283 {
284 outerslot = ExecProcNode(outerPlan);
285 if (TupIsNull(outerslot))
286 {
287 /* no more outer-plan tuples available */
288 setopstate->setop_done = true;
289 break;
290 }
291
292 /*
293 * Check whether we've crossed a group boundary.
294 */
295 if (!execTuplesMatch(resultTupleSlot,
296 outerslot,
297 node->numCols, node->dupColIdx,
298 setopstate->eqfunctions,
299 setopstate->tempContext))
300 {
301 /*
302 * Save the first input tuple of the next group.
303 */
304 setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
305 break;
306 }
307
308 /* Still in same group, so count this tuple */
309 advance_counts(pergroup,
310 fetch_tuple_flag(setopstate, outerslot));
311 }
312
313 /*
314 * Done scanning input tuple group. See if we should emit any copies
315 * of result tuple, and if so return the first copy.
316 */
317 set_output_count(setopstate, pergroup);
318
319 if (setopstate->numOutput > 0)
320 {
321 setopstate->numOutput--;
322 return resultTupleSlot;
323 }
324 }
325
326 /* No more groups */
327 ExecClearTuple(resultTupleSlot);
328 return NULL;
329 }
330
331 /*
332 * ExecSetOp for hashed case: phase 1, read input and build hash table
333 */
334 static void
setop_fill_hash_table(SetOpState * setopstate)335 setop_fill_hash_table(SetOpState *setopstate)
336 {
337 SetOp *node = (SetOp *) setopstate->ps.plan;
338 PlanState *outerPlan;
339 int firstFlag;
340 bool in_first_rel PG_USED_FOR_ASSERTS_ONLY;
341
342 /*
343 * get state info from node
344 */
345 outerPlan = outerPlanState(setopstate);
346 firstFlag = node->firstFlag;
347 /* verify planner didn't mess up */
348 Assert(firstFlag == 0 ||
349 (firstFlag == 1 &&
350 (node->cmd == SETOPCMD_INTERSECT ||
351 node->cmd == SETOPCMD_INTERSECT_ALL)));
352
353 /*
354 * Process each outer-plan tuple, and then fetch the next one, until we
355 * exhaust the outer plan.
356 */
357 in_first_rel = true;
358 for (;;)
359 {
360 TupleTableSlot *outerslot;
361 int flag;
362 TupleHashEntryData *entry;
363 bool isnew;
364
365 outerslot = ExecProcNode(outerPlan);
366 if (TupIsNull(outerslot))
367 break;
368
369 /* Identify whether it's left or right input */
370 flag = fetch_tuple_flag(setopstate, outerslot);
371
372 if (flag == firstFlag)
373 {
374 /* (still) in first input relation */
375 Assert(in_first_rel);
376
377 /* Find or build hashtable entry for this tuple's group */
378 entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
379 &isnew);
380
381 /* If new tuple group, initialize counts */
382 if (isnew)
383 {
384 entry->additional = (SetOpStatePerGroup)
385 MemoryContextAlloc(setopstate->hashtable->tablecxt,
386 sizeof(SetOpStatePerGroupData));
387 initialize_counts((SetOpStatePerGroup) entry->additional);
388 }
389
390 /* Advance the counts */
391 advance_counts((SetOpStatePerGroup) entry->additional, flag);
392 }
393 else
394 {
395 /* reached second relation */
396 in_first_rel = false;
397
398 /* For tuples not seen previously, do not make hashtable entry */
399 entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
400 NULL);
401
402 /* Advance the counts if entry is already present */
403 if (entry)
404 advance_counts((SetOpStatePerGroup) entry->additional, flag);
405 }
406
407 /* Must reset temp context after each hashtable lookup */
408 MemoryContextReset(setopstate->tempContext);
409 }
410
411 setopstate->table_filled = true;
412 /* Initialize to walk the hash table */
413 ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
414 }
415
416 /*
417 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
418 */
419 static TupleTableSlot *
setop_retrieve_hash_table(SetOpState * setopstate)420 setop_retrieve_hash_table(SetOpState *setopstate)
421 {
422 TupleHashEntryData *entry;
423 TupleTableSlot *resultTupleSlot;
424
425 /*
426 * get state info from node
427 */
428 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
429
430 /*
431 * We loop retrieving groups until we find one we should return
432 */
433 while (!setopstate->setop_done)
434 {
435 CHECK_FOR_INTERRUPTS();
436
437 /*
438 * Find the next entry in the hash table
439 */
440 entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
441 if (entry == NULL)
442 {
443 /* No more entries in hashtable, so done */
444 setopstate->setop_done = true;
445 return NULL;
446 }
447
448 /*
449 * See if we should emit any copies of this tuple, and if so return
450 * the first copy.
451 */
452 set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);
453
454 if (setopstate->numOutput > 0)
455 {
456 setopstate->numOutput--;
457 return ExecStoreMinimalTuple(entry->firstTuple,
458 resultTupleSlot,
459 false);
460 }
461 }
462
463 /* No more groups */
464 ExecClearTuple(resultTupleSlot);
465 return NULL;
466 }
467
468 /* ----------------------------------------------------------------
469 * ExecInitSetOp
470 *
471 * This initializes the setop node state structures and
472 * the node's subplan.
473 * ----------------------------------------------------------------
474 */
475 SetOpState *
ExecInitSetOp(SetOp * node,EState * estate,int eflags)476 ExecInitSetOp(SetOp *node, EState *estate, int eflags)
477 {
478 SetOpState *setopstate;
479
480 /* check for unsupported flags */
481 Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
482
483 /*
484 * create state structure
485 */
486 setopstate = makeNode(SetOpState);
487 setopstate->ps.plan = (Plan *) node;
488 setopstate->ps.state = estate;
489 setopstate->ps.ExecProcNode = ExecSetOp;
490
491 setopstate->eqfunctions = NULL;
492 setopstate->hashfunctions = NULL;
493 setopstate->setop_done = false;
494 setopstate->numOutput = 0;
495 setopstate->pergroup = NULL;
496 setopstate->grp_firstTuple = NULL;
497 setopstate->hashtable = NULL;
498 setopstate->tableContext = NULL;
499
500 /*
501 * Miscellaneous initialization
502 *
503 * SetOp nodes have no ExprContext initialization because they never call
504 * ExecQual or ExecProject. But they do need a per-tuple memory context
505 * anyway for calling execTuplesMatch.
506 */
507 setopstate->tempContext =
508 AllocSetContextCreate(CurrentMemoryContext,
509 "SetOp",
510 ALLOCSET_DEFAULT_SIZES);
511
512 /*
513 * If hashing, we also need a longer-lived context to store the hash
514 * table. The table can't just be kept in the per-query context because
515 * we want to be able to throw it away in ExecReScanSetOp.
516 */
517 if (node->strategy == SETOP_HASHED)
518 setopstate->tableContext =
519 AllocSetContextCreate(CurrentMemoryContext,
520 "SetOp hash table",
521 ALLOCSET_DEFAULT_SIZES);
522
523 /*
524 * Tuple table initialization
525 */
526 ExecInitResultTupleSlot(estate, &setopstate->ps);
527
528 /*
529 * initialize child nodes
530 *
531 * If we are hashing then the child plan does not need to handle REWIND
532 * efficiently; see ExecReScanSetOp.
533 */
534 if (node->strategy == SETOP_HASHED)
535 eflags &= ~EXEC_FLAG_REWIND;
536 outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
537
538 /*
539 * setop nodes do no projections, so initialize projection info for this
540 * node appropriately
541 */
542 ExecAssignResultTypeFromTL(&setopstate->ps);
543 setopstate->ps.ps_ProjInfo = NULL;
544
545 /*
546 * Precompute fmgr lookup data for inner loop. We need both equality and
547 * hashing functions to do it by hashing, but only equality if not
548 * hashing.
549 */
550 if (node->strategy == SETOP_HASHED)
551 execTuplesHashPrepare(node->numCols,
552 node->dupOperators,
553 &setopstate->eqfunctions,
554 &setopstate->hashfunctions);
555 else
556 setopstate->eqfunctions =
557 execTuplesMatchPrepare(node->numCols,
558 node->dupOperators);
559
560 if (node->strategy == SETOP_HASHED)
561 {
562 build_hash_table(setopstate);
563 setopstate->table_filled = false;
564 }
565 else
566 {
567 setopstate->pergroup =
568 (SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
569 }
570
571 return setopstate;
572 }
573
574 /* ----------------------------------------------------------------
575 * ExecEndSetOp
576 *
577 * This shuts down the subplan and frees resources allocated
578 * to this node.
579 * ----------------------------------------------------------------
580 */
581 void
ExecEndSetOp(SetOpState * node)582 ExecEndSetOp(SetOpState *node)
583 {
584 /* clean up tuple table */
585 ExecClearTuple(node->ps.ps_ResultTupleSlot);
586
587 /* free subsidiary stuff including hashtable */
588 MemoryContextDelete(node->tempContext);
589 if (node->tableContext)
590 MemoryContextDelete(node->tableContext);
591
592 ExecEndNode(outerPlanState(node));
593 }
594
595
/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so the next ExecProcNode call recomputes the
 *		set operation; in the hashed case an already-built hash table
 *		may be rescanned in place if the subplan is unchanged.
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	/* forget any pending repeat-output and end-of-output state */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo. If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage (the counts live there too) */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}
651