1 /*-------------------------------------------------------------------------
2 *
3 * nodeSetOp.c
4 * Routines to handle INTERSECT and EXCEPT selection
5 *
6 * The input of a SetOp node consists of tuples from two relations,
7 * which have been combined into one dataset, with a junk attribute added
8 * that shows which relation each tuple came from. In SETOP_SORTED mode,
9 * the input has furthermore been sorted according to all the grouping
10 * columns (ie, all the non-junk attributes). The SetOp node scans each
11 * group of identical tuples to determine how many came from each input
12 * relation. Then it is a simple matter to emit the output demanded by the
13 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
14 *
15 * In SETOP_HASHED mode, the input is delivered in no particular order,
16 * except that we know all the tuples from one input relation will come before
17 * all the tuples of the other. The planner guarantees that the first input
18 * relation is the left-hand one for EXCEPT, and tries to make the smaller
19 * input relation come first for INTERSECT. We build a hash table in memory
20 * with one entry for each group of identical tuples, and count the number of
21 * tuples in the group from each relation. After seeing all the input, we
22 * scan the hashtable and generate the correct output using those counts.
23 * We can avoid making hashtable entries for any tuples appearing only in the
24 * second input relation, since they cannot result in any output.
25 *
26 * This node type is not used for UNION or UNION ALL, since those can be
27 * implemented more cheaply (there's no need for the junk attribute to
28 * identify the source relation).
29 *
30 * Note that SetOp does no qual checking nor projection. The delivered
31 * output tuples are just copies of the first-to-arrive tuple in each
32 * input group.
33 *
34 *
35 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
36 * Portions Copyright (c) 1994, Regents of the University of California
37 *
38 *
39 * IDENTIFICATION
40 * src/backend/executor/nodeSetOp.c
41 *
42 *-------------------------------------------------------------------------
43 */
44
45 #include "postgres.h"
46
47 #include "access/htup_details.h"
48 #include "executor/executor.h"
49 #include "executor/nodeSetOp.h"
50 #include "utils/memutils.h"
51
52
/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;
68
/*
 * To implement hashed mode, we need a hashtable that stores a
 * representative tuple and the duplicate counts for each distinct set
 * of grouping columns.  We compute the hash key from the grouping columns.
 */
typedef struct SetOpHashEntryData *SetOpHashEntry;

typedef struct SetOpHashEntryData
{
	TupleHashEntryData shared;	/* common header for hash table entries; must
								 * be first so the generic hash code can treat
								 * this as a TupleHashEntryData */
	SetOpStatePerGroupData pergroup;	/* left/right dup counts for group */
} SetOpHashEntryData;
81
82
83 static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
84 static void setop_fill_hash_table(SetOpState *setopstate);
85 static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);
86
87
88 /*
89 * Initialize state for a new group of input values.
90 */
91 static inline void
initialize_counts(SetOpStatePerGroup pergroup)92 initialize_counts(SetOpStatePerGroup pergroup)
93 {
94 pergroup->numLeft = pergroup->numRight = 0;
95 }
96
97 /*
98 * Advance the appropriate counter for one input tuple.
99 */
100 static inline void
advance_counts(SetOpStatePerGroup pergroup,int flag)101 advance_counts(SetOpStatePerGroup pergroup, int flag)
102 {
103 if (flag)
104 pergroup->numRight++;
105 else
106 pergroup->numLeft++;
107 }
108
109 /*
110 * Fetch the "flag" column from an input tuple.
111 * This is an integer column with value 0 for left side, 1 for right side.
112 */
113 static int
fetch_tuple_flag(SetOpState * setopstate,TupleTableSlot * inputslot)114 fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
115 {
116 SetOp *node = (SetOp *) setopstate->ps.plan;
117 int flag;
118 bool isNull;
119
120 flag = DatumGetInt32(slot_getattr(inputslot,
121 node->flagColIdx,
122 &isNull));
123 Assert(!isNull);
124 Assert(flag == 0 || flag == 1);
125 return flag;
126 }
127
128 /*
129 * Initialize the hash table to empty.
130 */
131 static void
build_hash_table(SetOpState * setopstate)132 build_hash_table(SetOpState *setopstate)
133 {
134 SetOp *node = (SetOp *) setopstate->ps.plan;
135
136 Assert(node->strategy == SETOP_HASHED);
137 Assert(node->numGroups > 0);
138
139 setopstate->hashtable = BuildTupleHashTable(node->numCols,
140 node->dupColIdx,
141 setopstate->eqfunctions,
142 setopstate->hashfunctions,
143 node->numGroups,
144 sizeof(SetOpHashEntryData),
145 setopstate->tableContext,
146 setopstate->tempContext);
147 }
148
149 /*
150 * We've completed processing a tuple group. Decide how many copies (if any)
151 * of its representative row to emit, and store the count into numOutput.
152 * This logic is straight from the SQL92 specification.
153 */
154 static void
set_output_count(SetOpState * setopstate,SetOpStatePerGroup pergroup)155 set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
156 {
157 SetOp *plannode = (SetOp *) setopstate->ps.plan;
158
159 switch (plannode->cmd)
160 {
161 case SETOPCMD_INTERSECT:
162 if (pergroup->numLeft > 0 && pergroup->numRight > 0)
163 setopstate->numOutput = 1;
164 else
165 setopstate->numOutput = 0;
166 break;
167 case SETOPCMD_INTERSECT_ALL:
168 setopstate->numOutput =
169 (pergroup->numLeft < pergroup->numRight) ?
170 pergroup->numLeft : pergroup->numRight;
171 break;
172 case SETOPCMD_EXCEPT:
173 if (pergroup->numLeft > 0 && pergroup->numRight == 0)
174 setopstate->numOutput = 1;
175 else
176 setopstate->numOutput = 0;
177 break;
178 case SETOPCMD_EXCEPT_ALL:
179 setopstate->numOutput =
180 (pergroup->numLeft < pergroup->numRight) ?
181 0 : (pergroup->numLeft - pergroup->numRight);
182 break;
183 default:
184 elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
185 break;
186 }
187 }
188
189
190 /* ----------------------------------------------------------------
191 * ExecSetOp
192 * ----------------------------------------------------------------
193 */
194 TupleTableSlot * /* return: a tuple or NULL */
ExecSetOp(SetOpState * node)195 ExecSetOp(SetOpState *node)
196 {
197 SetOp *plannode = (SetOp *) node->ps.plan;
198 TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;
199
200 /*
201 * If the previously-returned tuple needs to be returned more than once,
202 * keep returning it.
203 */
204 if (node->numOutput > 0)
205 {
206 node->numOutput--;
207 return resultTupleSlot;
208 }
209
210 /* Otherwise, we're done if we are out of groups */
211 if (node->setop_done)
212 return NULL;
213
214 /* Fetch the next tuple group according to the correct strategy */
215 if (plannode->strategy == SETOP_HASHED)
216 {
217 if (!node->table_filled)
218 setop_fill_hash_table(node);
219 return setop_retrieve_hash_table(node);
220 }
221 else
222 return setop_retrieve_direct(node);
223 }
224
/*
 * ExecSetOp for non-hashed case
 *
 * The input is sorted on the grouping columns, so we consume consecutive
 * tuples belonging to one group, tally left/right counts, and then decide
 * how many copies of the group's first tuple to emit.  Returns the next
 * output tuple, or NULL when the input is exhausted.
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.  (A previous iteration may have stashed it in
		 * grp_firstTuple when it detected the group boundary.)
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.  (shouldFree = true transfers ownership to the slot.)
		 */
		ExecStoreTuple(setopstate->grp_firstTuple,
					   resultTupleSlot,
					   InvalidBuffer,
					   true);
		setopstate->grp_firstTuple = NULL;		/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			if (!execTuplesMatch(resultTupleSlot,
								 outerslot,
								 node->numCols, node->dupColIdx,
								 setopstate->eqfunctions,
								 setopstate->tempContext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.  (Any remaining
		 * copies are doled out by later ExecSetOp calls via numOutput.)
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}
338
/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 *
 * On return, table_filled is set and the hash iterator is positioned at
 * the start of the table, ready for phase 2.
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/* verify planner didn't mess up */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		SetOpHashEntry entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, &isnew);

			/* If new tuple group, initialize counts */
			if (isnew)
				initialize_counts(&entry->pergroup);

			/* Advance the counts */
			advance_counts(&entry->pergroup, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/*
			 * For tuples not seen previously, do not make hashtable entry:
			 * passing isnew = NULL makes this a pure lookup.  Groups that
			 * appear only in the second input can never produce output.
			 */
			entry = (SetOpHashEntry)
				LookupTupleHashEntry(setopstate->hashtable, outerslot, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts(&entry->pergroup, flag);
		}

		/* Must reset temp context after each hashtable lookup */
		MemoryContextReset(setopstate->tempContext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}
418
419 /*
420 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
421 */
422 static TupleTableSlot *
setop_retrieve_hash_table(SetOpState * setopstate)423 setop_retrieve_hash_table(SetOpState *setopstate)
424 {
425 SetOpHashEntry entry;
426 TupleTableSlot *resultTupleSlot;
427
428 /*
429 * get state info from node
430 */
431 resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;
432
433 /*
434 * We loop retrieving groups until we find one we should return
435 */
436 while (!setopstate->setop_done)
437 {
438 /*
439 * Find the next entry in the hash table
440 */
441 entry = (SetOpHashEntry) ScanTupleHashTable(&setopstate->hashiter);
442 if (entry == NULL)
443 {
444 /* No more entries in hashtable, so done */
445 setopstate->setop_done = true;
446 return NULL;
447 }
448
449 /*
450 * See if we should emit any copies of this tuple, and if so return
451 * the first copy.
452 */
453 set_output_count(setopstate, &entry->pergroup);
454
455 if (setopstate->numOutput > 0)
456 {
457 setopstate->numOutput--;
458 return ExecStoreMinimalTuple(entry->shared.firstTuple,
459 resultTupleSlot,
460 false);
461 }
462 }
463
464 /* No more groups */
465 ExecClearTuple(resultTupleSlot);
466 return NULL;
467 }
468
/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.  Returns the new SetOpState, which the
 *		caller owns for the life of the plan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;

	setopstate->eqfunctions = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * Miscellaneous initialization
	 *
	 * SetOp nodes have no ExprContext initialization because they never call
	 * ExecQual or ExecProject.  But they do need a per-tuple memory context
	 * anyway for calling execTuplesMatch.
	 */
	setopstate->tempContext =
		AllocSetContextCreate(CurrentMemoryContext,
							  "SetOp",
							  ALLOCSET_DEFAULT_SIZES);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * Tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &setopstate->ps);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * setop nodes do no projections, so initialize projection info for this
	 * node appropriately
	 */
	ExecAssignResultTypeFromTL(&setopstate->ps);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfunctions,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunctions =
			execTuplesMatchPrepare(node->numCols,
								   node->dupOperators);

	/* strategy-specific working state: hash table vs. single group struct */
	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}
573
/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table (releases any tuple the result slot owns) */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	MemoryContextDelete(node->tempContext);
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);

	/* recursively shut down the outer subplan */
	ExecEndNode(outerPlanState(node));
}
594
595
/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so the next ExecSetOp call restarts the scan.
 *		In hashed mode we can often reuse the already-built hash
 *		table instead of rescanning the child plan.
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo.  If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage (the whole context, in one go) */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		build_hash_table(node);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}
651