/*-------------------------------------------------------------------------
 *
 * nodeSetOp.c
 *	  Routines to handle INTERSECT and EXCEPT selection
 *
 * The input of a SetOp node consists of tuples from two relations,
 * which have been combined into one dataset, with a junk attribute added
 * that shows which relation each tuple came from.  In SETOP_SORTED mode,
 * the input has furthermore been sorted according to all the grouping
 * columns (ie, all the non-junk attributes).  The SetOp node scans each
 * group of identical tuples to determine how many came from each input
 * relation.  Then it is a simple matter to emit the output demanded by the
 * SQL spec for INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL.
 *
 * In SETOP_HASHED mode, the input is delivered in no particular order,
 * except that we know all the tuples from one input relation will come before
 * all the tuples of the other.  The planner guarantees that the first input
 * relation is the left-hand one for EXCEPT, and tries to make the smaller
 * input relation come first for INTERSECT.  We build a hash table in memory
 * with one entry for each group of identical tuples, and count the number of
 * tuples in the group from each relation.  After seeing all the input, we
 * scan the hashtable and generate the correct output using those counts.
 * We can avoid making hashtable entries for any tuples appearing only in the
 * second input relation, since they cannot result in any output.
 *
 * This node type is not used for UNION or UNION ALL, since those can be
 * implemented more cheaply (there's no need for the junk attribute to
 * identify the source relation).
 *
 * Note that SetOp does no qual checking nor projection.  The delivered
 * output tuples are just copies of the first-to-arrive tuple in each
 * input group.
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeSetOp.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/htup_details.h"
#include "executor/executor.h"
#include "executor/nodeSetOp.h"
#include "miscadmin.h"
#include "utils/memutils.h"


/*
 * SetOpStatePerGroupData - per-group working state
 *
 * These values are working state that is initialized at the start of
 * an input tuple group and updated for each input tuple.
 *
 * In SETOP_SORTED mode, we need only one of these structs, and it's kept in
 * the plan state node.  In SETOP_HASHED mode, the hash table contains one
 * of these for each tuple group.
 */
typedef struct SetOpStatePerGroupData
{
	long		numLeft;		/* number of left-input dups in group */
	long		numRight;		/* number of right-input dups in group */
} SetOpStatePerGroupData;


static TupleTableSlot *setop_retrieve_direct(SetOpState *setopstate);
static void setop_fill_hash_table(SetOpState *setopstate);
static TupleTableSlot *setop_retrieve_hash_table(SetOpState *setopstate);


/*
 * Initialize state for a new group of input values.
 */
static inline void
initialize_counts(SetOpStatePerGroup pergroup)
{
	pergroup->numLeft = pergroup->numRight = 0;
}

/*
 * Advance the appropriate counter for one input tuple.
 */
static inline void
advance_counts(SetOpStatePerGroup pergroup, int flag)
{
	if (flag)
		pergroup->numRight++;
	else
		pergroup->numLeft++;
}

/*
 * Fetch the "flag" column from an input tuple.
 * This is an integer column with value 0 for left side, 1 for right side.
 */
static int
fetch_tuple_flag(SetOpState *setopstate, TupleTableSlot *inputslot)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	int			flag;
	bool		isNull;

	flag = DatumGetInt32(slot_getattr(inputslot,
									  node->flagColIdx,
									  &isNull));
	/* The planner-added junk column is never null and is always 0 or 1 */
	Assert(!isNull);
	Assert(flag == 0 || flag == 1);
	return flag;
}

/*
 * Initialize the hash table to empty.
 */
static void
build_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;
	TupleDesc	desc = ExecGetResultType(outerPlanState(setopstate));

	Assert(node->strategy == SETOP_HASHED);
	Assert(node->numGroups > 0);

	/*
	 * Entries are keyed by the grouping (non-junk) columns; node->numGroups
	 * is the planner's estimate, used to size the table initially.
	 */
	setopstate->hashtable = BuildTupleHashTableExt(&setopstate->ps,
												   desc,
												   node->numCols,
												   node->dupColIdx,
												   setopstate->eqfuncoids,
												   setopstate->hashfunctions,
												   node->dupCollations,
												   node->numGroups,
												   0,
												   setopstate->ps.state->es_query_cxt,
												   setopstate->tableContext,
												   econtext->ecxt_per_tuple_memory,
												   false);
}

/*
 * We've completed processing a tuple group.  Decide how many copies (if any)
 * of its representative row to emit, and store the count into numOutput.
 * This logic is straight from the SQL92 specification.
 */
static void
set_output_count(SetOpState *setopstate, SetOpStatePerGroup pergroup)
{
	SetOp	   *plannode = (SetOp *) setopstate->ps.plan;

	switch (plannode->cmd)
	{
		case SETOPCMD_INTERSECT:
			if (pergroup->numLeft > 0 && pergroup->numRight > 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_INTERSECT_ALL:
			/* emit min(numLeft, numRight) copies */
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				pergroup->numLeft : pergroup->numRight;
			break;
		case SETOPCMD_EXCEPT:
			if (pergroup->numLeft > 0 && pergroup->numRight == 0)
				setopstate->numOutput = 1;
			else
				setopstate->numOutput = 0;
			break;
		case SETOPCMD_EXCEPT_ALL:
			/* emit max(numLeft - numRight, 0) copies */
			setopstate->numOutput =
				(pergroup->numLeft < pergroup->numRight) ?
				0 : (pergroup->numLeft - pergroup->numRight);
			break;
		default:
			elog(ERROR, "unrecognized set op: %d", (int) plannode->cmd);
			break;
	}
}


/* ----------------------------------------------------------------
 *		ExecSetOp
 * ----------------------------------------------------------------
 */
static TupleTableSlot *			/* return: a tuple or NULL */
ExecSetOp(PlanState *pstate)
{
	SetOpState *node = castNode(SetOpState, pstate);
	SetOp	   *plannode = (SetOp *) node->ps.plan;
	TupleTableSlot *resultTupleSlot = node->ps.ps_ResultTupleSlot;

	CHECK_FOR_INTERRUPTS();

	/*
	 * If the previously-returned tuple needs to be returned more than once,
	 * keep returning it.
	 */
	if (node->numOutput > 0)
	{
		node->numOutput--;
		return resultTupleSlot;
	}

	/* Otherwise, we're done if we are out of groups */
	if (node->setop_done)
		return NULL;

	/* Fetch the next tuple group according to the correct strategy */
	if (plannode->strategy == SETOP_HASHED)
	{
		if (!node->table_filled)
			setop_fill_hash_table(node);
		return setop_retrieve_hash_table(node);
	}
	else
		return setop_retrieve_direct(node);
}

/*
 * ExecSetOp for non-hashed case
 */
static TupleTableSlot *
setop_retrieve_direct(SetOpState *setopstate)
{
	PlanState  *outerPlan;
	SetOpStatePerGroup pergroup;
	TupleTableSlot *outerslot;
	TupleTableSlot *resultTupleSlot;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	pergroup = (SetOpStatePerGroup) setopstate->pergroup;
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		/*
		 * If we don't already have the first tuple of the new group, fetch it
		 * from the outer plan.
		 */
		if (setopstate->grp_firstTuple == NULL)
		{
			outerslot = ExecProcNode(outerPlan);
			if (!TupIsNull(outerslot))
			{
				/* Make a copy of the first input tuple */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
			}
			else
			{
				/* outer plan produced no tuples at all */
				setopstate->setop_done = true;
				return NULL;
			}
		}

		/*
		 * Store the copied first input tuple in the tuple table slot reserved
		 * for it.  The tuple will be deleted when it is cleared from the
		 * slot.
		 */
		ExecStoreHeapTuple(setopstate->grp_firstTuple,
						   resultTupleSlot,
						   true);
		setopstate->grp_firstTuple = NULL;	/* don't keep two pointers */

		/* Initialize working state for a new input tuple group */
		initialize_counts(pergroup);

		/* Count the first input tuple */
		advance_counts(pergroup,
					   fetch_tuple_flag(setopstate, resultTupleSlot));

		/*
		 * Scan the outer plan until we exhaust it or cross a group boundary.
		 */
		for (;;)
		{
			outerslot = ExecProcNode(outerPlan);
			if (TupIsNull(outerslot))
			{
				/* no more outer-plan tuples available */
				setopstate->setop_done = true;
				break;
			}

			/*
			 * Check whether we've crossed a group boundary.
			 */
			econtext->ecxt_outertuple = resultTupleSlot;
			econtext->ecxt_innertuple = outerslot;

			if (!ExecQualAndReset(setopstate->eqfunction, econtext))
			{
				/*
				 * Save the first input tuple of the next group.
				 */
				setopstate->grp_firstTuple = ExecCopySlotHeapTuple(outerslot);
				break;
			}

			/* Still in same group, so count this tuple */
			advance_counts(pergroup,
						   fetch_tuple_flag(setopstate, outerslot));
		}

		/*
		 * Done scanning input tuple group.  See if we should emit any copies
		 * of result tuple, and if so return the first copy.
		 */
		set_output_count(setopstate, pergroup);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return resultTupleSlot;
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/*
 * ExecSetOp for hashed case: phase 1, read input and build hash table
 */
static void
setop_fill_hash_table(SetOpState *setopstate)
{
	SetOp	   *node = (SetOp *) setopstate->ps.plan;
	PlanState  *outerPlan;
	int			firstFlag;
	bool		in_first_rel PG_USED_FOR_ASSERTS_ONLY;
	ExprContext *econtext = setopstate->ps.ps_ExprContext;

	/*
	 * get state info from node
	 */
	outerPlan = outerPlanState(setopstate);
	firstFlag = node->firstFlag;
	/*
	 * verify planner didn't mess up: the first input must be the left-hand
	 * relation (flag 0), except for INTERSECT cases where the planner may
	 * put the smaller (right, flag 1) relation first -- see file head
	 * comment.
	 */
	Assert(firstFlag == 0 ||
		   (firstFlag == 1 &&
			(node->cmd == SETOPCMD_INTERSECT ||
			 node->cmd == SETOPCMD_INTERSECT_ALL)));

	/*
	 * Process each outer-plan tuple, and then fetch the next one, until we
	 * exhaust the outer plan.
	 */
	in_first_rel = true;
	for (;;)
	{
		TupleTableSlot *outerslot;
		int			flag;
		TupleHashEntryData *entry;
		bool		isnew;

		outerslot = ExecProcNode(outerPlan);
		if (TupIsNull(outerslot))
			break;

		/* Identify whether it's left or right input */
		flag = fetch_tuple_flag(setopstate, outerslot);

		if (flag == firstFlag)
		{
			/* (still) in first input relation */
			Assert(in_first_rel);

			/* Find or build hashtable entry for this tuple's group */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 &isnew, NULL);

			/* If new tuple group, initialize counts */
			if (isnew)
			{
				entry->additional = (SetOpStatePerGroup)
					MemoryContextAlloc(setopstate->hashtable->tablecxt,
									   sizeof(SetOpStatePerGroupData));
				initialize_counts((SetOpStatePerGroup) entry->additional);
			}

			/* Advance the counts */
			advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}
		else
		{
			/* reached second relation */
			in_first_rel = false;

			/* For tuples not seen previously, do not make hashtable entry */
			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot,
										 NULL, NULL);

			/* Advance the counts if entry is already present */
			if (entry)
				advance_counts((SetOpStatePerGroup) entry->additional, flag);
		}

		/* Must reset expression context after each hashtable lookup */
		ResetExprContext(econtext);
	}

	setopstate->table_filled = true;
	/* Initialize to walk the hash table */
	ResetTupleHashIterator(setopstate->hashtable, &setopstate->hashiter);
}

/*
 * ExecSetOp for hashed case: phase 2, retrieving groups from hash table
 */
static TupleTableSlot *
setop_retrieve_hash_table(SetOpState *setopstate)
{
	TupleHashEntryData *entry;
	TupleTableSlot *resultTupleSlot;

	/*
	 * get state info from node
	 */
	resultTupleSlot = setopstate->ps.ps_ResultTupleSlot;

	/*
	 * We loop retrieving groups until we find one we should return
	 */
	while (!setopstate->setop_done)
	{
		CHECK_FOR_INTERRUPTS();

		/*
		 * Find the next entry in the hash table
		 */
		entry = ScanTupleHashTable(setopstate->hashtable, &setopstate->hashiter);
		if (entry == NULL)
		{
			/* No more entries in hashtable, so done */
			setopstate->setop_done = true;
			return NULL;
		}

		/*
		 * See if we should emit any copies of this tuple, and if so return
		 * the first copy.
		 */
		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

		if (setopstate->numOutput > 0)
		{
			setopstate->numOutput--;
			return ExecStoreMinimalTuple(entry->firstTuple,
										 resultTupleSlot,
										 false);
		}
	}

	/* No more groups */
	ExecClearTuple(resultTupleSlot);
	return NULL;
}

/* ----------------------------------------------------------------
 *		ExecInitSetOp
 *
 *		This initializes the setop node state structures and
 *		the node's subplan.
 * ----------------------------------------------------------------
 */
SetOpState *
ExecInitSetOp(SetOp *node, EState *estate, int eflags)
{
	SetOpState *setopstate;
	TupleDesc	outerDesc;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * create state structure
	 */
	setopstate = makeNode(SetOpState);
	setopstate->ps.plan = (Plan *) node;
	setopstate->ps.state = estate;
	setopstate->ps.ExecProcNode = ExecSetOp;

	setopstate->eqfuncoids = NULL;
	setopstate->hashfunctions = NULL;
	setopstate->setop_done = false;
	setopstate->numOutput = 0;
	setopstate->pergroup = NULL;
	setopstate->grp_firstTuple = NULL;
	setopstate->hashtable = NULL;
	setopstate->tableContext = NULL;

	/*
	 * create expression context
	 */
	ExecAssignExprContext(estate, &setopstate->ps);

	/*
	 * If hashing, we also need a longer-lived context to store the hash
	 * table.  The table can't just be kept in the per-query context because
	 * we want to be able to throw it away in ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		setopstate->tableContext =
			AllocSetContextCreate(CurrentMemoryContext,
								  "SetOp hash table",
								  ALLOCSET_DEFAULT_SIZES);

	/*
	 * initialize child nodes
	 *
	 * If we are hashing then the child plan does not need to handle REWIND
	 * efficiently; see ExecReScanSetOp.
	 */
	if (node->strategy == SETOP_HASHED)
		eflags &= ~EXEC_FLAG_REWIND;
	outerPlanState(setopstate) = ExecInitNode(outerPlan(node), estate, eflags);
	outerDesc = ExecGetResultType(outerPlanState(setopstate));

	/*
	 * Initialize result slot and type.  Setop nodes do no projections, so
	 * initialize projection info for this node appropriately.
	 */
	ExecInitResultTupleSlotTL(&setopstate->ps,
							  node->strategy == SETOP_HASHED ?
							  &TTSOpsMinimalTuple : &TTSOpsHeapTuple);
	setopstate->ps.ps_ProjInfo = NULL;

	/*
	 * Precompute fmgr lookup data for inner loop.  We need both equality and
	 * hashing functions to do it by hashing, but only equality if not
	 * hashing.
	 */
	if (node->strategy == SETOP_HASHED)
		execTuplesHashPrepare(node->numCols,
							  node->dupOperators,
							  &setopstate->eqfuncoids,
							  &setopstate->hashfunctions);
	else
		setopstate->eqfunction =
			execTuplesMatchPrepare(outerDesc,
								   node->numCols,
								   node->dupColIdx,
								   node->dupOperators,
								   node->dupCollations,
								   &setopstate->ps);

	if (node->strategy == SETOP_HASHED)
	{
		build_hash_table(setopstate);
		setopstate->table_filled = false;
	}
	else
	{
		setopstate->pergroup =
			(SetOpStatePerGroup) palloc0(sizeof(SetOpStatePerGroupData));
	}

	return setopstate;
}

/* ----------------------------------------------------------------
 *		ExecEndSetOp
 *
 *		This shuts down the subplan and frees resources allocated
 *		to this node.
 * ----------------------------------------------------------------
 */
void
ExecEndSetOp(SetOpState *node)
{
	/* clean up tuple table */
	ExecClearTuple(node->ps.ps_ResultTupleSlot);

	/* free subsidiary stuff including hashtable */
	if (node->tableContext)
		MemoryContextDelete(node->tableContext);
	ExecFreeExprContext(&node->ps);

	ExecEndNode(outerPlanState(node));
}


/* ----------------------------------------------------------------
 *		ExecReScanSetOp
 *
 *		Reset the node so the next fetch restarts the set operation
 *		from scratch (or reuses the already-built hash table when
 *		the subplan's parameters haven't changed).
 * ----------------------------------------------------------------
 */
void
ExecReScanSetOp(SetOpState *node)
{
	ExecClearTuple(node->ps.ps_ResultTupleSlot);
	node->setop_done = false;
	node->numOutput = 0;

	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		/*
		 * In the hashed case, if we haven't yet built the hash table then we
		 * can just return; nothing done yet, so nothing to undo.  If subnode's
		 * chgParam is not NULL then it will be re-scanned by ExecProcNode,
		 * else no reason to re-scan it at all.
		 */
		if (!node->table_filled)
			return;

		/*
		 * If we do have the hash table and the subplan does not have any
		 * parameter changes, then we can just rescan the existing hash table;
		 * no need to build it again.
		 */
		if (node->ps.lefttree->chgParam == NULL)
		{
			ResetTupleHashIterator(node->hashtable, &node->hashiter);
			return;
		}
	}

	/* Release first tuple of group, if we have made a copy */
	if (node->grp_firstTuple != NULL)
	{
		heap_freetuple(node->grp_firstTuple);
		node->grp_firstTuple = NULL;
	}

	/* Release any hashtable storage */
	if (node->tableContext)
		MemoryContextResetAndDeleteChildren(node->tableContext);

	/* And rebuild empty hashtable if needed */
	if (((SetOp *) node->ps.plan)->strategy == SETOP_HASHED)
	{
		ResetTupleHashTable(node->hashtable);
		node->table_filled = false;
	}

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (node->ps.lefttree->chgParam == NULL)
		ExecReScan(node->ps.lefttree);
}