/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *    Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *     ExecBitmapHeapScan         scans a relation using bitmap info
 *     ExecBitmapHeapNext         workhorse for above
 *     ExecInitBitmapHeapScan     creates and initializes state info.
 *     ExecReScanBitmapHeapScan   prepares to rescan the plan.
 *     ExecEndBitmapHeapScan      releases all storage.
 */
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"

static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                                                TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
                                  TableScanDesc scan);
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);

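/*
 * Overview of control flow (summary, for orientation): ExecBitmapHeapScan()
 * is the ExecProcNode entry point; it calls ExecScan(), which repeatedly
 * invokes BitmapHeapNext() to fetch tuples and BitmapHeapRecheck() to
 * re-verify them during EvalPlanQual rechecks.
 */
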
/* ----------------------------------------------------------------
 *     BitmapHeapNext
 *
 *     Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
    ExprContext *econtext;
    TableScanDesc scan;
    TIDBitmap  *tbm;
    TBMIterator *tbmiterator = NULL;
    TBMSharedIterator *shared_tbmiterator = NULL;
    TBMIterateResult *tbmres;
    TupleTableSlot *slot;
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

    /*
     * extract necessary information from index scan node
     */
    econtext = node->ss.ps.ps_ExprContext;
    slot = node->ss.ss_ScanTupleSlot;
    scan = node->ss.ss_currentScanDesc;
    tbm = node->tbm;
    if (pstate == NULL)
        tbmiterator = node->tbmiterator;
    else
        shared_tbmiterator = node->shared_tbmiterator;
    tbmres = node->tbmres;

    /*
     * If we haven't yet performed the underlying index scan, do it, and begin
     * the iteration over the bitmap.
     *
     * For prefetching, we use *two* iterators, one for the pages we are
     * actually scanning and another that runs ahead of the first for
     * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
     * the prefetch iterator is.  Also, node->prefetch_target tracks the
     * desired prefetch distance, which starts small and increases up to the
     * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
     * a scan that stops after a few tuples because of a LIMIT.
     */
    if (!node->initialized)
    {
        if (!pstate)
        {
            tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

            if (!tbm || !IsA(tbm, TIDBitmap))
                elog(ERROR, "unrecognized result from subplan");

            node->tbm = tbm;
            node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
            node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->prefetch_iterator = tbm_begin_iterate(tbm);
                node->prefetch_pages = 0;
                node->prefetch_target = -1;
            }
#endif                          /* USE_PREFETCH */
        }
        else
        {
            /*
             * The leader will immediately come out of the function, but
             * others will be blocked until the leader populates the TBM and
             * wakes them up.
             */
            if (BitmapShouldInitializeSharedState(pstate))
            {
                tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
                if (!tbm || !IsA(tbm, TIDBitmap))
                    elog(ERROR, "unrecognized result from subplan");

                node->tbm = tbm;

                /*
                 * Prepare to iterate over the TBM. This will return the
                 * dsa_pointer of the iterator state which will be used by
                 * multiple processes to iterate jointly.
                 */
                pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
                if (node->prefetch_maximum > 0)
                {
                    pstate->prefetch_iterator =
                        tbm_prepare_shared_iterate(tbm);

                    /*
                     * We don't need the mutex here as we haven't yet woken
                     * up others.
                     */
                    pstate->prefetch_pages = 0;
                    pstate->prefetch_target = -1;
                }
#endif

                /* We have initialized the shared state so wake up others. */
                BitmapDoneInitializingSharedState(pstate);
            }

            /* Allocate a private iterator and attach the shared state to it */
            node->shared_tbmiterator = shared_tbmiterator =
                tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
            node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
            if (node->prefetch_maximum > 0)
            {
                node->shared_prefetch_iterator =
                    tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
            }
#endif                          /* USE_PREFETCH */
        }
        node->initialized = true;
    }

    for (;;)
    {
        bool        skip_fetch;

        CHECK_FOR_INTERRUPTS();

        /*
         * Get next page of results if needed
         */
        if (tbmres == NULL)
        {
            if (!pstate)
                node->tbmres = tbmres = tbm_iterate(tbmiterator);
            else
                node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
            if (tbmres == NULL)
            {
                /* no more entries in the bitmap */
                break;
            }

            BitmapAdjustPrefetchIterator(node, tbmres);

            /*
             * We can skip fetching the heap page if we don't need any fields
             * from the heap, and the bitmap entries don't need rechecking,
             * and all tuples on the page are visible to our transaction.
             *
             * XXX: It's a layering violation that we do these checks above
             * tableam; they should probably be moved below it at some point.
             */
            skip_fetch = (node->can_skip_fetch &&
                          !tbmres->recheck &&
                          VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                         tbmres->blockno,
                                         &node->vmbuffer));

            if (skip_fetch)
            {
                /* can't be lossy in the skip_fetch case */
                Assert(tbmres->ntuples >= 0);

                /*
                 * The number of tuples on this page is put into
                 * node->return_empty_tuples.
                 */
                node->return_empty_tuples = tbmres->ntuples;
            }
            else if (!table_scan_bitmap_next_block(scan, tbmres))
            {
                /* AM doesn't think this block is valid, skip */
                continue;
            }

            if (tbmres->ntuples >= 0)
                node->exact_pages++;
            else
                node->lossy_pages++;

            /* Adjust the prefetch target */
            BitmapAdjustPrefetchTarget(node);
        }
        else
        {
            /*
             * Continuing in previously obtained page.
             */

#ifdef USE_PREFETCH

            /*
             * Try to prefetch at least a few pages even before we get to the
             * second page if we don't stop reading after the first tuple.
             */
            if (!pstate)
            {
                if (node->prefetch_target < node->prefetch_maximum)
                    node->prefetch_target++;
            }
            else if (pstate->prefetch_target < node->prefetch_maximum)
            {
                /* take spinlock while updating shared state */
                SpinLockAcquire(&pstate->mutex);
                if (pstate->prefetch_target < node->prefetch_maximum)
                    pstate->prefetch_target++;
                SpinLockRelease(&pstate->mutex);
            }
#endif                          /* USE_PREFETCH */
        }

        /*
         * We issue prefetch requests *after* fetching the current page to try
         * to avoid having prefetching interfere with the main I/O. Also, this
         * should happen only when we have determined there is still something
         * to do on the current page, else we may uselessly prefetch the same
         * page we are just about to request for real.
         *
         * XXX: It's a layering violation that we do these checks above
         * tableam; they should probably be moved below it at some point.
         */
        BitmapPrefetch(node, scan);

        if (node->return_empty_tuples > 0)
        {
            /*
             * If we don't have to fetch the tuple, just return nulls.
             */
            ExecStoreAllNullTuple(slot);

            if (--node->return_empty_tuples == 0)
            {
                /* no more tuples to return in the next round */
                node->tbmres = tbmres = NULL;
            }
        }
        else
        {
            /*
             * Attempt to fetch tuple from AM.
             */
            if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
            {
                /* nothing more to look at on this page */
                node->tbmres = tbmres = NULL;
                continue;
            }

            /*
             * If we are using lossy info, we have to recheck the qual
             * conditions at every tuple.
             */
            if (tbmres->recheck)
            {
                econtext->ecxt_scantuple = slot;
                if (!ExecQualAndReset(node->bitmapqualorig, econtext))
                {
                    /* Fails recheck, so drop it and loop back for another */
                    InstrCountFiltered2(node, 1);
                    ExecClearTuple(slot);
                    continue;
                }
            }
        }

        /* OK to return this tuple */
        return slot;
    }

    /*
     * if we get here it means we are at the end of the scan.
     */
    return ExecClearTuple(slot);
}

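/*
 * Note on the two iterators used above (summary): in the non-parallel case
 * the prefetch iterator walks the same TIDBitmap as the main iterator, kept
 * exactly node->prefetch_pages entries ahead of it.  In the parallel case
 * both iterators are shared across workers, so a page prefetched by one
 * worker may be fetched for real by another.
 */
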
/*
 * BitmapDoneInitializingSharedState - Shared state is initialized
 *
 * By this time the leader has already populated the TBM and initialized the
 * shared state so wake up other processes.
 */
static inline void
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
    SpinLockAcquire(&pstate->mutex);
    pstate->state = BM_FINISHED;
    SpinLockRelease(&pstate->mutex);
    ConditionVariableBroadcast(&pstate->cv);
}

/*
 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 */
static inline void
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
                             TBMIterateResult *tbmres)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        TBMIterator *prefetch_iterator = node->prefetch_iterator;

        if (node->prefetch_pages > 0)
        {
            /* The main iterator has closed the distance by one page */
            node->prefetch_pages--;
        }
        else if (prefetch_iterator)
        {
            /* Do not let the prefetch iterator get behind the main one */
            TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

            if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
                elog(ERROR, "prefetch and main iterators are out of sync");
        }
        return;
    }

    if (node->prefetch_maximum > 0)
    {
        TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

        SpinLockAcquire(&pstate->mutex);
        if (pstate->prefetch_pages > 0)
        {
            pstate->prefetch_pages--;
            SpinLockRelease(&pstate->mutex);
        }
        else
        {
            /* Release the mutex before iterating */
            SpinLockRelease(&pstate->mutex);

            /*
             * In shared mode, we cannot ensure that the current blockno of
             * the main iterator and that of the prefetch iterator are the
             * same.  It's possible that whatever blockno we are prefetching
             * will be processed by another process.  Therefore, we don't
             * validate the blockno here as we do in the non-parallel case.
             */
            if (prefetch_iterator)
                tbm_shared_iterate(prefetch_iterator);
        }
    }
#endif                          /* USE_PREFETCH */
}

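/*
 * Illustrative ramp-up (derived from the logic below, assuming
 * prefetch_maximum = 8): prefetch_target starts at -1 (prefetching
 * disabled), then grows 0, 1, 2, 4, 8 as successive pages are fetched, and
 * stays at 8 thereafter.
 */
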
/*
 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
 *
 * Increase prefetch target if it's not yet at the max.  Note that
 * we will increase it to zero after fetching the very first
 * page/tuple, then to one after the second tuple is fetched, then
 * it doubles as later pages are fetched.
 */
static inline void
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        if (node->prefetch_target >= node->prefetch_maximum)
             /* don't increase any further */ ;
        else if (node->prefetch_target >= node->prefetch_maximum / 2)
            node->prefetch_target = node->prefetch_maximum;
        else if (node->prefetch_target > 0)
            node->prefetch_target *= 2;
        else
            node->prefetch_target++;
        return;
    }

    /* Do an unlocked check first to save spinlock acquisitions. */
    if (pstate->prefetch_target < node->prefetch_maximum)
    {
        SpinLockAcquire(&pstate->mutex);
        if (pstate->prefetch_target >= node->prefetch_maximum)
             /* don't increase any further */ ;
        else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
            pstate->prefetch_target = node->prefetch_maximum;
        else if (pstate->prefetch_target > 0)
            pstate->prefetch_target *= 2;
        else
            pstate->prefetch_target++;
        SpinLockRelease(&pstate->mutex);
    }
#endif                          /* USE_PREFETCH */
}

/*
 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
{
#ifdef USE_PREFETCH
    ParallelBitmapHeapState *pstate = node->pstate;

    if (pstate == NULL)
    {
        TBMIterator *prefetch_iterator = node->prefetch_iterator;

        if (prefetch_iterator)
        {
            while (node->prefetch_pages < node->prefetch_target)
            {
                TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
                bool        skip_fetch;

                if (tbmpre == NULL)
                {
                    /* No more pages to prefetch */
                    tbm_end_iterate(prefetch_iterator);
                    node->prefetch_iterator = NULL;
                    break;
                }
                node->prefetch_pages++;

                /*
                 * If we expect not to have to actually read this heap page,
                 * skip this prefetch call, but continue to run the prefetch
                 * logic normally.  (Would it be better not to increment
                 * prefetch_pages?)
                 *
                 * This depends on the assumption that the index AM will
                 * report the same recheck flag for this future heap page as
                 * it did for the current heap page; which is not a certainty
                 * but is true in many cases.
                 */
                skip_fetch = (node->can_skip_fetch &&
                              (node->tbmres ? !node->tbmres->recheck : false) &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                             tbmpre->blockno,
                                             &node->pvmbuffer));

                if (!skip_fetch)
                    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
            }
        }

        return;
    }

    if (pstate->prefetch_pages < pstate->prefetch_target)
    {
        TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

        if (prefetch_iterator)
        {
            while (1)
            {
                TBMIterateResult *tbmpre;
                bool        do_prefetch = false;
                bool        skip_fetch;

                /*
                 * Recheck under the mutex.  If some other process has
                 * already done enough prefetching then we need not do
                 * anything.
                 */
                SpinLockAcquire(&pstate->mutex);
                if (pstate->prefetch_pages < pstate->prefetch_target)
                {
                    pstate->prefetch_pages++;
                    do_prefetch = true;
                }
                SpinLockRelease(&pstate->mutex);

                if (!do_prefetch)
                    return;

                tbmpre = tbm_shared_iterate(prefetch_iterator);
                if (tbmpre == NULL)
                {
                    /* No more pages to prefetch */
                    tbm_end_shared_iterate(prefetch_iterator);
                    node->shared_prefetch_iterator = NULL;
                    break;
                }

                /* As above, skip prefetch if we expect not to need page */
                skip_fetch = (node->can_skip_fetch &&
                              (node->tbmres ? !node->tbmres->recheck : false) &&
                              VM_ALL_VISIBLE(node->ss.ss_currentRelation,
                                             tbmpre->blockno,
                                             &node->pvmbuffer));

                if (!skip_fetch)
                    PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
            }
        }
    }
#endif                          /* USE_PREFETCH */
}

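/*
 * Note: PrefetchBuffer() is an advisory hint to the storage layer
 * (implemented with posix_fadvise where available); the page is still read
 * for real by table_scan_bitmap_next_block() when the main iterator
 * reaches it.
 */
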
/*
 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 */
static bool
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
{
    ExprContext *econtext;

    /*
     * extract necessary information from index scan node
     */
    econtext = node->ss.ps.ps_ExprContext;

    /* Does the tuple meet the original qual conditions? */
    econtext->ecxt_scantuple = slot;
    return ExecQualAndReset(node->bitmapqualorig, econtext);
}

/* ----------------------------------------------------------------
 *     ExecBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecBitmapHeapScan(PlanState *pstate)
{
    BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);

    return ExecScan(&node->ss,
                    (ExecScanAccessMtd) BitmapHeapNext,
                    (ExecScanRecheckMtd) BitmapHeapRecheck);
}

/* ----------------------------------------------------------------
 *     ExecReScanBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
    PlanState  *outerPlan = outerPlanState(node);

    /* rescan to release any page pin */
    table_rescan(node->ss.ss_currentScanDesc, NULL);

    /* release bitmaps and buffers if any */
    if (node->tbmiterator)
        tbm_end_iterate(node->tbmiterator);
    if (node->prefetch_iterator)
        tbm_end_iterate(node->prefetch_iterator);
    if (node->shared_tbmiterator)
        tbm_end_shared_iterate(node->shared_tbmiterator);
    if (node->shared_prefetch_iterator)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);
    if (node->tbm)
        tbm_free(node->tbm);
    if (node->vmbuffer != InvalidBuffer)
        ReleaseBuffer(node->vmbuffer);
    if (node->pvmbuffer != InvalidBuffer)
        ReleaseBuffer(node->pvmbuffer);
    node->tbm = NULL;
    node->tbmiterator = NULL;
    node->tbmres = NULL;
    node->prefetch_iterator = NULL;
    node->initialized = false;
    node->shared_tbmiterator = NULL;
    node->shared_prefetch_iterator = NULL;
    node->vmbuffer = InvalidBuffer;
    node->pvmbuffer = InvalidBuffer;

    ExecScanReScan(&node->ss);

    /*
     * if chgParam of subnode is not null then plan will be re-scanned by
     * first ExecProcNode.
     */
    if (outerPlan->chgParam == NULL)
        ExecReScan(outerPlan);
}

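/*
 * For example (illustrative): a rescan is triggered when this node sits on
 * the inner side of a nested loop, where each new outer tuple supplies
 * fresh parameter values to the bitmap index scan below us.
 */
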
/* ----------------------------------------------------------------
 *     ExecEndBitmapHeapScan
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
    TableScanDesc scanDesc;

    /*
     * extract information from the node
     */
    scanDesc = node->ss.ss_currentScanDesc;

    /*
     * Free the exprcontext
     */
    ExecFreeExprContext(&node->ss.ps);

    /*
     * clear out tuple table slots
     */
    if (node->ss.ps.ps_ResultTupleSlot)
        ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
    ExecClearTuple(node->ss.ss_ScanTupleSlot);

    /*
     * close down subplans
     */
    ExecEndNode(outerPlanState(node));

    /*
     * release bitmaps and buffers if any
     */
    if (node->tbmiterator)
        tbm_end_iterate(node->tbmiterator);
    if (node->prefetch_iterator)
        tbm_end_iterate(node->prefetch_iterator);
    if (node->tbm)
        tbm_free(node->tbm);
    if (node->shared_tbmiterator)
        tbm_end_shared_iterate(node->shared_tbmiterator);
    if (node->shared_prefetch_iterator)
        tbm_end_shared_iterate(node->shared_prefetch_iterator);
    if (node->vmbuffer != InvalidBuffer)
        ReleaseBuffer(node->vmbuffer);
    if (node->pvmbuffer != InvalidBuffer)
        ReleaseBuffer(node->pvmbuffer);

    /*
     * close heap scan
     */
    table_endscan(scanDesc);
}

/* ----------------------------------------------------------------
 *     ExecInitBitmapHeapScan
 *
 *     Initializes the scan's state information.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
    BitmapHeapScanState *scanstate;
    Relation    currentRelation;

    /* check for unsupported flags */
    Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

    /*
     * Assert caller didn't ask for an unsafe snapshot --- see comments at
     * head of file.
     */
    Assert(IsMVCCSnapshot(estate->es_snapshot));

    /*
     * create state structure
     */
    scanstate = makeNode(BitmapHeapScanState);
    scanstate->ss.ps.plan = (Plan *) node;
    scanstate->ss.ps.state = estate;
    scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

    scanstate->tbm = NULL;
    scanstate->tbmiterator = NULL;
    scanstate->tbmres = NULL;
    scanstate->return_empty_tuples = 0;
    scanstate->vmbuffer = InvalidBuffer;
    scanstate->pvmbuffer = InvalidBuffer;
    scanstate->exact_pages = 0;
    scanstate->lossy_pages = 0;
    scanstate->prefetch_iterator = NULL;
    scanstate->prefetch_pages = 0;
    scanstate->prefetch_target = 0;
    scanstate->pscan_len = 0;
    scanstate->initialized = false;
    scanstate->shared_tbmiterator = NULL;
    scanstate->shared_prefetch_iterator = NULL;
    scanstate->pstate = NULL;

    /*
     * We can potentially skip fetching heap pages if we do not need any
     * columns of the table, either for checking non-indexable quals or for
     * returning data.  This test is a bit simplistic, as it checks the
     * stronger condition that there's no qual or return tlist at all.  But
     * in most cases it's probably not worth working harder than that.
     */
    scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
                                 node->scan.plan.targetlist == NIL);

    /*
     * Miscellaneous initialization
     *
     * create expression context for node
     */
    ExecAssignExprContext(estate, &scanstate->ss.ps);

    /*
     * open the scan relation
     */
    currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

    /*
     * initialize child nodes
     */
    outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

    /*
     * get the scan type from the relation descriptor.
     */
    ExecInitScanTupleSlot(estate, &scanstate->ss,
                          RelationGetDescr(currentRelation),
                          table_slot_callbacks(currentRelation));

    /*
     * Initialize result type and projection.
     */
    ExecInitResultTypeTL(&scanstate->ss.ps);
    ExecAssignScanProjectionInfo(&scanstate->ss);

    /*
     * initialize child expressions
     */
    scanstate->ss.ps.qual =
        ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
    scanstate->bitmapqualorig =
        ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

    /*
     * Maximum number of prefetches for the tablespace if configured,
     * otherwise the current value of the effective_io_concurrency GUC.
     */
    scanstate->prefetch_maximum =
        get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);

    scanstate->ss.ss_currentRelation = currentRelation;

    scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
                                                          estate->es_snapshot,
                                                          0,
                                                          NULL);

    /*
     * all done.
     */
    return scanstate;
}

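/*
 * Illustrative plan shape that instantiates this node (table and index
 * names are hypothetical):
 *
 *     Bitmap Heap Scan on tenk1
 *       Recheck Cond: (unique1 < 100)
 *       ->  Bitmap Index Scan on tenk1_unique1
 *             Index Cond: (unique1 < 100)
 *
 * The Bitmap Index Scan is our outer subplan; MultiExecProcNode() on it
 * returns the TIDBitmap consumed by BitmapHeapNext().
 */
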
/*----------------
 *     BitmapShouldInitializeSharedState
 *
 *     The first process to come here and see the state as BM_INITIAL
 *     will become the leader for the parallel bitmap scan and will be
 *     responsible for populating the TIDBitmap.  The other processes
 *     will be blocked by the condition variable until the leader wakes
 *     them up.
 * ---------------
 */
static bool
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
    SharedBitmapState state;

    while (1)
    {
        SpinLockAcquire(&pstate->mutex);
        state = pstate->state;
        if (pstate->state == BM_INITIAL)
            pstate->state = BM_INPROGRESS;
        SpinLockRelease(&pstate->mutex);

        /* Exit if bitmap is done, or if we're the leader. */
        if (state != BM_INPROGRESS)
            break;

        /* Wait for the leader to wake us up. */
        ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
    }

    ConditionVariableCancelSleep();

    return (state == BM_INITIAL);
}

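/*
 * Shared-state life cycle (summary): pstate->state starts as BM_INITIAL;
 * the first worker to observe that becomes the leader and flips it to
 * BM_INPROGRESS; once the leader has built the TIDBitmap,
 * BitmapDoneInitializingSharedState() sets BM_FINISHED and broadcasts on
 * pstate->cv so the remaining workers can attach to the shared iterators.
 */
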
/* ----------------------------------------------------------------
 *     ExecBitmapHeapEstimate
 *
 *     Compute the amount of space we'll need in the parallel
 *     query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
                       ParallelContext *pcxt)
{
    EState     *estate = node->ss.ps.state;

    node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
                                        phs_snapshot_data),
                               EstimateSnapshotSpace(estate->es_snapshot));

    shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
    shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *     ExecBitmapHeapInitializeDSM
 *
 *     Set up a parallel bitmap heap scan descriptor.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
                            ParallelContext *pcxt)
{
    ParallelBitmapHeapState *pstate;
    EState     *estate = node->ss.ps.state;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

    /* If there's no DSA, there are no workers; initialize nothing. */
    if (dsa == NULL)
        return;

    pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);

    pstate->tbmiterator = 0;
    pstate->prefetch_iterator = 0;

    /* Initialize the mutex */
    SpinLockInit(&pstate->mutex);
    pstate->prefetch_pages = 0;
    pstate->prefetch_target = 0;
    pstate->state = BM_INITIAL;

    ConditionVariableInit(&pstate->cv);
    SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);

    shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
    node->pstate = pstate;
}

/* ----------------------------------------------------------------
 *     ExecBitmapHeapReInitializeDSM
 *
 *     Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
                              ParallelContext *pcxt)
{
    ParallelBitmapHeapState *pstate = node->pstate;
    dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

    /* If there's no DSA, there are no workers; do nothing. */
    if (dsa == NULL)
        return;

    pstate->state = BM_INITIAL;

    if (DsaPointerIsValid(pstate->tbmiterator))
        tbm_free_shared_area(dsa, pstate->tbmiterator);

    if (DsaPointerIsValid(pstate->prefetch_iterator))
        tbm_free_shared_area(dsa, pstate->prefetch_iterator);

    pstate->tbmiterator = InvalidDsaPointer;
    pstate->prefetch_iterator = InvalidDsaPointer;
}

/* ----------------------------------------------------------------
 *     ExecBitmapHeapInitializeWorker
 *
 *     Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
                               ParallelWorkerContext *pwcxt)
{
    ParallelBitmapHeapState *pstate;
    Snapshot    snapshot;

    Assert(node->ss.ps.state->es_query_dsa != NULL);

    pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
    node->pstate = pstate;

    snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
    table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
}