/*-------------------------------------------------------------------------
 *
 * nodeBitmapHeapscan.c
 *	  Routines to support bitmapped scans of relations
 *
 * NOTE: it is critical that this plan type only be used with MVCC-compliant
 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
 * special snapshots).  The reason is that since index and heap scans are
 * decoupled, there can be no assurance that the index tuple prompting a
 * visit to a particular heap TID still exists when the visit is made.
 * Therefore the tuple might not exist anymore either (which is OK because
 * heap_fetch will cope) --- but worse, the tuple slot could have been
 * re-used for a newer tuple.  With an MVCC snapshot the newer tuple is
 * certain to fail the time qual and so it will not be mistakenly returned,
 * but with anything else we might return a tuple that doesn't meet the
 * required index qual conditions.
 *
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/nodeBitmapHeapscan.c
 *
 *-------------------------------------------------------------------------
 */
/*
 * INTERFACE ROUTINES
 *		ExecBitmapHeapScan			scans a relation using bitmap info
 *		ExecBitmapHeapNext			workhorse for above
 *		ExecInitBitmapHeapScan		creates and initializes state info.
 *		ExecReScanBitmapHeapScan	prepares to rescan the plan.
 *		ExecEndBitmapHeapScan		releases all storage.
*/
#include "postgres.h"

#include <math.h>

#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "access/visibilitymap.h"
#include "executor/execdebug.h"
#include "executor/nodeBitmapHeapscan.h"
#include "miscadmin.h"
#include "pgstat.h"
#include "storage/bufmgr.h"
#include "storage/predicate.h"
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/spccache.h"
#include "utils/snapmgr.h"


static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
												TBMIterateResult *tbmres);
static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
static inline void BitmapPrefetch(BitmapHeapScanState *node,
								  TableScanDesc scan);
static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);


/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	TableScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator = NULL;
	TBMSharedIterator *shared_tbmiterator = NULL;
	TBMIterateResult *tbmres;
	TupleTableSlot *slot;
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	/* exactly one of the two iterator kinds is used, depending on whether
	 * this is a parallel (shared-state) scan */
	if (pstate == NULL)
		tbmiterator = node->tbmiterator;
	else
		shared_tbmiterator = node->shared_tbmiterator;
	tbmres = node->tbmres;

	/*
	 * If we haven't yet performed the underlying index scan, do it, and begin
	 * the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
	 * a scan that stops after a few tuples because of a LIMIT.
	 */
	if (!node->initialized)
	{
		if (!pstate)
		{
			/* Serial case: run the index subplan to build the bitmap. */
			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

			if (!tbm || !IsA(tbm, TIDBitmap))
				elog(ERROR, "unrecognized result from subplan");

			node->tbm = tbm;
			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->prefetch_iterator = tbm_begin_iterate(tbm);
				node->prefetch_pages = 0;
				node->prefetch_target = -1;
			}
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * The leader will immediately come out of the function, but
			 * others will be blocked until leader populates the TBM and wakes
			 * them up.
			 */
			if (BitmapShouldInitializeSharedState(pstate))
			{
				tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
				if (!tbm || !IsA(tbm, TIDBitmap))
					elog(ERROR, "unrecognized result from subplan");

				node->tbm = tbm;

				/*
				 * Prepare to iterate over the TBM. This will return the
				 * dsa_pointer of the iterator state which will be used by
				 * multiple processes to iterate jointly.
				 */
				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
				if (node->prefetch_maximum > 0)
				{
					pstate->prefetch_iterator =
						tbm_prepare_shared_iterate(tbm);

					/*
					 * We don't need the mutex here as we haven't yet woken up
					 * others.
					 */
					pstate->prefetch_pages = 0;
					pstate->prefetch_target = -1;
				}
#endif

				/* We have initialized the shared state so wake up others. */
				BitmapDoneInitializingSharedState(pstate);
			}

			/* Allocate a private iterator and attach the shared state to it */
			node->shared_tbmiterator = shared_tbmiterator =
				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->shared_prefetch_iterator =
					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
			}
#endif							/* USE_PREFETCH */
		}
		node->initialized = true;
	}

	/* Main loop: advance through bitmap pages/tuples until one is returned. */
	for (;;)
	{
		bool		skip_fetch;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			if (!pstate)
				node->tbmres = tbmres = tbm_iterate(tbmiterator);
			else
				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			BitmapAdjustPrefetchIterator(node, tbmres);

			/*
			 * We can skip fetching the heap page if we don't need any fields
			 * from the heap, and the bitmap entries don't need rechecking,
			 * and all tuples on the page are visible to our transaction.
			 *
			 * XXX: It's a layering violation that we do these checks above
			 * tableam, they should probably be moved below it at some point.
			 */
			skip_fetch = (node->can_skip_fetch &&
						  !tbmres->recheck &&
						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
										 tbmres->blockno,
										 &node->vmbuffer));

			if (skip_fetch)
			{
				/* can't be lossy in the skip_fetch case */
				Assert(tbmres->ntuples >= 0);

				/*
				 * The number of tuples on this page is put into
				 * node->return_empty_tuples.
				 */
				node->return_empty_tuples = tbmres->ntuples;
			}
			else if (!table_scan_bitmap_next_block(scan, tbmres))
			{
				/* AM doesn't think this block is valid, skip */
				continue;
			}

			/* ntuples < 0 means the bitmap page was lossy */
			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/* Adjust the prefetch target */
			BitmapAdjustPrefetchTarget(node);
		}
		else
		{
			/*
			 * Continuing in previously obtained page.
			 */

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (!pstate)
			{
				if (node->prefetch_target < node->prefetch_maximum)
					node->prefetch_target++;
			}
			else if (pstate->prefetch_target < node->prefetch_maximum)
			{
				/* take spinlock while updating shared state */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_target < node->prefetch_maximum)
					pstate->prefetch_target++;
				SpinLockRelease(&pstate->mutex);
			}
#endif							/* USE_PREFETCH */
		}

		/*
		 * We issue prefetch requests *after* fetching the current page to try
		 * to avoid having prefetching interfere with the main I/O. Also, this
		 * should happen only when we have determined there is still something
		 * to do on the current page, else we may uselessly prefetch the same
		 * page we are just about to request for real.
		 *
		 * XXX: It's a layering violation that we do these checks above
		 * tableam, they should probably be moved below it at some point.
		 */
		BitmapPrefetch(node, scan);

		if (node->return_empty_tuples > 0)
		{
			/*
			 * If we don't have to fetch the tuple, just return nulls.
			 */
			ExecStoreAllNullTuple(slot);

			if (--node->return_empty_tuples == 0)
			{
				/* no more tuples to return in the next round */
				node->tbmres = tbmres = NULL;
			}
		}
		else
		{
			/*
			 * Attempt to fetch tuple from AM.
			 */
			if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
			{
				/* nothing more to look at on this page */
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->recheck)
			{
				econtext->ecxt_scantuple = slot;
				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
				{
					/* Fails recheck, so drop it and loop back for another */
					InstrCountFiltered2(node, 1);
					ExecClearTuple(slot);
					continue;
				}
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}

/*
 * BitmapDoneInitializingSharedState - Shared state is initialized
 *
 * By this time the leader has already populated the TBM and initialized the
 * shared state so wake up other processes.
*/
static inline void
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
	SpinLockAcquire(&pstate->mutex);
	pstate->state = BM_FINISHED;
	SpinLockRelease(&pstate->mutex);
	ConditionVariableBroadcast(&pstate->cv);
}

/*
 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 *
 * Keep the prefetch iterator from falling behind the main iterator: either
 * consume one page of already-built prefetch distance, or step the prefetch
 * iterator forward in lockstep with the main one.
 */
static inline void
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
							 TBMIterateResult *tbmres)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		/* Serial scan: private prefetch iterator and counters. */
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (node->prefetch_pages > 0)
		{
			/* The main iterator has closed the distance by one page */
			node->prefetch_pages--;
		}
		else if (prefetch_iterator)
		{
			/* Do not let the prefetch iterator get behind the main one */
			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

			if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
				elog(ERROR, "prefetch and main iterators are out of sync");
		}
		return;
	}

	if (node->prefetch_maximum > 0)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		SpinLockAcquire(&pstate->mutex);
		if (pstate->prefetch_pages > 0)
		{
			pstate->prefetch_pages--;
			SpinLockRelease(&pstate->mutex);
		}
		else
		{
			/* Release the mutex before iterating */
			SpinLockRelease(&pstate->mutex);

			/*
			 * In case of shared mode, we can not ensure that the current
			 * blockno of the main iterator and that of the prefetch iterator
			 * are same.  It's possible that whatever blockno we are
			 * prefetching will be processed by another process.  Therefore,
			 * we don't validate the blockno here as we do in non-parallel
			 * case.
			 */
			if (prefetch_iterator)
				tbm_shared_iterate(prefetch_iterator);
		}
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
 *
 * Increase prefetch target if it's not yet at the max.  Note that
 * we will increase it to zero after fetching the very first
 * page/tuple, then to one after the second tuple is fetched, then
 * it doubles as later pages are fetched.
 */
static inline void
BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		if (node->prefetch_target >= node->prefetch_maximum)
			 /* don't increase any further */ ;
		else if (node->prefetch_target >= node->prefetch_maximum / 2)
			node->prefetch_target = node->prefetch_maximum;
		else if (node->prefetch_target > 0)
			node->prefetch_target *= 2;
		else
			node->prefetch_target++;
		return;
	}

	/* Do an unlocked check first to save spinlock acquisitions. */
	if (pstate->prefetch_target < node->prefetch_maximum)
	{
		SpinLockAcquire(&pstate->mutex);
		/* re-check under the mutex; same ramp-up schedule as serial case */
		if (pstate->prefetch_target >= node->prefetch_maximum)
			 /* don't increase any further */ ;
		else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
			pstate->prefetch_target = node->prefetch_maximum;
		else if (pstate->prefetch_target > 0)
			pstate->prefetch_target *= 2;
		else
			pstate->prefetch_target++;
		SpinLockRelease(&pstate->mutex);
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
				bool		skip_fetch;

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;

				/*
				 * If we expect not to have to actually read this heap page,
				 * skip this prefetch call, but continue to run the prefetch
				 * logic normally.  (Would it be better not to increment
				 * prefetch_pages?)
				 *
				 * This depends on the assumption that the index AM will
				 * report the same recheck flag for this future heap page as
				 * it did for the current heap page; which is not a certainty
				 * but is true in many cases.
				 */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}

		return;
	}

	/* Parallel case: prefetch budget is shared among all workers. */
	if (pstate->prefetch_pages < pstate->prefetch_target)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		if (prefetch_iterator)
		{
			while (1)
			{
				TBMIterateResult *tbmpre;
				bool		do_prefetch = false;
				bool		skip_fetch;

				/*
				 * Recheck under the mutex. If some other process has already
				 * done enough prefetching then we need not do anything.
				 */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_pages < pstate->prefetch_target)
				{
					pstate->prefetch_pages++;
					do_prefetch = true;
				}
				SpinLockRelease(&pstate->mutex);

				if (!do_prefetch)
					return;

				tbmpre = tbm_shared_iterate(prefetch_iterator);
				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_shared_iterate(prefetch_iterator);
					node->shared_prefetch_iterator = NULL;
					break;
				}

				/* As above, skip prefetch if we expect not to need page */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
	}
#endif							/* USE_PREFETCH */
}

/*
 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
 */
static bool
BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
{
	ExprContext *econtext;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;

	/* Does the tuple meet the original qual conditions?
*/
	econtext->ecxt_scantuple = slot;
	return ExecQualAndReset(node->bitmapqualorig, econtext);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
ExecBitmapHeapScan(PlanState *pstate)
{
	BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);

	return ExecScan(&node->ss,
					(ExecScanAccessMtd) BitmapHeapNext,
					(ExecScanRecheckMtd) BitmapHeapRecheck);
}

/* ----------------------------------------------------------------
 *		ExecReScanBitmapHeapScan(node)
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/* rescan to release any page pin */
	table_rescan(node->ss.ss_currentScanDesc, NULL);

	/* release bitmaps and buffers if any */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);
	/* reset state so the next fetch rebuilds the bitmap from scratch */
	node->tbm = NULL;
	node->tbmiterator = NULL;
	node->tbmres = NULL;
	node->prefetch_iterator = NULL;
	node->initialized = false;
	node->shared_tbmiterator = NULL;
	node->shared_prefetch_iterator = NULL;
	node->vmbuffer = InvalidBuffer;
	node->pvmbuffer = InvalidBuffer;

	ExecScanReScan(&node->ss);

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}

/* ----------------------------------------------------------------
 *		ExecEndBitmapHeapScan
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
	TableScanDesc scanDesc;

	/*
	 * extract information from the node
	 */
	scanDesc = node->ss.ss_currentScanDesc;

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ss.ps);

	/*
	 * clear out tuple table slots
	 */
	if (node->ss.ps.ps_ResultTupleSlot)
		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * close down subplans
	 */
	ExecEndNode(outerPlanState(node));

	/*
	 * release bitmaps and buffers if any
	 */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);

	/*
	 * close heap scan
	 */
	table_endscan(scanDesc);
}

/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
* ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;
	int			io_concurrency;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;
	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->return_empty_tuples = 0;
	scanstate->vmbuffer = InvalidBuffer;
	scanstate->pvmbuffer = InvalidBuffer;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	/* may be updated below */
	scanstate->prefetch_maximum = target_prefetch_pages;
	scanstate->pscan_len = 0;
	scanstate->initialized = false;
	scanstate->shared_tbmiterator = NULL;
	scanstate->shared_prefetch_iterator = NULL;
	scanstate->pstate = NULL;

	/*
	 * We can potentially skip fetching heap pages if we do not need any
	 * columns of the table, either for checking non-indexable quals or for
	 * returning data.  This test is a bit simplistic, as it checks the
	 * stronger condition that there's no qual or return tlist at all. But in
	 * most cases it's probably not worth working harder than that.
	 */
	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
								 node->scan.plan.targetlist == NIL);

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/*
	 * open the scan relation
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * initialize child nodes
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecInitScanTupleSlot(estate, &scanstate->ss,
						  RelationGetDescr(currentRelation),
						  table_slot_callbacks(currentRelation));

	/*
	 * Initialize result type and projection.
	 */
	ExecInitResultTypeTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child expressions
	 */
	scanstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
	scanstate->bitmapqualorig =
		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

	/*
	 * Determine the maximum for prefetch_target.  If the tablespace has a
	 * specific IO concurrency set, use that to compute the corresponding
	 * maximum value; otherwise, we already initialized to the value computed
	 * by the GUC machinery.
	 */
	io_concurrency =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);
	if (io_concurrency != effective_io_concurrency)
	{
		double		maximum;

		if (ComputeIoConcurrency(io_concurrency, &maximum))
			scanstate->prefetch_maximum = rint(maximum);
	}

	scanstate->ss.ss_currentRelation = currentRelation;

	scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
														  estate->es_snapshot,
														  0,
														  NULL);

	/*
	 * all done.
	 */
	return scanstate;
}

/*----------------
 *		BitmapShouldInitializeSharedState
 *
 *		The first process to come here and see the state to the BM_INITIAL
 *		will become the leader for the parallel bitmap scan and will be
 *		responsible for populating the TIDBitmap.  The other processes will
 *		be blocked by the condition variable until the leader wakes them up.
 * ---------------
 */
static bool
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
	SharedBitmapState state;

	while (1)
	{
		/* atomically read the state, claiming leadership if still BM_INITIAL */
		SpinLockAcquire(&pstate->mutex);
		state = pstate->state;
		if (pstate->state == BM_INITIAL)
			pstate->state = BM_INPROGRESS;
		SpinLockRelease(&pstate->mutex);

		/* Exit if bitmap is done, or if we're the leader. */
		if (state != BM_INPROGRESS)
			break;

		/* Wait for the leader to wake us up. */
		ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
	}

	ConditionVariableCancelSleep();

	return (state == BM_INITIAL);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapEstimate
 *
 *		Compute the amount of space we'll need in the parallel
 *		query DSM, and inform pcxt->estimator about our needs.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapEstimate(BitmapHeapScanState *node,
					   ParallelContext *pcxt)
{
	EState	   *estate = node->ss.ps.state;

	/* shared state struct plus space for the serialized snapshot */
	node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
										phs_snapshot_data),
							   EstimateSnapshotSpace(estate->es_snapshot));

	shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
	shm_toc_estimate_keys(&pcxt->estimator, 1);
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapInitializeDSM
 *
 *		Set up a parallel bitmap heap scan descriptor.
* ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
							ParallelContext *pcxt)
{
	ParallelBitmapHeapState *pstate;
	EState	   *estate = node->ss.ps.state;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/* If there's no DSA, there are no workers; initialize nothing. */
	if (dsa == NULL)
		return;

	pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);

	pstate->tbmiterator = 0;
	pstate->prefetch_iterator = 0;

	/* Initialize the mutex */
	SpinLockInit(&pstate->mutex);
	pstate->prefetch_pages = 0;
	pstate->prefetch_target = 0;
	pstate->state = BM_INITIAL;

	ConditionVariableInit(&pstate->cv);
	SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);

	/* publish the fully-initialized state for workers to look up */
	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
	node->pstate = pstate;
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
							  ParallelContext *pcxt)
{
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/* If there's no DSA, there are no workers; do nothing. */
	if (dsa == NULL)
		return;

	pstate->state = BM_INITIAL;

	/* free any shared iterator state left over from the previous scan */
	if (DsaPointerIsValid(pstate->tbmiterator))
		tbm_free_shared_area(dsa, pstate->tbmiterator);

	if (DsaPointerIsValid(pstate->prefetch_iterator))
		tbm_free_shared_area(dsa, pstate->prefetch_iterator);

	pstate->tbmiterator = InvalidDsaPointer;
	pstate->prefetch_iterator = InvalidDsaPointer;
}

/* ----------------------------------------------------------------
 *		ExecBitmapHeapInitializeWorker
 *
 *		Copy relevant information from TOC into planstate.
 * ----------------------------------------------------------------
 */
void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
							   ParallelWorkerContext *pwcxt)
{
	ParallelBitmapHeapState *pstate;
	Snapshot	snapshot;

	Assert(node->ss.ps.state->es_query_dsa != NULL);

	pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
	node->pstate = pstate;

	/* adopt the snapshot the leader serialized into shared memory */
	snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
	table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
}