1 /*-------------------------------------------------------------------------
2 *
3 * nodeBitmapHeapscan.c
4 * Routines to support bitmapped scans of relations
5 *
6 * NOTE: it is critical that this plan type only be used with MVCC-compliant
7 * snapshots (ie, regular snapshots, not SnapshotAny or one of the other
8 * special snapshots). The reason is that since index and heap scans are
9 * decoupled, there can be no assurance that the index tuple prompting a
10 * visit to a particular heap TID still exists when the visit is made.
11 * Therefore the tuple might not exist anymore either (which is OK because
12 * heap_fetch will cope) --- but worse, the tuple slot could have been
13 * re-used for a newer tuple. With an MVCC snapshot the newer tuple is
14 * certain to fail the time qual and so it will not be mistakenly returned,
15 * but with anything else we might return a tuple that doesn't meet the
16 * required index qual conditions.
17 *
18 *
19 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
20 * Portions Copyright (c) 1994, Regents of the University of California
21 *
22 *
23 * IDENTIFICATION
24 * src/backend/executor/nodeBitmapHeapscan.c
25 *
26 *-------------------------------------------------------------------------
27 */
28 /*
29 * INTERFACE ROUTINES
30 * ExecBitmapHeapScan scans a relation using bitmap info
31 * ExecBitmapHeapNext workhorse for above
32 * ExecInitBitmapHeapScan creates and initializes state info.
33 * ExecReScanBitmapHeapScan prepares to rescan the plan.
34 * ExecEndBitmapHeapScan releases all storage.
35 */
36 #include "postgres.h"
37
38 #include <math.h>
39
40 #include "access/relscan.h"
41 #include "access/tableam.h"
42 #include "access/transam.h"
43 #include "access/visibilitymap.h"
44 #include "executor/execdebug.h"
45 #include "executor/nodeBitmapHeapscan.h"
46 #include "miscadmin.h"
47 #include "pgstat.h"
48 #include "storage/bufmgr.h"
49 #include "storage/predicate.h"
50 #include "utils/memutils.h"
51 #include "utils/rel.h"
52 #include "utils/snapmgr.h"
53 #include "utils/spccache.h"
54
55 static TupleTableSlot *BitmapHeapNext(BitmapHeapScanState *node);
56 static inline void BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate);
57 static inline void BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
58 TBMIterateResult *tbmres);
59 static inline void BitmapAdjustPrefetchTarget(BitmapHeapScanState *node);
60 static inline void BitmapPrefetch(BitmapHeapScanState *node,
61 TableScanDesc scan);
62 static bool BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate);
63
64
/* ----------------------------------------------------------------
 *		BitmapHeapNext
 *
 *		Retrieve next tuple from the BitmapHeapScan node's currentRelation.
 *
 *		Drives both the serial and the parallel form of the scan: in the
 *		serial case we own a private TBMIterator, in the parallel case we
 *		attach to a TBMSharedIterator living in the query's DSA area.
 *		Returns an empty slot when the bitmap is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
BitmapHeapNext(BitmapHeapScanState *node)
{
	ExprContext *econtext;
	TableScanDesc scan;
	TIDBitmap  *tbm;
	TBMIterator *tbmiterator = NULL;
	TBMSharedIterator *shared_tbmiterator = NULL;
	TBMIterateResult *tbmres;
	TupleTableSlot *slot;
	ParallelBitmapHeapState *pstate = node->pstate;
	dsa_area   *dsa = node->ss.ps.state->es_query_dsa;

	/*
	 * extract necessary information from index scan node
	 */
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;
	scan = node->ss.ss_currentScanDesc;
	tbm = node->tbm;
	/* exactly one of the two iterator kinds is in use, per pstate */
	if (pstate == NULL)
		tbmiterator = node->tbmiterator;
	else
		shared_tbmiterator = node->shared_tbmiterator;
	tbmres = node->tbmres;

	/*
	 * If we haven't yet performed the underlying index scan, do it, and begin
	 * the iteration over the bitmap.
	 *
	 * For prefetching, we use *two* iterators, one for the pages we are
	 * actually scanning and another that runs ahead of the first for
	 * prefetching.  node->prefetch_pages tracks exactly how many pages ahead
	 * the prefetch iterator is.  Also, node->prefetch_target tracks the
	 * desired prefetch distance, which starts small and increases up to the
	 * node->prefetch_maximum.  This is to avoid doing a lot of prefetching in
	 * a scan that stops after a few tuples because of a LIMIT.
	 */
	if (!node->initialized)
	{
		if (!pstate)
		{
			/* Serial case: run the index subplan to build a private bitmap */
			tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));

			if (!tbm || !IsA(tbm, TIDBitmap))
				elog(ERROR, "unrecognized result from subplan");

			node->tbm = tbm;
			node->tbmiterator = tbmiterator = tbm_begin_iterate(tbm);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->prefetch_iterator = tbm_begin_iterate(tbm);
				node->prefetch_pages = 0;
				/* -1 means "not yet prefetching"; grows from the first page */
				node->prefetch_target = -1;
			}
#endif							/* USE_PREFETCH */
		}
		else
		{
			/*
			 * The leader will immediately come out of the function, but
			 * others will be blocked until leader populates the TBM and wakes
			 * them up.
			 */
			if (BitmapShouldInitializeSharedState(pstate))
			{
				tbm = (TIDBitmap *) MultiExecProcNode(outerPlanState(node));
				if (!tbm || !IsA(tbm, TIDBitmap))
					elog(ERROR, "unrecognized result from subplan");

				node->tbm = tbm;

				/*
				 * Prepare to iterate over the TBM. This will return the
				 * dsa_pointer of the iterator state which will be used by
				 * multiple processes to iterate jointly.
				 */
				pstate->tbmiterator = tbm_prepare_shared_iterate(tbm);
#ifdef USE_PREFETCH
				if (node->prefetch_maximum > 0)
				{
					pstate->prefetch_iterator =
						tbm_prepare_shared_iterate(tbm);

					/*
					 * We don't need the mutex here as we haven't yet woken
					 * up others.
					 */
					pstate->prefetch_pages = 0;
					pstate->prefetch_target = -1;
				}
#endif

				/* We have initialized the shared state so wake up others. */
				BitmapDoneInitializingSharedState(pstate);
			}

			/* Allocate a private iterator and attach the shared state to it */
			node->shared_tbmiterator = shared_tbmiterator =
				tbm_attach_shared_iterate(dsa, pstate->tbmiterator);
			node->tbmres = tbmres = NULL;

#ifdef USE_PREFETCH
			if (node->prefetch_maximum > 0)
			{
				node->shared_prefetch_iterator =
					tbm_attach_shared_iterate(dsa, pstate->prefetch_iterator);
			}
#endif							/* USE_PREFETCH */
		}
		node->initialized = true;
	}

	for (;;)
	{
		bool		skip_fetch;

		CHECK_FOR_INTERRUPTS();

		/*
		 * Get next page of results if needed
		 */
		if (tbmres == NULL)
		{
			if (!pstate)
				node->tbmres = tbmres = tbm_iterate(tbmiterator);
			else
				node->tbmres = tbmres = tbm_shared_iterate(shared_tbmiterator);
			if (tbmres == NULL)
			{
				/* no more entries in the bitmap */
				break;
			}

			/* Keep the prefetch iterator from falling behind the main one */
			BitmapAdjustPrefetchIterator(node, tbmres);

			/*
			 * We can skip fetching the heap page if we don't need any fields
			 * from the heap, and the bitmap entries don't need rechecking,
			 * and all tuples on the page are visible to our transaction.
			 *
			 * XXX: It's a layering violation that we do these checks above
			 * tableam, they should probably moved below it at some point.
			 */
			skip_fetch = (node->can_skip_fetch &&
						  !tbmres->recheck &&
						  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
										 tbmres->blockno,
										 &node->vmbuffer));

			if (skip_fetch)
			{
				/* can't be lossy in the skip_fetch case */
				Assert(tbmres->ntuples >= 0);

				/*
				 * The number of tuples on this page is put into
				 * node->return_empty_tuples.
				 */
				node->return_empty_tuples = tbmres->ntuples;
			}
			else if (!table_scan_bitmap_next_block(scan, tbmres))
			{
				/* AM doesn't think this block is valid, skip */
				continue;
			}

			/* ntuples < 0 signals a lossy page (only block number known) */
			if (tbmres->ntuples >= 0)
				node->exact_pages++;
			else
				node->lossy_pages++;

			/* Adjust the prefetch target */
			BitmapAdjustPrefetchTarget(node);
		}
		else
		{
			/*
			 * Continuing in previously obtained page.
			 */

#ifdef USE_PREFETCH

			/*
			 * Try to prefetch at least a few pages even before we get to the
			 * second page if we don't stop reading after the first tuple.
			 */
			if (!pstate)
			{
				if (node->prefetch_target < node->prefetch_maximum)
					node->prefetch_target++;
			}
			else if (pstate->prefetch_target < node->prefetch_maximum)
			{
				/* take spinlock while updating shared state */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_target < node->prefetch_maximum)
					pstate->prefetch_target++;
				SpinLockRelease(&pstate->mutex);
			}
#endif							/* USE_PREFETCH */
		}

		/*
		 * We issue prefetch requests *after* fetching the current page to try
		 * to avoid having prefetching interfere with the main I/O. Also, this
		 * should happen only when we have determined there is still something
		 * to do on the current page, else we may uselessly prefetch the same
		 * page we are just about to request for real.
		 *
		 * XXX: It's a layering violation that we do these checks above
		 * tableam, they should probably moved below it at some point.
		 */
		BitmapPrefetch(node, scan);

		if (node->return_empty_tuples > 0)
		{
			/*
			 * If we don't have to fetch the tuple, just return nulls.
			 */
			ExecStoreAllNullTuple(slot);

			if (--node->return_empty_tuples == 0)
			{
				/* no more tuples to return in the next round */
				node->tbmres = tbmres = NULL;
			}
		}
		else
		{
			/*
			 * Attempt to fetch tuple from AM.
			 */
			if (!table_scan_bitmap_next_tuple(scan, tbmres, slot))
			{
				/* nothing more to look at on this page */
				node->tbmres = tbmres = NULL;
				continue;
			}

			/*
			 * If we are using lossy info, we have to recheck the qual
			 * conditions at every tuple.
			 */
			if (tbmres->recheck)
			{
				econtext->ecxt_scantuple = slot;
				if (!ExecQualAndReset(node->bitmapqualorig, econtext))
				{
					/* Fails recheck, so drop it and loop back for another */
					InstrCountFiltered2(node, 1);
					ExecClearTuple(slot);
					continue;
				}
			}
		}

		/* OK to return this tuple */
		return slot;
	}

	/*
	 * if we get here it means we are at the end of the scan..
	 */
	return ExecClearTuple(slot);
}
340
/*
 * BitmapDoneInitializingSharedState - Shared state is initialized
 *
 * By this time the leader has already populated the TBM and initialized the
 * shared state so wake up other processes.
 *
 * The state must be updated under the mutex *before* broadcasting, so that a
 * waiter re-checking the state after waking cannot observe BM_INPROGRESS.
 */
static inline void
BitmapDoneInitializingSharedState(ParallelBitmapHeapState *pstate)
{
	SpinLockAcquire(&pstate->mutex);
	pstate->state = BM_FINISHED;
	SpinLockRelease(&pstate->mutex);
	ConditionVariableBroadcast(&pstate->cv);
}
355
/*
 * BitmapAdjustPrefetchIterator - Adjust the prefetch iterator
 *
 * The main iterator has just advanced to tbmres's page, so either consume
 * one page of accumulated prefetch distance or step the prefetch iterator
 * forward so it never lags behind the main iterator.
 */
static inline void
BitmapAdjustPrefetchIterator(BitmapHeapScanState *node,
							 TBMIterateResult *tbmres)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		/* Serial scan: private iterator and counters, no locking needed */
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (node->prefetch_pages > 0)
		{
			/* The main iterator has closed the distance by one page */
			node->prefetch_pages--;
		}
		else if (prefetch_iterator)
		{
			/* Do not let the prefetch iterator get behind the main one */
			TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);

			/*
			 * In the serial case both iterators walk the same bitmap in the
			 * same order, so at zero distance they must yield the same block.
			 */
			if (tbmpre == NULL || tbmpre->blockno != tbmres->blockno)
				elog(ERROR, "prefetch and main iterators are out of sync");
		}
		return;
	}

	if (node->prefetch_maximum > 0)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		SpinLockAcquire(&pstate->mutex);
		if (pstate->prefetch_pages > 0)
		{
			pstate->prefetch_pages--;
			SpinLockRelease(&pstate->mutex);
		}
		else
		{
			/* Release the mutex before iterating */
			SpinLockRelease(&pstate->mutex);

			/*
			 * In case of shared mode, we can not ensure that the current
			 * blockno of the main iterator and that of the prefetch iterator
			 * are same.  It's possible that whatever blockno we are
			 * prefetching will be processed by another process.  Therefore,
			 * we don't validate the blockno here as we do in non-parallel
			 * case.
			 */
			if (prefetch_iterator)
				tbm_shared_iterate(prefetch_iterator);
		}
	}
#endif							/* USE_PREFETCH */
}
415
416 /*
417 * BitmapAdjustPrefetchTarget - Adjust the prefetch target
418 *
419 * Increase prefetch target if it's not yet at the max. Note that
420 * we will increase it to zero after fetching the very first
421 * page/tuple, then to one after the second tuple is fetched, then
422 * it doubles as later pages are fetched.
423 */
424 static inline void
BitmapAdjustPrefetchTarget(BitmapHeapScanState * node)425 BitmapAdjustPrefetchTarget(BitmapHeapScanState *node)
426 {
427 #ifdef USE_PREFETCH
428 ParallelBitmapHeapState *pstate = node->pstate;
429
430 if (pstate == NULL)
431 {
432 if (node->prefetch_target >= node->prefetch_maximum)
433 /* don't increase any further */ ;
434 else if (node->prefetch_target >= node->prefetch_maximum / 2)
435 node->prefetch_target = node->prefetch_maximum;
436 else if (node->prefetch_target > 0)
437 node->prefetch_target *= 2;
438 else
439 node->prefetch_target++;
440 return;
441 }
442
443 /* Do an unlocked check first to save spinlock acquisitions. */
444 if (pstate->prefetch_target < node->prefetch_maximum)
445 {
446 SpinLockAcquire(&pstate->mutex);
447 if (pstate->prefetch_target >= node->prefetch_maximum)
448 /* don't increase any further */ ;
449 else if (pstate->prefetch_target >= node->prefetch_maximum / 2)
450 pstate->prefetch_target = node->prefetch_maximum;
451 else if (pstate->prefetch_target > 0)
452 pstate->prefetch_target *= 2;
453 else
454 pstate->prefetch_target++;
455 SpinLockRelease(&pstate->mutex);
456 }
457 #endif /* USE_PREFETCH */
458 }
459
/*
 * BitmapPrefetch - Prefetch, if prefetch_pages are behind prefetch_target
 *
 * Issues PrefetchBuffer() calls for upcoming pages until the prefetch
 * distance reaches the current target, or the bitmap runs out of pages.
 */
static inline void
BitmapPrefetch(BitmapHeapScanState *node, TableScanDesc scan)
{
#ifdef USE_PREFETCH
	ParallelBitmapHeapState *pstate = node->pstate;

	if (pstate == NULL)
	{
		/* Serial scan: private prefetch iterator and counters */
		TBMIterator *prefetch_iterator = node->prefetch_iterator;

		if (prefetch_iterator)
		{
			while (node->prefetch_pages < node->prefetch_target)
			{
				TBMIterateResult *tbmpre = tbm_iterate(prefetch_iterator);
				bool		skip_fetch;

				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_iterate(prefetch_iterator);
					node->prefetch_iterator = NULL;
					break;
				}
				node->prefetch_pages++;

				/*
				 * If we expect not to have to actually read this heap page,
				 * skip this prefetch call, but continue to run the prefetch
				 * logic normally.  (Would it be better not to increment
				 * prefetch_pages?)
				 *
				 * This depends on the assumption that the index AM will
				 * report the same recheck flag for this future heap page as
				 * it did for the current heap page; which is not a certainty
				 * but is true in many cases.
				 */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}

		return;
	}

	/* Parallel scan: distance bookkeeping lives in shared state */
	if (pstate->prefetch_pages < pstate->prefetch_target)
	{
		TBMSharedIterator *prefetch_iterator = node->shared_prefetch_iterator;

		if (prefetch_iterator)
		{
			while (1)
			{
				TBMIterateResult *tbmpre;
				bool		do_prefetch = false;
				bool		skip_fetch;

				/*
				 * Recheck under the mutex.  If some other process has already
				 * done enough prefetching then we need not to do anything.
				 */
				SpinLockAcquire(&pstate->mutex);
				if (pstate->prefetch_pages < pstate->prefetch_target)
				{
					pstate->prefetch_pages++;
					do_prefetch = true;
				}
				SpinLockRelease(&pstate->mutex);

				if (!do_prefetch)
					return;

				tbmpre = tbm_shared_iterate(prefetch_iterator);
				if (tbmpre == NULL)
				{
					/* No more pages to prefetch */
					tbm_end_shared_iterate(prefetch_iterator);
					node->shared_prefetch_iterator = NULL;
					break;
				}

				/* As above, skip prefetch if we expect not to need page */
				skip_fetch = (node->can_skip_fetch &&
							  (node->tbmres ? !node->tbmres->recheck : false) &&
							  VM_ALL_VISIBLE(node->ss.ss_currentRelation,
											 tbmpre->blockno,
											 &node->pvmbuffer));

				if (!skip_fetch)
					PrefetchBuffer(scan->rs_rd, MAIN_FORKNUM, tbmpre->blockno);
			}
		}
	}
#endif							/* USE_PREFETCH */
}
564
565 /*
566 * BitmapHeapRecheck -- access method routine to recheck a tuple in EvalPlanQual
567 */
568 static bool
BitmapHeapRecheck(BitmapHeapScanState * node,TupleTableSlot * slot)569 BitmapHeapRecheck(BitmapHeapScanState *node, TupleTableSlot *slot)
570 {
571 ExprContext *econtext;
572
573 /*
574 * extract necessary information from index scan node
575 */
576 econtext = node->ss.ps.ps_ExprContext;
577
578 /* Does the tuple meet the original qual conditions? */
579 econtext->ecxt_scantuple = slot;
580 return ExecQualAndReset(node->bitmapqualorig, econtext);
581 }
582
583 /* ----------------------------------------------------------------
584 * ExecBitmapHeapScan(node)
585 * ----------------------------------------------------------------
586 */
587 static TupleTableSlot *
ExecBitmapHeapScan(PlanState * pstate)588 ExecBitmapHeapScan(PlanState *pstate)
589 {
590 BitmapHeapScanState *node = castNode(BitmapHeapScanState, pstate);
591
592 return ExecScan(&node->ss,
593 (ExecScanAccessMtd) BitmapHeapNext,
594 (ExecScanRecheckMtd) BitmapHeapRecheck);
595 }
596
/* ----------------------------------------------------------------
 *		ExecReScanBitmapHeapScan(node)
 *
 *		Release all per-scan resources and reset state so the next
 *		ExecProcNode call rebuilds the bitmap from scratch.
 * ----------------------------------------------------------------
 */
void
ExecReScanBitmapHeapScan(BitmapHeapScanState *node)
{
	PlanState  *outerPlan = outerPlanState(node);

	/* rescan to release any page pin */
	table_rescan(node->ss.ss_currentScanDesc, NULL);

	/*
	 * release bitmaps and buffers if any; note the iterators must be ended
	 * before the TIDBitmap they reference is freed
	 */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);
	node->tbm = NULL;
	node->tbmiterator = NULL;
	node->tbmres = NULL;
	node->prefetch_iterator = NULL;
	node->initialized = false;
	node->shared_tbmiterator = NULL;
	node->shared_prefetch_iterator = NULL;
	node->vmbuffer = InvalidBuffer;
	node->pvmbuffer = InvalidBuffer;

	ExecScanReScan(&node->ss);

	/*
	 * if chgParam of subnode is not null then plan will be re-scanned by
	 * first ExecProcNode.
	 */
	if (outerPlan->chgParam == NULL)
		ExecReScan(outerPlan);
}
643
/* ----------------------------------------------------------------
 *		ExecEndBitmapHeapScan
 *
 *		Shut down the node: free expression state, clear slots, end the
 *		subplan, release bitmap/iterator/buffer resources, and close the
 *		underlying table scan.
 * ----------------------------------------------------------------
 */
void
ExecEndBitmapHeapScan(BitmapHeapScanState *node)
{
	TableScanDesc scanDesc;

	/*
	 * extract information from the node
	 */
	scanDesc = node->ss.ss_currentScanDesc;

	/*
	 * Free the exprcontext
	 */
	ExecFreeExprContext(&node->ss.ps);

	/*
	 * clear out tuple table slots
	 */
	if (node->ss.ps.ps_ResultTupleSlot)
		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
	ExecClearTuple(node->ss.ss_ScanTupleSlot);

	/*
	 * close down subplans
	 */
	ExecEndNode(outerPlanState(node));

	/*
	 * release bitmaps and buffers if any (iterators before the bitmap
	 * itself, since they reference it)
	 */
	if (node->tbmiterator)
		tbm_end_iterate(node->tbmiterator);
	if (node->prefetch_iterator)
		tbm_end_iterate(node->prefetch_iterator);
	if (node->tbm)
		tbm_free(node->tbm);
	if (node->shared_tbmiterator)
		tbm_end_shared_iterate(node->shared_tbmiterator);
	if (node->shared_prefetch_iterator)
		tbm_end_shared_iterate(node->shared_prefetch_iterator);
	if (node->vmbuffer != InvalidBuffer)
		ReleaseBuffer(node->vmbuffer);
	if (node->pvmbuffer != InvalidBuffer)
		ReleaseBuffer(node->pvmbuffer);

	/*
	 * close heap scan
	 */
	table_endscan(scanDesc);
}
698
/* ----------------------------------------------------------------
 *		ExecInitBitmapHeapScan
 *
 *		Initializes the scan's state information.
 *
 *		Builds and returns a BitmapHeapScanState for the given plan node,
 *		opening the scan relation, initializing the child (bitmap index)
 *		plan, and starting the underlying table scan.
 * ----------------------------------------------------------------
 */
BitmapHeapScanState *
ExecInitBitmapHeapScan(BitmapHeapScan *node, EState *estate, int eflags)
{
	BitmapHeapScanState *scanstate;
	Relation	currentRelation;

	/* check for unsupported flags */
	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));

	/*
	 * Assert caller didn't ask for an unsafe snapshot --- see comments at
	 * head of file.
	 */
	Assert(IsMVCCSnapshot(estate->es_snapshot));

	/*
	 * create state structure
	 */
	scanstate = makeNode(BitmapHeapScanState);
	scanstate->ss.ps.plan = (Plan *) node;
	scanstate->ss.ps.state = estate;
	scanstate->ss.ps.ExecProcNode = ExecBitmapHeapScan;

	/* start with no bitmap, no iterators, and no pinned VM buffers */
	scanstate->tbm = NULL;
	scanstate->tbmiterator = NULL;
	scanstate->tbmres = NULL;
	scanstate->return_empty_tuples = 0;
	scanstate->vmbuffer = InvalidBuffer;
	scanstate->pvmbuffer = InvalidBuffer;
	scanstate->exact_pages = 0;
	scanstate->lossy_pages = 0;
	scanstate->prefetch_iterator = NULL;
	scanstate->prefetch_pages = 0;
	scanstate->prefetch_target = 0;
	scanstate->pscan_len = 0;
	scanstate->initialized = false;
	scanstate->shared_tbmiterator = NULL;
	scanstate->shared_prefetch_iterator = NULL;
	scanstate->pstate = NULL;

	/*
	 * We can potentially skip fetching heap pages if we do not need any
	 * columns of the table, either for checking non-indexable quals or for
	 * returning data.  This test is a bit simplistic, as it checks the
	 * stronger condition that there's no qual or return tlist at all. But in
	 * most cases it's probably not worth working harder than that.
	 */
	scanstate->can_skip_fetch = (node->scan.plan.qual == NIL &&
								 node->scan.plan.targetlist == NIL);

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &scanstate->ss.ps);

	/*
	 * open the scan relation
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	/*
	 * initialize child nodes
	 */
	outerPlanState(scanstate) = ExecInitNode(outerPlan(node), estate, eflags);

	/*
	 * get the scan type from the relation descriptor.
	 */
	ExecInitScanTupleSlot(estate, &scanstate->ss,
						  RelationGetDescr(currentRelation),
						  table_slot_callbacks(currentRelation));

	/*
	 * Initialize result type and projection.
	 */
	ExecInitResultTypeTL(&scanstate->ss.ps);
	ExecAssignScanProjectionInfo(&scanstate->ss);

	/*
	 * initialize child expressions (must follow ExecAssignExprContext)
	 */
	scanstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) scanstate);
	scanstate->bitmapqualorig =
		ExecInitQual(node->bitmapqualorig, (PlanState *) scanstate);

	/*
	 * Maximum number of prefetches for the tablespace if configured,
	 * otherwise the current value of the effective_io_concurrency GUC.
	 */
	scanstate->prefetch_maximum =
		get_tablespace_io_concurrency(currentRelation->rd_rel->reltablespace);

	scanstate->ss.ss_currentRelation = currentRelation;

	/* begin the underlying table scan; no scan keys are used */
	scanstate->ss.ss_currentScanDesc = table_beginscan_bm(currentRelation,
														  estate->es_snapshot,
														  0,
														  NULL);

	/*
	 * all done.
	 */
	return scanstate;
}
812
/*----------------
 * BitmapShouldInitializeSharedState
 *
 * The first process to come here and see the state to the BM_INITIAL
 * will become the leader for the parallel bitmap scan and will be
 * responsible for populating the TIDBitmap.  The other processes will
 * be blocked by the condition variable until the leader wakes them up.
 *
 * Returns true only in the process elected leader.  State transitions:
 * BM_INITIAL -> BM_INPROGRESS (claimed by leader) -> BM_FINISHED
 * (set by BitmapDoneInitializingSharedState).
 * ---------------
 */
static bool
BitmapShouldInitializeSharedState(ParallelBitmapHeapState *pstate)
{
	SharedBitmapState state;

	while (1)
	{
		/* Atomically read the state, claiming leadership if still initial */
		SpinLockAcquire(&pstate->mutex);
		state = pstate->state;
		if (pstate->state == BM_INITIAL)
			pstate->state = BM_INPROGRESS;
		SpinLockRelease(&pstate->mutex);

		/* Exit if bitmap is done, or if we're the leader. */
		if (state != BM_INPROGRESS)
			break;

		/* Wait for the leader to wake us up. */
		ConditionVariableSleep(&pstate->cv, WAIT_EVENT_PARALLEL_BITMAP_SCAN);
	}

	ConditionVariableCancelSleep();

	return (state == BM_INITIAL);
}
847
848 /* ----------------------------------------------------------------
849 * ExecBitmapHeapEstimate
850 *
851 * Compute the amount of space we'll need in the parallel
852 * query DSM, and inform pcxt->estimator about our needs.
853 * ----------------------------------------------------------------
854 */
855 void
ExecBitmapHeapEstimate(BitmapHeapScanState * node,ParallelContext * pcxt)856 ExecBitmapHeapEstimate(BitmapHeapScanState *node,
857 ParallelContext *pcxt)
858 {
859 EState *estate = node->ss.ps.state;
860
861 node->pscan_len = add_size(offsetof(ParallelBitmapHeapState,
862 phs_snapshot_data),
863 EstimateSnapshotSpace(estate->es_snapshot));
864
865 shm_toc_estimate_chunk(&pcxt->estimator, node->pscan_len);
866 shm_toc_estimate_keys(&pcxt->estimator, 1);
867 }
868
869 /* ----------------------------------------------------------------
870 * ExecBitmapHeapInitializeDSM
871 *
872 * Set up a parallel bitmap heap scan descriptor.
873 * ----------------------------------------------------------------
874 */
875 void
ExecBitmapHeapInitializeDSM(BitmapHeapScanState * node,ParallelContext * pcxt)876 ExecBitmapHeapInitializeDSM(BitmapHeapScanState *node,
877 ParallelContext *pcxt)
878 {
879 ParallelBitmapHeapState *pstate;
880 EState *estate = node->ss.ps.state;
881 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
882
883 /* If there's no DSA, there are no workers; initialize nothing. */
884 if (dsa == NULL)
885 return;
886
887 pstate = shm_toc_allocate(pcxt->toc, node->pscan_len);
888
889 pstate->tbmiterator = 0;
890 pstate->prefetch_iterator = 0;
891
892 /* Initialize the mutex */
893 SpinLockInit(&pstate->mutex);
894 pstate->prefetch_pages = 0;
895 pstate->prefetch_target = 0;
896 pstate->state = BM_INITIAL;
897
898 ConditionVariableInit(&pstate->cv);
899 SerializeSnapshot(estate->es_snapshot, pstate->phs_snapshot_data);
900
901 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, pstate);
902 node->pstate = pstate;
903 }
904
905 /* ----------------------------------------------------------------
906 * ExecBitmapHeapReInitializeDSM
907 *
908 * Reset shared state before beginning a fresh scan.
909 * ----------------------------------------------------------------
910 */
911 void
ExecBitmapHeapReInitializeDSM(BitmapHeapScanState * node,ParallelContext * pcxt)912 ExecBitmapHeapReInitializeDSM(BitmapHeapScanState *node,
913 ParallelContext *pcxt)
914 {
915 ParallelBitmapHeapState *pstate = node->pstate;
916 dsa_area *dsa = node->ss.ps.state->es_query_dsa;
917
918 /* If there's no DSA, there are no workers; do nothing. */
919 if (dsa == NULL)
920 return;
921
922 pstate->state = BM_INITIAL;
923
924 if (DsaPointerIsValid(pstate->tbmiterator))
925 tbm_free_shared_area(dsa, pstate->tbmiterator);
926
927 if (DsaPointerIsValid(pstate->prefetch_iterator))
928 tbm_free_shared_area(dsa, pstate->prefetch_iterator);
929
930 pstate->tbmiterator = InvalidDsaPointer;
931 pstate->prefetch_iterator = InvalidDsaPointer;
932 }
933
934 /* ----------------------------------------------------------------
935 * ExecBitmapHeapInitializeWorker
936 *
937 * Copy relevant information from TOC into planstate.
938 * ----------------------------------------------------------------
939 */
940 void
ExecBitmapHeapInitializeWorker(BitmapHeapScanState * node,ParallelWorkerContext * pwcxt)941 ExecBitmapHeapInitializeWorker(BitmapHeapScanState *node,
942 ParallelWorkerContext *pwcxt)
943 {
944 ParallelBitmapHeapState *pstate;
945 Snapshot snapshot;
946
947 Assert(node->ss.ps.state->es_query_dsa != NULL);
948
949 pstate = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
950 node->pstate = pstate;
951
952 snapshot = RestoreSnapshot(pstate->phs_snapshot_data);
953 table_scan_update_snapshot(node->ss.ss_currentScanDesc, snapshot);
954 }
955