1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexonlyscan.c
4  *	  Routines to support index-only scans
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeIndexonlyscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *		ExecIndexOnlyScan			scans an index
18  *		IndexOnlyNext				retrieve next tuple
19  *		ExecInitIndexOnlyScan		creates and initializes state info.
20  *		ExecReScanIndexOnlyScan		rescans the indexed relation.
21  *		ExecEndIndexOnlyScan		releases all storage.
22  *		ExecIndexOnlyMarkPos		marks scan position.
23  *		ExecIndexOnlyRestrPos		restores scan position.
24  *		ExecIndexOnlyScanEstimate	estimates DSM space needed for
25  *						parallel index-only scan
26  *		ExecIndexOnlyScanInitializeDSM	initialize DSM for parallel
27  *						index-only scan
28  *		ExecIndexOnlyScanReInitializeDSM	reinitialize DSM for fresh scan
29  *		ExecIndexOnlyScanInitializeWorker attach to DSM info in parallel worker
30  */
31 #include "postgres.h"
32 
33 #include "access/relscan.h"
34 #include "access/visibilitymap.h"
35 #include "executor/execdebug.h"
36 #include "executor/nodeIndexonlyscan.h"
37 #include "executor/nodeIndexscan.h"
38 #include "miscadmin.h"
39 #include "storage/bufmgr.h"
40 #include "storage/predicate.h"
41 #include "utils/memutils.h"
42 #include "utils/rel.h"
43 
44 
45 static TupleTableSlot *IndexOnlyNext(IndexOnlyScanState *node);
46 static void StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup,
47 				TupleDesc itupdesc);
48 
49 
/* ----------------------------------------------------------------
 *		IndexOnlyNext
 *
 *		Retrieve the next tuple from the IndexOnlyScan node's index,
 *		storing it in the node's scan tuple slot.  Returns the slot,
 *		or an empty (cleared) slot when the scan is exhausted.
 * ----------------------------------------------------------------
 */
static TupleTableSlot *
IndexOnlyNext(IndexOnlyScanState *node)
{
	EState	   *estate;
	ExprContext *econtext;
	ScanDirection direction;
	IndexScanDesc scandesc;
	TupleTableSlot *slot;
	ItemPointer tid;

	/*
	 * extract necessary information from index scan node
	 */
	estate = node->ss.ps.state;
	direction = estate->es_direction;
	/* flip direction if this is an overall backward scan */
	if (ScanDirectionIsBackward(((IndexOnlyScan *) node->ss.ps.plan)->indexorderdir))
	{
		if (ScanDirectionIsForward(direction))
			direction = BackwardScanDirection;
		else if (ScanDirectionIsBackward(direction))
			direction = ForwardScanDirection;
		/* NoMovementScanDirection is left as-is */
	}
	scandesc = node->ioss_ScanDesc;
	econtext = node->ss.ps.ps_ExprContext;
	slot = node->ss.ss_ScanTupleSlot;

	if (scandesc == NULL)
	{
		/*
		 * We reach here if the index only scan is not parallel, or if we're
		 * serially executing an index only scan that was planned to be
		 * parallel.  (In the parallel case the descriptor was already set up
		 * by the DSM initialize/attach functions below.)
		 */
		scandesc = index_beginscan(node->ss.ss_currentRelation,
								   node->ioss_RelationDesc,
								   estate->es_snapshot,
								   node->ioss_NumScanKeys,
								   node->ioss_NumOrderByKeys);

		node->ioss_ScanDesc = scandesc;


		/* Set it up for index-only scan */
		node->ioss_ScanDesc->xs_want_itup = true;
		node->ioss_VMBuffer = InvalidBuffer;

		/*
		 * If no run-time keys to calculate or they are ready, go ahead and
		 * pass the scankeys to the index AM.
		 */
		if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
			index_rescan(scandesc,
						 node->ioss_ScanKeys,
						 node->ioss_NumScanKeys,
						 node->ioss_OrderByKeys,
						 node->ioss_NumOrderByKeys);
	}

	/*
	 * OK, now that we have what we need, fetch the next tuple.
	 */
	while ((tid = index_getnext_tid(scandesc, direction)) != NULL)
	{
		/* tuple stays NULL unless we're forced to visit the heap below */
		HeapTuple	tuple = NULL;

		CHECK_FOR_INTERRUPTS();

		/*
		 * We can skip the heap fetch if the TID references a heap page on
		 * which all tuples are known visible to everybody.  In any case,
		 * we'll use the index tuple not the heap tuple as the data source.
		 *
		 * Note on Memory Ordering Effects: visibilitymap_get_status does not
		 * lock the visibility map buffer, and therefore the result we read
		 * here could be slightly stale.  However, it can't be stale enough to
		 * matter.
		 *
		 * We need to detect clearing a VM bit due to an insert right away,
		 * because the tuple is present in the index page but not visible. The
		 * reading of the TID by this scan (using a shared lock on the index
		 * buffer) is serialized with the insert of the TID into the index
		 * (using an exclusive lock on the index buffer). Because the VM bit
		 * is cleared before updating the index, and locking/unlocking of the
		 * index page acts as a full memory barrier, we are sure to see the
		 * cleared bit if we see a recently-inserted TID.
		 *
		 * Deletes do not update the index page (only VACUUM will clear out
		 * the TID), so the clearing of the VM bit by a delete is not
		 * serialized with this test below, and we may see a value that is
		 * significantly stale. However, we don't care about the delete right
		 * away, because the tuple is still visible until the deleting
		 * transaction commits or the statement ends (if it's our
		 * transaction). In either case, the lock on the VM buffer will have
		 * been released (acting as a write barrier) after clearing the bit.
		 * And for us to have a snapshot that includes the deleting
		 * transaction (making the tuple invisible), we must have acquired
		 * ProcArrayLock after that time, acting as a read barrier.
		 *
		 * It's worth going through this complexity to avoid needing to lock
		 * the VM buffer, which could cause significant contention.
		 */
		if (!VM_ALL_VISIBLE(scandesc->heapRelation,
							ItemPointerGetBlockNumber(tid),
							&node->ioss_VMBuffer))
		{
			/*
			 * Rats, we have to visit the heap to check visibility.
			 */
			node->ioss_HeapFetches++;
			tuple = index_fetch_heap(scandesc);
			if (tuple == NULL)
				continue;		/* no visible tuple, try next index entry */

			/*
			 * Only MVCC snapshots are supported here, so there should be no
			 * need to keep following the HOT chain once a visible entry has
			 * been found.  If we did want to allow that, we'd need to keep
			 * more state to remember not to call index_getnext_tid next time.
			 */
			if (scandesc->xs_continue_hot)
				elog(ERROR, "non-MVCC snapshots are not supported in index-only scans");

			/*
			 * Note: at this point we are holding a pin on the heap page, as
			 * recorded in scandesc->xs_cbuf.  We could release that pin now,
			 * but it's not clear whether it's a win to do so.  The next index
			 * entry might require a visit to the same heap page.
			 */
		}

		/*
		 * Fill the scan tuple slot with data from the index.  This might be
		 * provided in either HeapTuple or IndexTuple format.  Conceivably an
		 * index AM might fill both fields, in which case we prefer the heap
		 * format, since it's probably a bit cheaper to fill a slot from.
		 */
		if (scandesc->xs_hitup)
		{
			/*
			 * We don't take the trouble to verify that the provided tuple has
			 * exactly the slot's format, but it seems worth doing a quick
			 * check on the number of fields.
			 */
			Assert(slot->tts_tupleDescriptor->natts ==
				   scandesc->xs_hitupdesc->natts);
			ExecStoreTuple(scandesc->xs_hitup, slot, InvalidBuffer, false);
		}
		else if (scandesc->xs_itup)
			StoreIndexTuple(slot, scandesc->xs_itup, scandesc->xs_itupdesc);
		else
			elog(ERROR, "no data returned for index-only scan");

		/*
		 * If the index was lossy, we have to recheck the index quals.
		 * (Currently, this can never happen, but we should support the case
		 * for possible future use, eg with GiST indexes.)
		 */
		if (scandesc->xs_recheck)
		{
			econtext->ecxt_scantuple = slot;
			ResetExprContext(econtext);
			if (!ExecQual(node->indexqual, econtext))
			{
				/* Fails recheck, so drop it and loop back for another */
				InstrCountFiltered2(node, 1);
				continue;
			}
		}

		/*
		 * We don't currently support rechecking ORDER BY distances.  (In
		 * principle, if the index can support retrieval of the originally
		 * indexed value, it should be able to produce an exact distance
		 * calculation too.  So it's not clear that adding code here for
		 * recheck/re-sort would be worth the trouble.  But we should at least
		 * throw an error if someone tries it.)
		 */
		if (scandesc->numberOfOrderBys > 0 && scandesc->xs_recheckorderby)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("lossy distance functions are not supported in index-only scans")));

		/*
		 * If we didn't access the heap, then we'll need to take a predicate
		 * lock explicitly, as if we had.  For now we do that at page level.
		 * (tuple is still NULL exactly when the VM said all-visible above.)
		 */
		if (tuple == NULL)
			PredicateLockPage(scandesc->heapRelation,
							  ItemPointerGetBlockNumber(tid),
							  estate->es_snapshot);

		return slot;
	}

	/*
	 * if we get here it means the index scan failed so we are at the end of
	 * the scan..
	 */
	return ExecClearTuple(slot);
}
257 
258 /*
259  * StoreIndexTuple
260  *		Fill the slot with data from the index tuple.
261  *
262  * At some point this might be generally-useful functionality, but
263  * right now we don't need it elsewhere.
264  */
265 static void
StoreIndexTuple(TupleTableSlot * slot,IndexTuple itup,TupleDesc itupdesc)266 StoreIndexTuple(TupleTableSlot *slot, IndexTuple itup, TupleDesc itupdesc)
267 {
268 	int			nindexatts = itupdesc->natts;
269 	Datum	   *values = slot->tts_values;
270 	bool	   *isnull = slot->tts_isnull;
271 	int			i;
272 
273 	/*
274 	 * Note: we must use the tupdesc supplied by the AM in index_getattr, not
275 	 * the slot's tupdesc, in case the latter has different datatypes (this
276 	 * happens for btree name_ops in particular).  They'd better have the same
277 	 * number of columns though, as well as being datatype-compatible which is
278 	 * something we can't so easily check.
279 	 */
280 	Assert(slot->tts_tupleDescriptor->natts == nindexatts);
281 
282 	ExecClearTuple(slot);
283 	for (i = 0; i < nindexatts; i++)
284 		values[i] = index_getattr(itup, i + 1, itupdesc, &isnull[i]);
285 	ExecStoreVirtualTuple(slot);
286 }
287 
/*
 * IndexOnlyRecheck -- access method routine to recheck a tuple in EvalPlanQual
 *
 * This can't really happen, since an index can't supply CTID which would
 * be necessary data for any potential EvalPlanQual target relation.  If it
 * did happen, the EPQ code would pass us the wrong data, namely a heap
 * tuple not an index tuple.  So throw an error.
 */
static bool
IndexOnlyRecheck(IndexOnlyScanState *node, TupleTableSlot *slot)
{
	/* Installed as the ExecScanRecheckMtd in ExecIndexOnlyScan; never valid */
	elog(ERROR, "EvalPlanQual recheck is not supported in index-only scans");
	return false;				/* keep compiler quiet */
}
302 
303 /* ----------------------------------------------------------------
304  *		ExecIndexOnlyScan(node)
305  * ----------------------------------------------------------------
306  */
307 static TupleTableSlot *
ExecIndexOnlyScan(PlanState * pstate)308 ExecIndexOnlyScan(PlanState *pstate)
309 {
310 	IndexOnlyScanState *node = castNode(IndexOnlyScanState, pstate);
311 
312 	/*
313 	 * If we have runtime keys and they've not already been set up, do it now.
314 	 */
315 	if (node->ioss_NumRuntimeKeys != 0 && !node->ioss_RuntimeKeysReady)
316 		ExecReScan((PlanState *) node);
317 
318 	return ExecScan(&node->ss,
319 					(ExecScanAccessMtd) IndexOnlyNext,
320 					(ExecScanRecheckMtd) IndexOnlyRecheck);
321 }
322 
323 /* ----------------------------------------------------------------
324  *		ExecReScanIndexOnlyScan(node)
325  *
326  *		Recalculates the values of any scan keys whose value depends on
327  *		information known at runtime, then rescans the indexed relation.
328  *
329  *		Updating the scan key was formerly done separately in
330  *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
331  *		rescans of indices and relations/general streams more uniform.
332  * ----------------------------------------------------------------
333  */
334 void
ExecReScanIndexOnlyScan(IndexOnlyScanState * node)335 ExecReScanIndexOnlyScan(IndexOnlyScanState *node)
336 {
337 	/*
338 	 * If we are doing runtime key calculations (ie, any of the index key
339 	 * values weren't simple Consts), compute the new key values.  But first,
340 	 * reset the context so we don't leak memory as each outer tuple is
341 	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
342 	 * on each call.
343 	 */
344 	if (node->ioss_NumRuntimeKeys != 0)
345 	{
346 		ExprContext *econtext = node->ioss_RuntimeContext;
347 
348 		ResetExprContext(econtext);
349 		ExecIndexEvalRuntimeKeys(econtext,
350 								 node->ioss_RuntimeKeys,
351 								 node->ioss_NumRuntimeKeys);
352 	}
353 	node->ioss_RuntimeKeysReady = true;
354 
355 	/* reset index scan */
356 	if (node->ioss_ScanDesc)
357 		index_rescan(node->ioss_ScanDesc,
358 					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
359 					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
360 
361 	ExecScanReScan(&node->ss);
362 }
363 
364 
365 /* ----------------------------------------------------------------
366  *		ExecEndIndexOnlyScan
367  * ----------------------------------------------------------------
368  */
369 void
ExecEndIndexOnlyScan(IndexOnlyScanState * node)370 ExecEndIndexOnlyScan(IndexOnlyScanState *node)
371 {
372 	Relation	indexRelationDesc;
373 	IndexScanDesc indexScanDesc;
374 	Relation	relation;
375 
376 	/*
377 	 * extract information from the node
378 	 */
379 	indexRelationDesc = node->ioss_RelationDesc;
380 	indexScanDesc = node->ioss_ScanDesc;
381 	relation = node->ss.ss_currentRelation;
382 
383 	/* Release VM buffer pin, if any. */
384 	if (node->ioss_VMBuffer != InvalidBuffer)
385 	{
386 		ReleaseBuffer(node->ioss_VMBuffer);
387 		node->ioss_VMBuffer = InvalidBuffer;
388 	}
389 
390 	/*
391 	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
392 	 */
393 #ifdef NOT_USED
394 	ExecFreeExprContext(&node->ss.ps);
395 	if (node->ioss_RuntimeContext)
396 		FreeExprContext(node->ioss_RuntimeContext, true);
397 #endif
398 
399 	/*
400 	 * clear out tuple table slots
401 	 */
402 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
403 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
404 
405 	/*
406 	 * close the index relation (no-op if we didn't open it)
407 	 */
408 	if (indexScanDesc)
409 		index_endscan(indexScanDesc);
410 	if (indexRelationDesc)
411 		index_close(indexRelationDesc, NoLock);
412 
413 	/*
414 	 * close the heap relation.
415 	 */
416 	ExecCloseScanRelation(relation);
417 }
418 
419 /* ----------------------------------------------------------------
420  *		ExecIndexOnlyMarkPos
421  *
422  * Note: we assume that no caller attempts to set a mark before having read
423  * at least one tuple.  Otherwise, ioss_ScanDesc might still be NULL.
424  * ----------------------------------------------------------------
425  */
426 void
ExecIndexOnlyMarkPos(IndexOnlyScanState * node)427 ExecIndexOnlyMarkPos(IndexOnlyScanState *node)
428 {
429 	EState	   *estate = node->ss.ps.state;
430 
431 	if (estate->es_epqTuple != NULL)
432 	{
433 		/*
434 		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
435 		 * this relation, then we shouldn't access the index at all.  We would
436 		 * instead need to save, and later restore, the state of the
437 		 * es_epqScanDone flag, so that re-fetching the test tuple is
438 		 * possible.  However, given the assumption that no caller sets a mark
439 		 * at the start of the scan, we can only get here with es_epqScanDone
440 		 * already set, and so no state need be saved.
441 		 */
442 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
443 
444 		Assert(scanrelid > 0);
445 		if (estate->es_epqTupleSet[scanrelid - 1])
446 		{
447 			/* Verify the claim above */
448 			if (!estate->es_epqScanDone[scanrelid - 1])
449 				elog(ERROR, "unexpected ExecIndexOnlyMarkPos call in EPQ recheck");
450 			return;
451 		}
452 	}
453 
454 	index_markpos(node->ioss_ScanDesc);
455 }
456 
457 /* ----------------------------------------------------------------
458  *		ExecIndexOnlyRestrPos
459  * ----------------------------------------------------------------
460  */
461 void
ExecIndexOnlyRestrPos(IndexOnlyScanState * node)462 ExecIndexOnlyRestrPos(IndexOnlyScanState *node)
463 {
464 	EState	   *estate = node->ss.ps.state;
465 
466 	if (estate->es_epqTuple != NULL)
467 	{
468 		/* See comments in ExecIndexOnlyMarkPos */
469 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
470 
471 		Assert(scanrelid > 0);
472 		if (estate->es_epqTupleSet[scanrelid - 1])
473 		{
474 			/* Verify the claim above */
475 			if (!estate->es_epqScanDone[scanrelid - 1])
476 				elog(ERROR, "unexpected ExecIndexOnlyRestrPos call in EPQ recheck");
477 			return;
478 		}
479 	}
480 
481 	index_restrpos(node->ioss_ScanDesc);
482 }
483 
/* ----------------------------------------------------------------
 *		ExecInitIndexOnlyScan
 *
 *		Initializes the index scan's state information, creates
 *		scan keys, and opens the base and index relations.
 *
 *		Returns the new IndexOnlyScanState node.
 *
 *		Note: index scans have 2 sets of state information because
 *			  we have to keep track of the base relation and the
 *			  index relation.
 * ----------------------------------------------------------------
 */
IndexOnlyScanState *
ExecInitIndexOnlyScan(IndexOnlyScan *node, EState *estate, int eflags)
{
	IndexOnlyScanState *indexstate;
	Relation	currentRelation;
	bool		relistarget;
	TupleDesc	tupDesc;

	/*
	 * create state structure
	 */
	indexstate = makeNode(IndexOnlyScanState);
	indexstate->ss.ps.plan = (Plan *) node;
	indexstate->ss.ps.state = estate;
	indexstate->ss.ps.ExecProcNode = ExecIndexOnlyScan;
	indexstate->ioss_HeapFetches = 0;	/* incremented in IndexOnlyNext */

	/*
	 * Miscellaneous initialization
	 *
	 * create expression context for node
	 */
	ExecAssignExprContext(estate, &indexstate->ss.ps);

	/*
	 * initialize child expressions
	 *
	 * Note: we don't initialize all of the indexorderby expression, only the
	 * sub-parts corresponding to runtime keys (see below).
	 */
	indexstate->ss.ps.qual =
		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
	indexstate->indexqual =
		ExecInitQual(node->indexqual, (PlanState *) indexstate);

	/*
	 * tuple table initialization
	 */
	ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
	ExecInitScanTupleSlot(estate, &indexstate->ss);

	/*
	 * open the base relation and acquire appropriate lock on it.
	 */
	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);

	indexstate->ss.ss_currentRelation = currentRelation;
	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */

	/*
	 * Build the scan tuple type using the indextlist generated by the
	 * planner.  We use this, rather than the index's physical tuple
	 * descriptor, because the latter contains storage column types not the
	 * types of the original datums.  (It's the AM's responsibility to return
	 * suitable data anyway.)
	 */
	tupDesc = ExecTypeFromTL(node->indextlist, false);
	ExecAssignScanType(&indexstate->ss, tupDesc);

	/*
	 * Initialize result tuple type and projection info.  The node's
	 * targetlist will contain Vars with varno = INDEX_VAR, referencing the
	 * scan tuple.  (This must follow the scan-type assignment above.)
	 */
	ExecAssignResultTypeFromTL(&indexstate->ss.ps);
	ExecAssignScanProjectionInfoWithVarno(&indexstate->ss, INDEX_VAR);

	/*
	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
	 * references to nonexistent indexes.
	 */
	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
		return indexstate;

	/*
	 * Open the index relation.
	 *
	 * If the parent table is one of the target relations of the query, then
	 * InitPlan already opened and write-locked the index, so we can avoid
	 * taking another lock here.  Otherwise we need a normal reader's lock.
	 */
	relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
	indexstate->ioss_RelationDesc = index_open(node->indexid,
											   relistarget ? NoLock : AccessShareLock);

	/*
	 * Initialize index-specific scan state.  The RuntimeKeys fields are
	 * filled in (possibly twice) by the ExecIndexBuildScanKeys calls below.
	 */
	indexstate->ioss_RuntimeKeysReady = false;
	indexstate->ioss_RuntimeKeys = NULL;
	indexstate->ioss_NumRuntimeKeys = 0;

	/*
	 * build the index scan keys from the index qualification
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexstate->ioss_RelationDesc,
						   node->indexqual,
						   false,
						   &indexstate->ioss_ScanKeys,
						   &indexstate->ioss_NumScanKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * any ORDER BY exprs have to be turned into scankeys in the same way
	 * (note: this appends to, not replaces, the RuntimeKeys built above)
	 */
	ExecIndexBuildScanKeys((PlanState *) indexstate,
						   indexstate->ioss_RelationDesc,
						   node->indexorderby,
						   true,
						   &indexstate->ioss_OrderByKeys,
						   &indexstate->ioss_NumOrderByKeys,
						   &indexstate->ioss_RuntimeKeys,
						   &indexstate->ioss_NumRuntimeKeys,
						   NULL,	/* no ArrayKeys */
						   NULL);

	/*
	 * If we have runtime keys, we need an ExprContext to evaluate them. The
	 * node's standard context won't do because we want to reset that context
	 * for every tuple.  So, build another context just like the other one...
	 * -tgl 7/11/00
	 */
	if (indexstate->ioss_NumRuntimeKeys != 0)
	{
		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;

		ExecAssignExprContext(estate, &indexstate->ss.ps);
		indexstate->ioss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
		indexstate->ss.ps.ps_ExprContext = stdecontext;
	}
	else
	{
		indexstate->ioss_RuntimeContext = NULL;
	}

	/*
	 * all done.
	 */
	return indexstate;
}
640 
641 /* ----------------------------------------------------------------
642  *		Parallel Index-only Scan Support
643  * ----------------------------------------------------------------
644  */
645 
646 /* ----------------------------------------------------------------
647  *		ExecIndexOnlyScanEstimate
648  *
649  *	estimates the space required to serialize index-only scan node.
650  * ----------------------------------------------------------------
651  */
652 void
ExecIndexOnlyScanEstimate(IndexOnlyScanState * node,ParallelContext * pcxt)653 ExecIndexOnlyScanEstimate(IndexOnlyScanState *node,
654 						  ParallelContext *pcxt)
655 {
656 	EState	   *estate = node->ss.ps.state;
657 
658 	node->ioss_PscanLen = index_parallelscan_estimate(node->ioss_RelationDesc,
659 													  estate->es_snapshot);
660 	shm_toc_estimate_chunk(&pcxt->estimator, node->ioss_PscanLen);
661 	shm_toc_estimate_keys(&pcxt->estimator, 1);
662 }
663 
664 /* ----------------------------------------------------------------
665  *		ExecIndexOnlyScanInitializeDSM
666  *
667  *		Set up a parallel index-only scan descriptor.
668  * ----------------------------------------------------------------
669  */
670 void
ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState * node,ParallelContext * pcxt)671 ExecIndexOnlyScanInitializeDSM(IndexOnlyScanState *node,
672 							   ParallelContext *pcxt)
673 {
674 	EState	   *estate = node->ss.ps.state;
675 	ParallelIndexScanDesc piscan;
676 
677 	piscan = shm_toc_allocate(pcxt->toc, node->ioss_PscanLen);
678 	index_parallelscan_initialize(node->ss.ss_currentRelation,
679 								  node->ioss_RelationDesc,
680 								  estate->es_snapshot,
681 								  piscan);
682 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
683 	node->ioss_ScanDesc =
684 		index_beginscan_parallel(node->ss.ss_currentRelation,
685 								 node->ioss_RelationDesc,
686 								 node->ioss_NumScanKeys,
687 								 node->ioss_NumOrderByKeys,
688 								 piscan);
689 	node->ioss_ScanDesc->xs_want_itup = true;
690 	node->ioss_VMBuffer = InvalidBuffer;
691 
692 	/*
693 	 * If no run-time keys to calculate or they are ready, go ahead and pass
694 	 * the scankeys to the index AM.
695 	 */
696 	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
697 		index_rescan(node->ioss_ScanDesc,
698 					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
699 					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
700 }
701 
/* ----------------------------------------------------------------
 *		ExecIndexOnlyScanReInitializeDSM
 *
 *		Reset shared state before beginning a fresh scan.  The scan
 *		descriptor created by ExecIndexOnlyScanInitializeDSM is kept;
 *		we just ask the index AM to reset the shared parallel state.
 * ----------------------------------------------------------------
 */
void
ExecIndexOnlyScanReInitializeDSM(IndexOnlyScanState *node,
								 ParallelContext *pcxt)
{
	/* pcxt is unused; the scan descriptor already points at the DSM area */
	index_parallelrescan(node->ioss_ScanDesc);
}
714 
715 /* ----------------------------------------------------------------
716  *		ExecIndexOnlyScanInitializeWorker
717  *
718  *		Copy relevant information from TOC into planstate.
719  * ----------------------------------------------------------------
720  */
721 void
ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState * node,shm_toc * toc)722 ExecIndexOnlyScanInitializeWorker(IndexOnlyScanState *node, shm_toc *toc)
723 {
724 	ParallelIndexScanDesc piscan;
725 
726 	piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
727 	node->ioss_ScanDesc =
728 		index_beginscan_parallel(node->ss.ss_currentRelation,
729 								 node->ioss_RelationDesc,
730 								 node->ioss_NumScanKeys,
731 								 node->ioss_NumOrderByKeys,
732 								 piscan);
733 	node->ioss_ScanDesc->xs_want_itup = true;
734 
735 	/*
736 	 * If no run-time keys to calculate or they are ready, go ahead and pass
737 	 * the scankeys to the index AM.
738 	 */
739 	if (node->ioss_NumRuntimeKeys == 0 || node->ioss_RuntimeKeysReady)
740 		index_rescan(node->ioss_ScanDesc,
741 					 node->ioss_ScanKeys, node->ioss_NumScanKeys,
742 					 node->ioss_OrderByKeys, node->ioss_NumOrderByKeys);
743 }
744