1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexscan.c
4  *	  Routines to support indexed scans of relations
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeIndexscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *		ExecIndexScan			scans a relation using an index
18  *		IndexNext				retrieve next tuple using index
19  *		IndexNextWithReorder	same, but recheck ORDER BY expressions
20  *		ExecInitIndexScan		creates and initializes state info.
21  *		ExecReScanIndexScan		rescans the indexed relation.
22  *		ExecEndIndexScan		releases all storage.
23  *		ExecIndexMarkPos		marks scan position.
24  *		ExecIndexRestrPos		restores scan position.
25  *		ExecIndexScanEstimate	estimates DSM space needed for parallel index scan
26  *		ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
27  *		ExecIndexScanReInitializeDSM reinitialize DSM for fresh scan
28  *		ExecIndexScanInitializeWorker attach to DSM info in parallel worker
29  */
30 #include "postgres.h"
31 
32 #include "access/nbtree.h"
33 #include "access/relscan.h"
34 #include "access/tableam.h"
35 #include "catalog/pg_am.h"
36 #include "executor/execdebug.h"
37 #include "executor/nodeIndexscan.h"
38 #include "lib/pairingheap.h"
39 #include "miscadmin.h"
40 #include "nodes/nodeFuncs.h"
41 #include "utils/array.h"
42 #include "utils/datum.h"
43 #include "utils/lsyscache.h"
44 #include "utils/memutils.h"
45 #include "utils/rel.h"
46 
47 /*
48  * When an ordering operator is used, tuples fetched from the index that
49  * need to be reordered are queued in a pairing heap, as ReorderTuples.
50  */
51 typedef struct
52 {
53 	pairingheap_node ph_node;
54 	HeapTuple	htup;
55 	Datum	   *orderbyvals;
56 	bool	   *orderbynulls;
57 } ReorderTuple;
58 
59 static TupleTableSlot *IndexNext(IndexScanState *node);
60 static TupleTableSlot *IndexNextWithReorder(IndexScanState *node);
61 static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
62 static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot);
63 static int	cmp_orderbyvals(const Datum *adist, const bool *anulls,
64 							const Datum *bdist, const bool *bnulls,
65 							IndexScanState *node);
66 static int	reorderqueue_cmp(const pairingheap_node *a,
67 							 const pairingheap_node *b, void *arg);
68 static void reorderqueue_push(IndexScanState *node, TupleTableSlot *slot,
69 							  Datum *orderbyvals, bool *orderbynulls);
70 static HeapTuple reorderqueue_pop(IndexScanState *node);
71 
72 
73 /* ----------------------------------------------------------------
74  *		IndexNext
75  *
76  *		Retrieve a tuple from the IndexScan node's currentRelation
77  *		using the index specified in the IndexScanState information.
78  * ----------------------------------------------------------------
79  */
80 static TupleTableSlot *
IndexNext(IndexScanState * node)81 IndexNext(IndexScanState *node)
82 {
83 	EState	   *estate;
84 	ExprContext *econtext;
85 	ScanDirection direction;
86 	IndexScanDesc scandesc;
87 	TupleTableSlot *slot;
88 
89 	/*
90 	 * extract necessary information from index scan node
91 	 */
92 	estate = node->ss.ps.state;
93 	direction = estate->es_direction;
94 	/* flip direction if this is an overall backward scan */
95 	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
96 	{
97 		if (ScanDirectionIsForward(direction))
98 			direction = BackwardScanDirection;
99 		else if (ScanDirectionIsBackward(direction))
100 			direction = ForwardScanDirection;
101 	}
102 	scandesc = node->iss_ScanDesc;
103 	econtext = node->ss.ps.ps_ExprContext;
104 	slot = node->ss.ss_ScanTupleSlot;
105 
106 	if (scandesc == NULL)
107 	{
108 		/*
109 		 * We reach here if the index scan is not parallel, or if we're
110 		 * serially executing an index scan that was planned to be parallel.
111 		 */
112 		scandesc = index_beginscan(node->ss.ss_currentRelation,
113 								   node->iss_RelationDesc,
114 								   estate->es_snapshot,
115 								   node->iss_NumScanKeys,
116 								   node->iss_NumOrderByKeys);
117 
118 		node->iss_ScanDesc = scandesc;
119 
120 		/*
121 		 * If no run-time keys to calculate or they are ready, go ahead and
122 		 * pass the scankeys to the index AM.
123 		 */
124 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
125 			index_rescan(scandesc,
126 						 node->iss_ScanKeys, node->iss_NumScanKeys,
127 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
128 	}
129 
130 	/*
131 	 * ok, now that we have what we need, fetch the next tuple.
132 	 */
133 	while (index_getnext_slot(scandesc, direction, slot))
134 	{
135 		CHECK_FOR_INTERRUPTS();
136 
137 		/*
138 		 * If the index was lossy, we have to recheck the index quals using
139 		 * the fetched tuple.
140 		 */
141 		if (scandesc->xs_recheck)
142 		{
143 			econtext->ecxt_scantuple = slot;
144 			if (!ExecQualAndReset(node->indexqualorig, econtext))
145 			{
146 				/* Fails recheck, so drop it and loop back for another */
147 				InstrCountFiltered2(node, 1);
148 				continue;
149 			}
150 		}
151 
152 		return slot;
153 	}
154 
155 	/*
156 	 * if we get here it means the index scan failed so we are at the end of
157 	 * the scan..
158 	 */
159 	node->iss_ReachedEnd = true;
160 	return ExecClearTuple(slot);
161 }
162 
163 /* ----------------------------------------------------------------
164  *		IndexNextWithReorder
165  *
166  *		Like IndexNext, but this version can also re-check ORDER BY
167  *		expressions, and reorder the tuples as necessary.
168  * ----------------------------------------------------------------
169  */
170 static TupleTableSlot *
IndexNextWithReorder(IndexScanState * node)171 IndexNextWithReorder(IndexScanState *node)
172 {
173 	EState	   *estate;
174 	ExprContext *econtext;
175 	IndexScanDesc scandesc;
176 	TupleTableSlot *slot;
177 	ReorderTuple *topmost = NULL;
178 	bool		was_exact;
179 	Datum	   *lastfetched_vals;
180 	bool	   *lastfetched_nulls;
181 	int			cmp;
182 
183 	estate = node->ss.ps.state;
184 
185 	/*
186 	 * Only forward scan is supported with reordering.  Note: we can get away
187 	 * with just Asserting here because the system will not try to run the
188 	 * plan backwards if ExecSupportsBackwardScan() says it won't work.
189 	 * Currently, that is guaranteed because no index AMs support both
190 	 * amcanorderbyop and amcanbackward; if any ever do,
191 	 * ExecSupportsBackwardScan() will need to consider indexorderbys
192 	 * explicitly.
193 	 */
194 	Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
195 	Assert(ScanDirectionIsForward(estate->es_direction));
196 
197 	scandesc = node->iss_ScanDesc;
198 	econtext = node->ss.ps.ps_ExprContext;
199 	slot = node->ss.ss_ScanTupleSlot;
200 
201 	if (scandesc == NULL)
202 	{
203 		/*
204 		 * We reach here if the index scan is not parallel, or if we're
205 		 * serially executing an index scan that was planned to be parallel.
206 		 */
207 		scandesc = index_beginscan(node->ss.ss_currentRelation,
208 								   node->iss_RelationDesc,
209 								   estate->es_snapshot,
210 								   node->iss_NumScanKeys,
211 								   node->iss_NumOrderByKeys);
212 
213 		node->iss_ScanDesc = scandesc;
214 
215 		/*
216 		 * If no run-time keys to calculate or they are ready, go ahead and
217 		 * pass the scankeys to the index AM.
218 		 */
219 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
220 			index_rescan(scandesc,
221 						 node->iss_ScanKeys, node->iss_NumScanKeys,
222 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
223 	}
224 
225 	for (;;)
226 	{
227 		CHECK_FOR_INTERRUPTS();
228 
229 		/*
230 		 * Check the reorder queue first.  If the topmost tuple in the queue
231 		 * has an ORDER BY value smaller than (or equal to) the value last
232 		 * returned by the index, we can return it now.
233 		 */
234 		if (!pairingheap_is_empty(node->iss_ReorderQueue))
235 		{
236 			topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue);
237 
238 			if (node->iss_ReachedEnd ||
239 				cmp_orderbyvals(topmost->orderbyvals,
240 								topmost->orderbynulls,
241 								scandesc->xs_orderbyvals,
242 								scandesc->xs_orderbynulls,
243 								node) <= 0)
244 			{
245 				HeapTuple	tuple;
246 
247 				tuple = reorderqueue_pop(node);
248 
249 				/* Pass 'true', as the tuple in the queue is a palloc'd copy */
250 				ExecForceStoreHeapTuple(tuple, slot, true);
251 				return slot;
252 			}
253 		}
254 		else if (node->iss_ReachedEnd)
255 		{
256 			/* Queue is empty, and no more tuples from index.  We're done. */
257 			return ExecClearTuple(slot);
258 		}
259 
260 		/*
261 		 * Fetch next tuple from the index.
262 		 */
263 next_indextuple:
264 		if (!index_getnext_slot(scandesc, ForwardScanDirection, slot))
265 		{
266 			/*
267 			 * No more tuples from the index.  But we still need to drain any
268 			 * remaining tuples from the queue before we're done.
269 			 */
270 			node->iss_ReachedEnd = true;
271 			continue;
272 		}
273 
274 		/*
275 		 * If the index was lossy, we have to recheck the index quals and
276 		 * ORDER BY expressions using the fetched tuple.
277 		 */
278 		if (scandesc->xs_recheck)
279 		{
280 			econtext->ecxt_scantuple = slot;
281 			if (!ExecQualAndReset(node->indexqualorig, econtext))
282 			{
283 				/* Fails recheck, so drop it and loop back for another */
284 				InstrCountFiltered2(node, 1);
285 				/* allow this loop to be cancellable */
286 				CHECK_FOR_INTERRUPTS();
287 				goto next_indextuple;
288 			}
289 		}
290 
291 		if (scandesc->xs_recheckorderby)
292 		{
293 			econtext->ecxt_scantuple = slot;
294 			ResetExprContext(econtext);
295 			EvalOrderByExpressions(node, econtext);
296 
297 			/*
298 			 * Was the ORDER BY value returned by the index accurate?  The
299 			 * recheck flag means that the index can return inaccurate values,
300 			 * but then again, the value returned for any particular tuple
301 			 * could also be exactly correct.  Compare the value returned by
302 			 * the index with the recalculated value.  (If the value returned
303 			 * by the index happened to be exact right, we can often avoid
304 			 * pushing the tuple to the queue, just to pop it back out again.)
305 			 */
306 			cmp = cmp_orderbyvals(node->iss_OrderByValues,
307 								  node->iss_OrderByNulls,
308 								  scandesc->xs_orderbyvals,
309 								  scandesc->xs_orderbynulls,
310 								  node);
311 			if (cmp < 0)
312 				elog(ERROR, "index returned tuples in wrong order");
313 			else if (cmp == 0)
314 				was_exact = true;
315 			else
316 				was_exact = false;
317 			lastfetched_vals = node->iss_OrderByValues;
318 			lastfetched_nulls = node->iss_OrderByNulls;
319 		}
320 		else
321 		{
322 			was_exact = true;
323 			lastfetched_vals = scandesc->xs_orderbyvals;
324 			lastfetched_nulls = scandesc->xs_orderbynulls;
325 		}
326 
327 		/*
328 		 * Can we return this tuple immediately, or does it need to be pushed
329 		 * to the reorder queue?  If the ORDER BY expression values returned
330 		 * by the index were inaccurate, we can't return it yet, because the
331 		 * next tuple from the index might need to come before this one. Also,
332 		 * we can't return it yet if there are any smaller tuples in the queue
333 		 * already.
334 		 */
335 		if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals,
336 													  lastfetched_nulls,
337 													  topmost->orderbyvals,
338 													  topmost->orderbynulls,
339 													  node) > 0))
340 		{
341 			/* Put this tuple to the queue */
342 			reorderqueue_push(node, slot, lastfetched_vals, lastfetched_nulls);
343 			continue;
344 		}
345 		else
346 		{
347 			/* Can return this tuple immediately. */
348 			return slot;
349 		}
350 	}
351 
352 	/*
353 	 * if we get here it means the index scan failed so we are at the end of
354 	 * the scan..
355 	 */
356 	return ExecClearTuple(slot);
357 }
358 
359 /*
360  * Calculate the expressions in the ORDER BY clause, based on the heap tuple.
361  */
362 static void
EvalOrderByExpressions(IndexScanState * node,ExprContext * econtext)363 EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext)
364 {
365 	int			i;
366 	ListCell   *l;
367 	MemoryContext oldContext;
368 
369 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
370 
371 	i = 0;
372 	foreach(l, node->indexorderbyorig)
373 	{
374 		ExprState  *orderby = (ExprState *) lfirst(l);
375 
376 		node->iss_OrderByValues[i] = ExecEvalExpr(orderby,
377 												  econtext,
378 												  &node->iss_OrderByNulls[i]);
379 		i++;
380 	}
381 
382 	MemoryContextSwitchTo(oldContext);
383 }
384 
385 /*
386  * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
387  */
388 static bool
IndexRecheck(IndexScanState * node,TupleTableSlot * slot)389 IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
390 {
391 	ExprContext *econtext;
392 
393 	/*
394 	 * extract necessary information from index scan node
395 	 */
396 	econtext = node->ss.ps.ps_ExprContext;
397 
398 	/* Does the tuple meet the indexqual condition? */
399 	econtext->ecxt_scantuple = slot;
400 	return ExecQualAndReset(node->indexqualorig, econtext);
401 }
402 
403 
404 /*
405  * Compare ORDER BY expression values.
406  */
407 static int
cmp_orderbyvals(const Datum * adist,const bool * anulls,const Datum * bdist,const bool * bnulls,IndexScanState * node)408 cmp_orderbyvals(const Datum *adist, const bool *anulls,
409 				const Datum *bdist, const bool *bnulls,
410 				IndexScanState *node)
411 {
412 	int			i;
413 	int			result;
414 
415 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
416 	{
417 		SortSupport ssup = &node->iss_SortSupport[i];
418 
419 		/*
420 		 * Handle nulls.  We only need to support NULLS LAST ordering, because
421 		 * match_pathkeys_to_index() doesn't consider indexorderby
422 		 * implementation otherwise.
423 		 */
424 		if (anulls[i] && !bnulls[i])
425 			return 1;
426 		else if (!anulls[i] && bnulls[i])
427 			return -1;
428 		else if (anulls[i] && bnulls[i])
429 			return 0;
430 
431 		result = ssup->comparator(adist[i], bdist[i], ssup);
432 		if (result != 0)
433 			return result;
434 	}
435 
436 	return 0;
437 }
438 
439 /*
440  * Pairing heap provides getting topmost (greatest) element while KNN provides
441  * ascending sort.  That's why we invert the sort order.
442  */
443 static int
reorderqueue_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)444 reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b,
445 				 void *arg)
446 {
447 	ReorderTuple *rta = (ReorderTuple *) a;
448 	ReorderTuple *rtb = (ReorderTuple *) b;
449 	IndexScanState *node = (IndexScanState *) arg;
450 
451 	/* exchange argument order to invert the sort order */
452 	return cmp_orderbyvals(rtb->orderbyvals, rtb->orderbynulls,
453 						   rta->orderbyvals, rta->orderbynulls,
454 						   node);
455 }
456 
457 /*
458  * Helper function to push a tuple to the reorder queue.
459  */
460 static void
reorderqueue_push(IndexScanState * node,TupleTableSlot * slot,Datum * orderbyvals,bool * orderbynulls)461 reorderqueue_push(IndexScanState *node, TupleTableSlot *slot,
462 				  Datum *orderbyvals, bool *orderbynulls)
463 {
464 	IndexScanDesc scandesc = node->iss_ScanDesc;
465 	EState	   *estate = node->ss.ps.state;
466 	MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
467 	ReorderTuple *rt;
468 	int			i;
469 
470 	rt = (ReorderTuple *) palloc(sizeof(ReorderTuple));
471 	rt->htup = ExecCopySlotHeapTuple(slot);
472 	rt->orderbyvals =
473 		(Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys);
474 	rt->orderbynulls =
475 		(bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys);
476 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
477 	{
478 		if (!orderbynulls[i])
479 			rt->orderbyvals[i] = datumCopy(orderbyvals[i],
480 										   node->iss_OrderByTypByVals[i],
481 										   node->iss_OrderByTypLens[i]);
482 		else
483 			rt->orderbyvals[i] = (Datum) 0;
484 		rt->orderbynulls[i] = orderbynulls[i];
485 	}
486 	pairingheap_add(node->iss_ReorderQueue, &rt->ph_node);
487 
488 	MemoryContextSwitchTo(oldContext);
489 }
490 
491 /*
492  * Helper function to pop the next tuple from the reorder queue.
493  */
494 static HeapTuple
reorderqueue_pop(IndexScanState * node)495 reorderqueue_pop(IndexScanState *node)
496 {
497 	HeapTuple	result;
498 	ReorderTuple *topmost;
499 	int			i;
500 
501 	topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
502 
503 	result = topmost->htup;
504 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
505 	{
506 		if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
507 			pfree(DatumGetPointer(topmost->orderbyvals[i]));
508 	}
509 	pfree(topmost->orderbyvals);
510 	pfree(topmost->orderbynulls);
511 	pfree(topmost);
512 
513 	return result;
514 }
515 
516 
517 /* ----------------------------------------------------------------
518  *		ExecIndexScan(node)
519  * ----------------------------------------------------------------
520  */
521 static TupleTableSlot *
ExecIndexScan(PlanState * pstate)522 ExecIndexScan(PlanState *pstate)
523 {
524 	IndexScanState *node = castNode(IndexScanState, pstate);
525 
526 	/*
527 	 * If we have runtime keys and they've not already been set up, do it now.
528 	 */
529 	if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
530 		ExecReScan((PlanState *) node);
531 
532 	if (node->iss_NumOrderByKeys > 0)
533 		return ExecScan(&node->ss,
534 						(ExecScanAccessMtd) IndexNextWithReorder,
535 						(ExecScanRecheckMtd) IndexRecheck);
536 	else
537 		return ExecScan(&node->ss,
538 						(ExecScanAccessMtd) IndexNext,
539 						(ExecScanRecheckMtd) IndexRecheck);
540 }
541 
542 /* ----------------------------------------------------------------
543  *		ExecReScanIndexScan(node)
544  *
545  *		Recalculates the values of any scan keys whose value depends on
546  *		information known at runtime, then rescans the indexed relation.
547  *
548  *		Updating the scan key was formerly done separately in
549  *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
550  *		rescans of indices and relations/general streams more uniform.
551  * ----------------------------------------------------------------
552  */
553 void
ExecReScanIndexScan(IndexScanState * node)554 ExecReScanIndexScan(IndexScanState *node)
555 {
556 	/*
557 	 * If we are doing runtime key calculations (ie, any of the index key
558 	 * values weren't simple Consts), compute the new key values.  But first,
559 	 * reset the context so we don't leak memory as each outer tuple is
560 	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
561 	 * on each call.
562 	 */
563 	if (node->iss_NumRuntimeKeys != 0)
564 	{
565 		ExprContext *econtext = node->iss_RuntimeContext;
566 
567 		ResetExprContext(econtext);
568 		ExecIndexEvalRuntimeKeys(econtext,
569 								 node->iss_RuntimeKeys,
570 								 node->iss_NumRuntimeKeys);
571 	}
572 	node->iss_RuntimeKeysReady = true;
573 
574 	/* flush the reorder queue */
575 	if (node->iss_ReorderQueue)
576 	{
577 		while (!pairingheap_is_empty(node->iss_ReorderQueue))
578 			reorderqueue_pop(node);
579 	}
580 
581 	/* reset index scan */
582 	if (node->iss_ScanDesc)
583 		index_rescan(node->iss_ScanDesc,
584 					 node->iss_ScanKeys, node->iss_NumScanKeys,
585 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
586 	node->iss_ReachedEnd = false;
587 
588 	ExecScanReScan(&node->ss);
589 }
590 
591 
592 /*
593  * ExecIndexEvalRuntimeKeys
594  *		Evaluate any runtime key values, and update the scankeys.
595  */
596 void
ExecIndexEvalRuntimeKeys(ExprContext * econtext,IndexRuntimeKeyInfo * runtimeKeys,int numRuntimeKeys)597 ExecIndexEvalRuntimeKeys(ExprContext *econtext,
598 						 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
599 {
600 	int			j;
601 	MemoryContext oldContext;
602 
603 	/* We want to keep the key values in per-tuple memory */
604 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
605 
606 	for (j = 0; j < numRuntimeKeys; j++)
607 	{
608 		ScanKey		scan_key = runtimeKeys[j].scan_key;
609 		ExprState  *key_expr = runtimeKeys[j].key_expr;
610 		Datum		scanvalue;
611 		bool		isNull;
612 
613 		/*
614 		 * For each run-time key, extract the run-time expression and evaluate
615 		 * it with respect to the current context.  We then stick the result
616 		 * into the proper scan key.
617 		 *
618 		 * Note: the result of the eval could be a pass-by-ref value that's
619 		 * stored in some outer scan's tuple, not in
620 		 * econtext->ecxt_per_tuple_memory.  We assume that the outer tuple
621 		 * will stay put throughout our scan.  If this is wrong, we could copy
622 		 * the result into our context explicitly, but I think that's not
623 		 * necessary.
624 		 *
625 		 * It's also entirely possible that the result of the eval is a
626 		 * toasted value.  In this case we should forcibly detoast it, to
627 		 * avoid repeat detoastings each time the value is examined by an
628 		 * index support function.
629 		 */
630 		scanvalue = ExecEvalExpr(key_expr,
631 								 econtext,
632 								 &isNull);
633 		if (isNull)
634 		{
635 			scan_key->sk_argument = scanvalue;
636 			scan_key->sk_flags |= SK_ISNULL;
637 		}
638 		else
639 		{
640 			if (runtimeKeys[j].key_toastable)
641 				scanvalue = PointerGetDatum(PG_DETOAST_DATUM(scanvalue));
642 			scan_key->sk_argument = scanvalue;
643 			scan_key->sk_flags &= ~SK_ISNULL;
644 		}
645 	}
646 
647 	MemoryContextSwitchTo(oldContext);
648 }
649 
650 /*
651  * ExecIndexEvalArrayKeys
652  *		Evaluate any array key values, and set up to iterate through arrays.
653  *
654  * Returns true if there are array elements to consider; false means there
655  * is at least one null or empty array, so no match is possible.  On true
656  * result, the scankeys are initialized with the first elements of the arrays.
657  */
658 bool
ExecIndexEvalArrayKeys(ExprContext * econtext,IndexArrayKeyInfo * arrayKeys,int numArrayKeys)659 ExecIndexEvalArrayKeys(ExprContext *econtext,
660 					   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
661 {
662 	bool		result = true;
663 	int			j;
664 	MemoryContext oldContext;
665 
666 	/* We want to keep the arrays in per-tuple memory */
667 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
668 
669 	for (j = 0; j < numArrayKeys; j++)
670 	{
671 		ScanKey		scan_key = arrayKeys[j].scan_key;
672 		ExprState  *array_expr = arrayKeys[j].array_expr;
673 		Datum		arraydatum;
674 		bool		isNull;
675 		ArrayType  *arrayval;
676 		int16		elmlen;
677 		bool		elmbyval;
678 		char		elmalign;
679 		int			num_elems;
680 		Datum	   *elem_values;
681 		bool	   *elem_nulls;
682 
683 		/*
684 		 * Compute and deconstruct the array expression. (Notes in
685 		 * ExecIndexEvalRuntimeKeys() apply here too.)
686 		 */
687 		arraydatum = ExecEvalExpr(array_expr,
688 								  econtext,
689 								  &isNull);
690 		if (isNull)
691 		{
692 			result = false;
693 			break;				/* no point in evaluating more */
694 		}
695 		arrayval = DatumGetArrayTypeP(arraydatum);
696 		/* We could cache this data, but not clear it's worth it */
697 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
698 							 &elmlen, &elmbyval, &elmalign);
699 		deconstruct_array(arrayval,
700 						  ARR_ELEMTYPE(arrayval),
701 						  elmlen, elmbyval, elmalign,
702 						  &elem_values, &elem_nulls, &num_elems);
703 		if (num_elems <= 0)
704 		{
705 			result = false;
706 			break;				/* no point in evaluating more */
707 		}
708 
709 		/*
710 		 * Note: we expect the previous array data, if any, to be
711 		 * automatically freed by resetting the per-tuple context; hence no
712 		 * pfree's here.
713 		 */
714 		arrayKeys[j].elem_values = elem_values;
715 		arrayKeys[j].elem_nulls = elem_nulls;
716 		arrayKeys[j].num_elems = num_elems;
717 		scan_key->sk_argument = elem_values[0];
718 		if (elem_nulls[0])
719 			scan_key->sk_flags |= SK_ISNULL;
720 		else
721 			scan_key->sk_flags &= ~SK_ISNULL;
722 		arrayKeys[j].next_elem = 1;
723 	}
724 
725 	MemoryContextSwitchTo(oldContext);
726 
727 	return result;
728 }
729 
730 /*
731  * ExecIndexAdvanceArrayKeys
732  *		Advance to the next set of array key values, if any.
733  *
734  * Returns true if there is another set of values to consider, false if not.
735  * On true result, the scankeys are initialized with the next set of values.
736  */
737 bool
ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo * arrayKeys,int numArrayKeys)738 ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
739 {
740 	bool		found = false;
741 	int			j;
742 
743 	/*
744 	 * Note we advance the rightmost array key most quickly, since it will
745 	 * correspond to the lowest-order index column among the available
746 	 * qualifications.  This is hypothesized to result in better locality of
747 	 * access in the index.
748 	 */
749 	for (j = numArrayKeys - 1; j >= 0; j--)
750 	{
751 		ScanKey		scan_key = arrayKeys[j].scan_key;
752 		int			next_elem = arrayKeys[j].next_elem;
753 		int			num_elems = arrayKeys[j].num_elems;
754 		Datum	   *elem_values = arrayKeys[j].elem_values;
755 		bool	   *elem_nulls = arrayKeys[j].elem_nulls;
756 
757 		if (next_elem >= num_elems)
758 		{
759 			next_elem = 0;
760 			found = false;		/* need to advance next array key */
761 		}
762 		else
763 			found = true;
764 		scan_key->sk_argument = elem_values[next_elem];
765 		if (elem_nulls[next_elem])
766 			scan_key->sk_flags |= SK_ISNULL;
767 		else
768 			scan_key->sk_flags &= ~SK_ISNULL;
769 		arrayKeys[j].next_elem = next_elem + 1;
770 		if (found)
771 			break;
772 	}
773 
774 	return found;
775 }
776 
777 
778 /* ----------------------------------------------------------------
779  *		ExecEndIndexScan
780  * ----------------------------------------------------------------
781  */
782 void
ExecEndIndexScan(IndexScanState * node)783 ExecEndIndexScan(IndexScanState *node)
784 {
785 	Relation	indexRelationDesc;
786 	IndexScanDesc indexScanDesc;
787 
788 	/*
789 	 * extract information from the node
790 	 */
791 	indexRelationDesc = node->iss_RelationDesc;
792 	indexScanDesc = node->iss_ScanDesc;
793 
794 	/*
795 	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
796 	 */
797 #ifdef NOT_USED
798 	ExecFreeExprContext(&node->ss.ps);
799 	if (node->iss_RuntimeContext)
800 		FreeExprContext(node->iss_RuntimeContext, true);
801 #endif
802 
803 	/*
804 	 * clear out tuple table slots
805 	 */
806 	if (node->ss.ps.ps_ResultTupleSlot)
807 		ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
808 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
809 
810 	/*
811 	 * close the index relation (no-op if we didn't open it)
812 	 */
813 	if (indexScanDesc)
814 		index_endscan(indexScanDesc);
815 	if (indexRelationDesc)
816 		index_close(indexRelationDesc, NoLock);
817 }
818 
819 /* ----------------------------------------------------------------
820  *		ExecIndexMarkPos
821  *
822  * Note: we assume that no caller attempts to set a mark before having read
823  * at least one tuple.  Otherwise, iss_ScanDesc might still be NULL.
824  * ----------------------------------------------------------------
825  */
826 void
ExecIndexMarkPos(IndexScanState * node)827 ExecIndexMarkPos(IndexScanState *node)
828 {
829 	EState	   *estate = node->ss.ps.state;
830 	EPQState   *epqstate = estate->es_epq_active;
831 
832 	if (epqstate != NULL)
833 	{
834 		/*
835 		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
836 		 * this relation, then we shouldn't access the index at all.  We would
837 		 * instead need to save, and later restore, the state of the
838 		 * relsubs_done flag, so that re-fetching the test tuple is possible.
839 		 * However, given the assumption that no caller sets a mark at the
840 		 * start of the scan, we can only get here with relsubs_done[i]
841 		 * already set, and so no state need be saved.
842 		 */
843 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
844 
845 		Assert(scanrelid > 0);
846 		if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
847 			epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
848 		{
849 			/* Verify the claim above */
850 			if (!epqstate->relsubs_done[scanrelid - 1])
851 				elog(ERROR, "unexpected ExecIndexMarkPos call in EPQ recheck");
852 			return;
853 		}
854 	}
855 
856 	index_markpos(node->iss_ScanDesc);
857 }
858 
859 /* ----------------------------------------------------------------
860  *		ExecIndexRestrPos
861  * ----------------------------------------------------------------
862  */
863 void
ExecIndexRestrPos(IndexScanState * node)864 ExecIndexRestrPos(IndexScanState *node)
865 {
866 	EState	   *estate = node->ss.ps.state;
867 	EPQState   *epqstate = estate->es_epq_active;
868 
869 	if (estate->es_epq_active != NULL)
870 	{
871 		/* See comments in ExecIndexMarkPos */
872 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
873 
874 		Assert(scanrelid > 0);
875 		if (epqstate->relsubs_slot[scanrelid - 1] != NULL ||
876 			epqstate->relsubs_rowmark[scanrelid - 1] != NULL)
877 		{
878 			/* Verify the claim above */
879 			if (!epqstate->relsubs_done[scanrelid - 1])
880 				elog(ERROR, "unexpected ExecIndexRestrPos call in EPQ recheck");
881 			return;
882 		}
883 	}
884 
885 	index_restrpos(node->iss_ScanDesc);
886 }
887 
888 /* ----------------------------------------------------------------
889  *		ExecInitIndexScan
890  *
891  *		Initializes the index scan's state information, creates
892  *		scan keys, and opens the base and index relations.
893  *
894  *		Note: index scans have 2 sets of state information because
895  *			  we have to keep track of the base relation and the
896  *			  index relation.
897  * ----------------------------------------------------------------
898  */
899 IndexScanState *
ExecInitIndexScan(IndexScan * node,EState * estate,int eflags)900 ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
901 {
902 	IndexScanState *indexstate;
903 	Relation	currentRelation;
904 	LOCKMODE	lockmode;
905 
906 	/*
907 	 * create state structure
908 	 */
909 	indexstate = makeNode(IndexScanState);
910 	indexstate->ss.ps.plan = (Plan *) node;
911 	indexstate->ss.ps.state = estate;
912 	indexstate->ss.ps.ExecProcNode = ExecIndexScan;
913 
914 	/*
915 	 * Miscellaneous initialization
916 	 *
917 	 * create expression context for node
918 	 */
919 	ExecAssignExprContext(estate, &indexstate->ss.ps);
920 
921 	/*
922 	 * open the scan relation
923 	 */
924 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
925 
926 	indexstate->ss.ss_currentRelation = currentRelation;
927 	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */
928 
929 	/*
930 	 * get the scan type from the relation descriptor.
931 	 */
932 	ExecInitScanTupleSlot(estate, &indexstate->ss,
933 						  RelationGetDescr(currentRelation),
934 						  table_slot_callbacks(currentRelation));
935 
936 	/*
937 	 * Initialize result type and projection.
938 	 */
939 	ExecInitResultTypeTL(&indexstate->ss.ps);
940 	ExecAssignScanProjectionInfo(&indexstate->ss);
941 
942 	/*
943 	 * initialize child expressions
944 	 *
945 	 * Note: we don't initialize all of the indexqual expression, only the
946 	 * sub-parts corresponding to runtime keys (see below).  Likewise for
947 	 * indexorderby, if any.  But the indexqualorig expression is always
948 	 * initialized even though it will only be used in some uncommon cases ---
949 	 * would be nice to improve that.  (Problem is that any SubPlans present
950 	 * in the expression must be found now...)
951 	 */
952 	indexstate->ss.ps.qual =
953 		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
954 	indexstate->indexqualorig =
955 		ExecInitQual(node->indexqualorig, (PlanState *) indexstate);
956 	indexstate->indexorderbyorig =
957 		ExecInitExprList(node->indexorderbyorig, (PlanState *) indexstate);
958 
959 	/*
960 	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
961 	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
962 	 * references to nonexistent indexes.
963 	 */
964 	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
965 		return indexstate;
966 
967 	/* Open the index relation. */
968 	lockmode = exec_rt_fetch(node->scan.scanrelid, estate)->rellockmode;
969 	indexstate->iss_RelationDesc = index_open(node->indexid, lockmode);
970 
971 	/*
972 	 * Initialize index-specific scan state
973 	 */
974 	indexstate->iss_RuntimeKeysReady = false;
975 	indexstate->iss_RuntimeKeys = NULL;
976 	indexstate->iss_NumRuntimeKeys = 0;
977 
978 	/*
979 	 * build the index scan keys from the index qualification
980 	 */
981 	ExecIndexBuildScanKeys((PlanState *) indexstate,
982 						   indexstate->iss_RelationDesc,
983 						   node->indexqual,
984 						   false,
985 						   &indexstate->iss_ScanKeys,
986 						   &indexstate->iss_NumScanKeys,
987 						   &indexstate->iss_RuntimeKeys,
988 						   &indexstate->iss_NumRuntimeKeys,
989 						   NULL,	/* no ArrayKeys */
990 						   NULL);
991 
992 	/*
993 	 * any ORDER BY exprs have to be turned into scankeys in the same way
994 	 */
995 	ExecIndexBuildScanKeys((PlanState *) indexstate,
996 						   indexstate->iss_RelationDesc,
997 						   node->indexorderby,
998 						   true,
999 						   &indexstate->iss_OrderByKeys,
1000 						   &indexstate->iss_NumOrderByKeys,
1001 						   &indexstate->iss_RuntimeKeys,
1002 						   &indexstate->iss_NumRuntimeKeys,
1003 						   NULL,	/* no ArrayKeys */
1004 						   NULL);
1005 
1006 	/* Initialize sort support, if we need to re-check ORDER BY exprs */
1007 	if (indexstate->iss_NumOrderByKeys > 0)
1008 	{
1009 		int			numOrderByKeys = indexstate->iss_NumOrderByKeys;
1010 		int			i;
1011 		ListCell   *lco;
1012 		ListCell   *lcx;
1013 
1014 		/*
1015 		 * Prepare sort support, and look up the data type for each ORDER BY
1016 		 * expression.
1017 		 */
1018 		Assert(numOrderByKeys == list_length(node->indexorderbyops));
1019 		Assert(numOrderByKeys == list_length(node->indexorderbyorig));
1020 		indexstate->iss_SortSupport = (SortSupportData *)
1021 			palloc0(numOrderByKeys * sizeof(SortSupportData));
1022 		indexstate->iss_OrderByTypByVals = (bool *)
1023 			palloc(numOrderByKeys * sizeof(bool));
1024 		indexstate->iss_OrderByTypLens = (int16 *)
1025 			palloc(numOrderByKeys * sizeof(int16));
1026 		i = 0;
1027 		forboth(lco, node->indexorderbyops, lcx, node->indexorderbyorig)
1028 		{
1029 			Oid			orderbyop = lfirst_oid(lco);
1030 			Node	   *orderbyexpr = (Node *) lfirst(lcx);
1031 			Oid			orderbyType = exprType(orderbyexpr);
1032 			Oid			orderbyColl = exprCollation(orderbyexpr);
1033 			SortSupport orderbysort = &indexstate->iss_SortSupport[i];
1034 
1035 			/* Initialize sort support */
1036 			orderbysort->ssup_cxt = CurrentMemoryContext;
1037 			orderbysort->ssup_collation = orderbyColl;
1038 			/* See cmp_orderbyvals() comments on NULLS LAST */
1039 			orderbysort->ssup_nulls_first = false;
1040 			/* ssup_attno is unused here and elsewhere */
1041 			orderbysort->ssup_attno = 0;
1042 			/* No abbreviation */
1043 			orderbysort->abbreviate = false;
1044 			PrepareSortSupportFromOrderingOp(orderbyop, orderbysort);
1045 
1046 			get_typlenbyval(orderbyType,
1047 							&indexstate->iss_OrderByTypLens[i],
1048 							&indexstate->iss_OrderByTypByVals[i]);
1049 			i++;
1050 		}
1051 
1052 		/* allocate arrays to hold the re-calculated distances */
1053 		indexstate->iss_OrderByValues = (Datum *)
1054 			palloc(numOrderByKeys * sizeof(Datum));
1055 		indexstate->iss_OrderByNulls = (bool *)
1056 			palloc(numOrderByKeys * sizeof(bool));
1057 
1058 		/* and initialize the reorder queue */
1059 		indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp,
1060 															indexstate);
1061 	}
1062 
1063 	/*
1064 	 * If we have runtime keys, we need an ExprContext to evaluate them. The
1065 	 * node's standard context won't do because we want to reset that context
1066 	 * for every tuple.  So, build another context just like the other one...
1067 	 * -tgl 7/11/00
1068 	 */
1069 	if (indexstate->iss_NumRuntimeKeys != 0)
1070 	{
1071 		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
1072 
1073 		ExecAssignExprContext(estate, &indexstate->ss.ps);
1074 		indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
1075 		indexstate->ss.ps.ps_ExprContext = stdecontext;
1076 	}
1077 	else
1078 	{
1079 		indexstate->iss_RuntimeContext = NULL;
1080 	}
1081 
1082 	/*
1083 	 * all done.
1084 	 */
1085 	return indexstate;
1086 }
1087 
1088 
1089 /*
1090  * ExecIndexBuildScanKeys
1091  *		Build the index scan keys from the index qualification expressions
1092  *
1093  * The index quals are passed to the index AM in the form of a ScanKey array.
1094  * This routine sets up the ScanKeys, fills in all constant fields of the
1095  * ScanKeys, and prepares information about the keys that have non-constant
1096  * comparison values.  We divide index qual expressions into five types:
1097  *
1098  * 1. Simple operator with constant comparison value ("indexkey op constant").
1099  * For these, we just fill in a ScanKey containing the constant value.
1100  *
1101  * 2. Simple operator with non-constant value ("indexkey op expression").
1102  * For these, we create a ScanKey with everything filled in except the
1103  * expression value, and set up an IndexRuntimeKeyInfo struct to drive
1104  * evaluation of the expression at the right times.
1105  *
1106  * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
1107  * For these, we create a header ScanKey plus a subsidiary ScanKey array,
1108  * as specified in access/skey.h.  The elements of the row comparison
1109  * can have either constant or non-constant comparison values.
1110  *
1111  * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)").  If the index
1112  * supports amsearcharray, we handle these the same as simple operators,
1113  * setting the SK_SEARCHARRAY flag to tell the AM to handle them.  Otherwise,
1114  * we create a ScanKey with everything filled in except the comparison value,
1115  * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
1116  * (Note that if we use an IndexArrayKeyInfo struct, the array expression is
1117  * always treated as requiring runtime evaluation, even if it's a constant.)
1118  *
1119  * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
1120  * ScanKey properly.
1121  *
1122  * This code is also used to prepare ORDER BY expressions for amcanorderbyop
1123  * indexes.  The behavior is exactly the same, except that we have to look up
1124  * the operator differently.  Note that only cases 1 and 2 are currently
1125  * possible for ORDER BY.
1126  *
1127  * Input params are:
1128  *
1129  * planstate: executor state node we are working for
1130  * index: the index we are building scan keys for
1131  * quals: indexquals (or indexorderbys) expressions
1132  * isorderby: true if processing ORDER BY exprs, false if processing quals
1133  * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
1134  * *numRuntimeKeys: number of pre-existing runtime keys
1135  *
1136  * Output params are:
1137  *
1138  * *scanKeys: receives ptr to array of ScanKeys
1139  * *numScanKeys: receives number of scankeys
1140  * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
1141  * *numRuntimeKeys: receives number of runtime keys
1142  * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
1143  * *numArrayKeys: receives number of array keys
1144  *
1145  * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
1146  * IndexArrayKeyInfos are not supported.
1147  */
1148 void
ExecIndexBuildScanKeys(PlanState * planstate,Relation index,List * quals,bool isorderby,ScanKey * scanKeys,int * numScanKeys,IndexRuntimeKeyInfo ** runtimeKeys,int * numRuntimeKeys,IndexArrayKeyInfo ** arrayKeys,int * numArrayKeys)1149 ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
1150 					   List *quals, bool isorderby,
1151 					   ScanKey *scanKeys, int *numScanKeys,
1152 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
1153 					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
1154 {
1155 	ListCell   *qual_cell;
1156 	ScanKey		scan_keys;
1157 	IndexRuntimeKeyInfo *runtime_keys;
1158 	IndexArrayKeyInfo *array_keys;
1159 	int			n_scan_keys;
1160 	int			n_runtime_keys;
1161 	int			max_runtime_keys;
1162 	int			n_array_keys;
1163 	int			j;
1164 
1165 	/* Allocate array for ScanKey structs: one per qual */
1166 	n_scan_keys = list_length(quals);
1167 	scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
1168 
1169 	/*
1170 	 * runtime_keys array is dynamically resized as needed.  We handle it this
1171 	 * way so that the same runtime keys array can be shared between
1172 	 * indexquals and indexorderbys, which will be processed in separate calls
1173 	 * of this function.  Caller must be sure to pass in NULL/0 for first
1174 	 * call.
1175 	 */
1176 	runtime_keys = *runtimeKeys;
1177 	n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
1178 
1179 	/* Allocate array_keys as large as it could possibly need to be */
1180 	array_keys = (IndexArrayKeyInfo *)
1181 		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
1182 	n_array_keys = 0;
1183 
1184 	/*
1185 	 * for each opclause in the given qual, convert the opclause into a single
1186 	 * scan key
1187 	 */
1188 	j = 0;
1189 	foreach(qual_cell, quals)
1190 	{
1191 		Expr	   *clause = (Expr *) lfirst(qual_cell);
1192 		ScanKey		this_scan_key = &scan_keys[j++];
1193 		Oid			opno;		/* operator's OID */
1194 		RegProcedure opfuncid;	/* operator proc id used in scan */
1195 		Oid			opfamily;	/* opfamily of index column */
1196 		int			op_strategy;	/* operator's strategy number */
1197 		Oid			op_lefttype;	/* operator's declared input types */
1198 		Oid			op_righttype;
1199 		Expr	   *leftop;		/* expr on lhs of operator */
1200 		Expr	   *rightop;	/* expr on rhs ... */
1201 		AttrNumber	varattno;	/* att number used in scan */
1202 		int			indnkeyatts;
1203 
1204 		indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
1205 		if (IsA(clause, OpExpr))
1206 		{
1207 			/* indexkey op const or indexkey op expression */
1208 			int			flags = 0;
1209 			Datum		scanvalue;
1210 
1211 			opno = ((OpExpr *) clause)->opno;
1212 			opfuncid = ((OpExpr *) clause)->opfuncid;
1213 
1214 			/*
1215 			 * leftop should be the index key Var, possibly relabeled
1216 			 */
1217 			leftop = (Expr *) get_leftop(clause);
1218 
1219 			if (leftop && IsA(leftop, RelabelType))
1220 				leftop = ((RelabelType *) leftop)->arg;
1221 
1222 			Assert(leftop != NULL);
1223 
1224 			if (!(IsA(leftop, Var) &&
1225 				  ((Var *) leftop)->varno == INDEX_VAR))
1226 				elog(ERROR, "indexqual doesn't have key on left side");
1227 
1228 			varattno = ((Var *) leftop)->varattno;
1229 			if (varattno < 1 || varattno > indnkeyatts)
1230 				elog(ERROR, "bogus index qualification");
1231 
1232 			/*
1233 			 * We have to look up the operator's strategy number.  This
1234 			 * provides a cross-check that the operator does match the index.
1235 			 */
1236 			opfamily = index->rd_opfamily[varattno - 1];
1237 
1238 			get_op_opfamily_properties(opno, opfamily, isorderby,
1239 									   &op_strategy,
1240 									   &op_lefttype,
1241 									   &op_righttype);
1242 
1243 			if (isorderby)
1244 				flags |= SK_ORDER_BY;
1245 
1246 			/*
1247 			 * rightop is the constant or variable comparison value
1248 			 */
1249 			rightop = (Expr *) get_rightop(clause);
1250 
1251 			if (rightop && IsA(rightop, RelabelType))
1252 				rightop = ((RelabelType *) rightop)->arg;
1253 
1254 			Assert(rightop != NULL);
1255 
1256 			if (IsA(rightop, Const))
1257 			{
1258 				/* OK, simple constant comparison value */
1259 				scanvalue = ((Const *) rightop)->constvalue;
1260 				if (((Const *) rightop)->constisnull)
1261 					flags |= SK_ISNULL;
1262 			}
1263 			else
1264 			{
1265 				/* Need to treat this one as a runtime key */
1266 				if (n_runtime_keys >= max_runtime_keys)
1267 				{
1268 					if (max_runtime_keys == 0)
1269 					{
1270 						max_runtime_keys = 8;
1271 						runtime_keys = (IndexRuntimeKeyInfo *)
1272 							palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1273 					}
1274 					else
1275 					{
1276 						max_runtime_keys *= 2;
1277 						runtime_keys = (IndexRuntimeKeyInfo *)
1278 							repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1279 					}
1280 				}
1281 				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1282 				runtime_keys[n_runtime_keys].key_expr =
1283 					ExecInitExpr(rightop, planstate);
1284 				runtime_keys[n_runtime_keys].key_toastable =
1285 					TypeIsToastable(op_righttype);
1286 				n_runtime_keys++;
1287 				scanvalue = (Datum) 0;
1288 			}
1289 
1290 			/*
1291 			 * initialize the scan key's fields appropriately
1292 			 */
1293 			ScanKeyEntryInitialize(this_scan_key,
1294 								   flags,
1295 								   varattno,	/* attribute number to scan */
1296 								   op_strategy, /* op's strategy */
1297 								   op_righttype,	/* strategy subtype */
1298 								   ((OpExpr *) clause)->inputcollid,	/* collation */
1299 								   opfuncid,	/* reg proc to use */
1300 								   scanvalue);	/* constant */
1301 		}
1302 		else if (IsA(clause, RowCompareExpr))
1303 		{
1304 			/* (indexkey, indexkey, ...) op (expression, expression, ...) */
1305 			RowCompareExpr *rc = (RowCompareExpr *) clause;
1306 			ScanKey		first_sub_key;
1307 			int			n_sub_key;
1308 			ListCell   *largs_cell;
1309 			ListCell   *rargs_cell;
1310 			ListCell   *opnos_cell;
1311 			ListCell   *collids_cell;
1312 
1313 			Assert(!isorderby);
1314 
1315 			first_sub_key = (ScanKey)
1316 				palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
1317 			n_sub_key = 0;
1318 
1319 			/* Scan RowCompare columns and generate subsidiary ScanKey items */
1320 			forfour(largs_cell, rc->largs, rargs_cell, rc->rargs,
1321 					opnos_cell, rc->opnos, collids_cell, rc->inputcollids)
1322 			{
1323 				ScanKey		this_sub_key = &first_sub_key[n_sub_key];
1324 				int			flags = SK_ROW_MEMBER;
1325 				Datum		scanvalue;
1326 				Oid			inputcollation;
1327 
1328 				leftop = (Expr *) lfirst(largs_cell);
1329 				rightop = (Expr *) lfirst(rargs_cell);
1330 				opno = lfirst_oid(opnos_cell);
1331 				inputcollation = lfirst_oid(collids_cell);
1332 
1333 				/*
1334 				 * leftop should be the index key Var, possibly relabeled
1335 				 */
1336 				if (leftop && IsA(leftop, RelabelType))
1337 					leftop = ((RelabelType *) leftop)->arg;
1338 
1339 				Assert(leftop != NULL);
1340 
1341 				if (!(IsA(leftop, Var) &&
1342 					  ((Var *) leftop)->varno == INDEX_VAR))
1343 					elog(ERROR, "indexqual doesn't have key on left side");
1344 
1345 				varattno = ((Var *) leftop)->varattno;
1346 
1347 				/*
1348 				 * We have to look up the operator's associated btree support
1349 				 * function
1350 				 */
1351 				if (index->rd_rel->relam != BTREE_AM_OID ||
1352 					varattno < 1 || varattno > indnkeyatts)
1353 					elog(ERROR, "bogus RowCompare index qualification");
1354 				opfamily = index->rd_opfamily[varattno - 1];
1355 
1356 				get_op_opfamily_properties(opno, opfamily, isorderby,
1357 										   &op_strategy,
1358 										   &op_lefttype,
1359 										   &op_righttype);
1360 
1361 				if (op_strategy != rc->rctype)
1362 					elog(ERROR, "RowCompare index qualification contains wrong operator");
1363 
1364 				opfuncid = get_opfamily_proc(opfamily,
1365 											 op_lefttype,
1366 											 op_righttype,
1367 											 BTORDER_PROC);
1368 				if (!RegProcedureIsValid(opfuncid))
1369 					elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
1370 						 BTORDER_PROC, op_lefttype, op_righttype, opfamily);
1371 
1372 				/*
1373 				 * rightop is the constant or variable comparison value
1374 				 */
1375 				if (rightop && IsA(rightop, RelabelType))
1376 					rightop = ((RelabelType *) rightop)->arg;
1377 
1378 				Assert(rightop != NULL);
1379 
1380 				if (IsA(rightop, Const))
1381 				{
1382 					/* OK, simple constant comparison value */
1383 					scanvalue = ((Const *) rightop)->constvalue;
1384 					if (((Const *) rightop)->constisnull)
1385 						flags |= SK_ISNULL;
1386 				}
1387 				else
1388 				{
1389 					/* Need to treat this one as a runtime key */
1390 					if (n_runtime_keys >= max_runtime_keys)
1391 					{
1392 						if (max_runtime_keys == 0)
1393 						{
1394 							max_runtime_keys = 8;
1395 							runtime_keys = (IndexRuntimeKeyInfo *)
1396 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1397 						}
1398 						else
1399 						{
1400 							max_runtime_keys *= 2;
1401 							runtime_keys = (IndexRuntimeKeyInfo *)
1402 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1403 						}
1404 					}
1405 					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
1406 					runtime_keys[n_runtime_keys].key_expr =
1407 						ExecInitExpr(rightop, planstate);
1408 					runtime_keys[n_runtime_keys].key_toastable =
1409 						TypeIsToastable(op_righttype);
1410 					n_runtime_keys++;
1411 					scanvalue = (Datum) 0;
1412 				}
1413 
1414 				/*
1415 				 * initialize the subsidiary scan key's fields appropriately
1416 				 */
1417 				ScanKeyEntryInitialize(this_sub_key,
1418 									   flags,
1419 									   varattno,	/* attribute number */
1420 									   op_strategy, /* op's strategy */
1421 									   op_righttype,	/* strategy subtype */
1422 									   inputcollation,	/* collation */
1423 									   opfuncid,	/* reg proc to use */
1424 									   scanvalue);	/* constant */
1425 				n_sub_key++;
1426 			}
1427 
1428 			/* Mark the last subsidiary scankey correctly */
1429 			first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
1430 
1431 			/*
1432 			 * We don't use ScanKeyEntryInitialize for the header because it
1433 			 * isn't going to contain a valid sk_func pointer.
1434 			 */
1435 			MemSet(this_scan_key, 0, sizeof(ScanKeyData));
1436 			this_scan_key->sk_flags = SK_ROW_HEADER;
1437 			this_scan_key->sk_attno = first_sub_key->sk_attno;
1438 			this_scan_key->sk_strategy = rc->rctype;
1439 			/* sk_subtype, sk_collation, sk_func not used in a header */
1440 			this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
1441 		}
1442 		else if (IsA(clause, ScalarArrayOpExpr))
1443 		{
1444 			/* indexkey op ANY (array-expression) */
1445 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
1446 			int			flags = 0;
1447 			Datum		scanvalue;
1448 
1449 			Assert(!isorderby);
1450 
1451 			Assert(saop->useOr);
1452 			opno = saop->opno;
1453 			opfuncid = saop->opfuncid;
1454 
1455 			/*
1456 			 * leftop should be the index key Var, possibly relabeled
1457 			 */
1458 			leftop = (Expr *) linitial(saop->args);
1459 
1460 			if (leftop && IsA(leftop, RelabelType))
1461 				leftop = ((RelabelType *) leftop)->arg;
1462 
1463 			Assert(leftop != NULL);
1464 
1465 			if (!(IsA(leftop, Var) &&
1466 				  ((Var *) leftop)->varno == INDEX_VAR))
1467 				elog(ERROR, "indexqual doesn't have key on left side");
1468 
1469 			varattno = ((Var *) leftop)->varattno;
1470 			if (varattno < 1 || varattno > indnkeyatts)
1471 				elog(ERROR, "bogus index qualification");
1472 
1473 			/*
1474 			 * We have to look up the operator's strategy number.  This
1475 			 * provides a cross-check that the operator does match the index.
1476 			 */
1477 			opfamily = index->rd_opfamily[varattno - 1];
1478 
1479 			get_op_opfamily_properties(opno, opfamily, isorderby,
1480 									   &op_strategy,
1481 									   &op_lefttype,
1482 									   &op_righttype);
1483 
1484 			/*
1485 			 * rightop is the constant or variable array value
1486 			 */
1487 			rightop = (Expr *) lsecond(saop->args);
1488 
1489 			if (rightop && IsA(rightop, RelabelType))
1490 				rightop = ((RelabelType *) rightop)->arg;
1491 
1492 			Assert(rightop != NULL);
1493 
1494 			if (index->rd_indam->amsearcharray)
1495 			{
1496 				/* Index AM will handle this like a simple operator */
1497 				flags |= SK_SEARCHARRAY;
1498 				if (IsA(rightop, Const))
1499 				{
1500 					/* OK, simple constant comparison value */
1501 					scanvalue = ((Const *) rightop)->constvalue;
1502 					if (((Const *) rightop)->constisnull)
1503 						flags |= SK_ISNULL;
1504 				}
1505 				else
1506 				{
1507 					/* Need to treat this one as a runtime key */
1508 					if (n_runtime_keys >= max_runtime_keys)
1509 					{
1510 						if (max_runtime_keys == 0)
1511 						{
1512 							max_runtime_keys = 8;
1513 							runtime_keys = (IndexRuntimeKeyInfo *)
1514 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1515 						}
1516 						else
1517 						{
1518 							max_runtime_keys *= 2;
1519 							runtime_keys = (IndexRuntimeKeyInfo *)
1520 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1521 						}
1522 					}
1523 					runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1524 					runtime_keys[n_runtime_keys].key_expr =
1525 						ExecInitExpr(rightop, planstate);
1526 
1527 					/*
1528 					 * Careful here: the runtime expression is not of
1529 					 * op_righttype, but rather is an array of same; so
1530 					 * TypeIsToastable() isn't helpful.  However, we can
1531 					 * assume that all array types are toastable.
1532 					 */
1533 					runtime_keys[n_runtime_keys].key_toastable = true;
1534 					n_runtime_keys++;
1535 					scanvalue = (Datum) 0;
1536 				}
1537 			}
1538 			else
1539 			{
1540 				/* Executor has to expand the array value */
1541 				array_keys[n_array_keys].scan_key = this_scan_key;
1542 				array_keys[n_array_keys].array_expr =
1543 					ExecInitExpr(rightop, planstate);
1544 				/* the remaining fields were zeroed by palloc0 */
1545 				n_array_keys++;
1546 				scanvalue = (Datum) 0;
1547 			}
1548 
1549 			/*
1550 			 * initialize the scan key's fields appropriately
1551 			 */
1552 			ScanKeyEntryInitialize(this_scan_key,
1553 								   flags,
1554 								   varattno,	/* attribute number to scan */
1555 								   op_strategy, /* op's strategy */
1556 								   op_righttype,	/* strategy subtype */
1557 								   saop->inputcollid,	/* collation */
1558 								   opfuncid,	/* reg proc to use */
1559 								   scanvalue);	/* constant */
1560 		}
1561 		else if (IsA(clause, NullTest))
1562 		{
1563 			/* indexkey IS NULL or indexkey IS NOT NULL */
1564 			NullTest   *ntest = (NullTest *) clause;
1565 			int			flags;
1566 
1567 			Assert(!isorderby);
1568 
1569 			/*
1570 			 * argument should be the index key Var, possibly relabeled
1571 			 */
1572 			leftop = ntest->arg;
1573 
1574 			if (leftop && IsA(leftop, RelabelType))
1575 				leftop = ((RelabelType *) leftop)->arg;
1576 
1577 			Assert(leftop != NULL);
1578 
1579 			if (!(IsA(leftop, Var) &&
1580 				  ((Var *) leftop)->varno == INDEX_VAR))
1581 				elog(ERROR, "NullTest indexqual has wrong key");
1582 
1583 			varattno = ((Var *) leftop)->varattno;
1584 
1585 			/*
1586 			 * initialize the scan key's fields appropriately
1587 			 */
1588 			switch (ntest->nulltesttype)
1589 			{
1590 				case IS_NULL:
1591 					flags = SK_ISNULL | SK_SEARCHNULL;
1592 					break;
1593 				case IS_NOT_NULL:
1594 					flags = SK_ISNULL | SK_SEARCHNOTNULL;
1595 					break;
1596 				default:
1597 					elog(ERROR, "unrecognized nulltesttype: %d",
1598 						 (int) ntest->nulltesttype);
1599 					flags = 0;	/* keep compiler quiet */
1600 					break;
1601 			}
1602 
1603 			ScanKeyEntryInitialize(this_scan_key,
1604 								   flags,
1605 								   varattno,	/* attribute number to scan */
1606 								   InvalidStrategy, /* no strategy */
1607 								   InvalidOid,	/* no strategy subtype */
1608 								   InvalidOid,	/* no collation */
1609 								   InvalidOid,	/* no reg proc for this */
1610 								   (Datum) 0);	/* constant */
1611 		}
1612 		else
1613 			elog(ERROR, "unsupported indexqual type: %d",
1614 				 (int) nodeTag(clause));
1615 	}
1616 
1617 	Assert(n_runtime_keys <= max_runtime_keys);
1618 
1619 	/* Get rid of any unused arrays */
1620 	if (n_array_keys == 0)
1621 	{
1622 		pfree(array_keys);
1623 		array_keys = NULL;
1624 	}
1625 
1626 	/*
1627 	 * Return info to our caller.
1628 	 */
1629 	*scanKeys = scan_keys;
1630 	*numScanKeys = n_scan_keys;
1631 	*runtimeKeys = runtime_keys;
1632 	*numRuntimeKeys = n_runtime_keys;
1633 	if (arrayKeys)
1634 	{
1635 		*arrayKeys = array_keys;
1636 		*numArrayKeys = n_array_keys;
1637 	}
1638 	else if (n_array_keys != 0)
1639 		elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
1640 }
1641 
1642 /* ----------------------------------------------------------------
1643  *						Parallel Scan Support
1644  * ----------------------------------------------------------------
1645  */
1646 
1647 /* ----------------------------------------------------------------
1648  *		ExecIndexScanEstimate
1649  *
1650  *		Compute the amount of space we'll need in the parallel
1651  *		query DSM, and inform pcxt->estimator about our needs.
1652  * ----------------------------------------------------------------
1653  */
1654 void
ExecIndexScanEstimate(IndexScanState * node,ParallelContext * pcxt)1655 ExecIndexScanEstimate(IndexScanState *node,
1656 					  ParallelContext *pcxt)
1657 {
1658 	EState	   *estate = node->ss.ps.state;
1659 
1660 	node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
1661 													 estate->es_snapshot);
1662 	shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
1663 	shm_toc_estimate_keys(&pcxt->estimator, 1);
1664 }
1665 
1666 /* ----------------------------------------------------------------
1667  *		ExecIndexScanInitializeDSM
1668  *
1669  *		Set up a parallel index scan descriptor.
1670  * ----------------------------------------------------------------
1671  */
1672 void
ExecIndexScanInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1673 ExecIndexScanInitializeDSM(IndexScanState *node,
1674 						   ParallelContext *pcxt)
1675 {
1676 	EState	   *estate = node->ss.ps.state;
1677 	ParallelIndexScanDesc piscan;
1678 
1679 	piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
1680 	index_parallelscan_initialize(node->ss.ss_currentRelation,
1681 								  node->iss_RelationDesc,
1682 								  estate->es_snapshot,
1683 								  piscan);
1684 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
1685 	node->iss_ScanDesc =
1686 		index_beginscan_parallel(node->ss.ss_currentRelation,
1687 								 node->iss_RelationDesc,
1688 								 node->iss_NumScanKeys,
1689 								 node->iss_NumOrderByKeys,
1690 								 piscan);
1691 
1692 	/*
1693 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1694 	 * the scankeys to the index AM.
1695 	 */
1696 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1697 		index_rescan(node->iss_ScanDesc,
1698 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1699 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1700 }
1701 
1702 /* ----------------------------------------------------------------
1703  *		ExecIndexScanReInitializeDSM
1704  *
1705  *		Reset shared state before beginning a fresh scan.
1706  * ----------------------------------------------------------------
1707  */
1708 void
ExecIndexScanReInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1709 ExecIndexScanReInitializeDSM(IndexScanState *node,
1710 							 ParallelContext *pcxt)
1711 {
1712 	index_parallelrescan(node->iss_ScanDesc);
1713 }
1714 
1715 /* ----------------------------------------------------------------
1716  *		ExecIndexScanInitializeWorker
1717  *
1718  *		Copy relevant information from TOC into planstate.
1719  * ----------------------------------------------------------------
1720  */
1721 void
ExecIndexScanInitializeWorker(IndexScanState * node,ParallelWorkerContext * pwcxt)1722 ExecIndexScanInitializeWorker(IndexScanState *node,
1723 							  ParallelWorkerContext *pwcxt)
1724 {
1725 	ParallelIndexScanDesc piscan;
1726 
1727 	piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
1728 	node->iss_ScanDesc =
1729 		index_beginscan_parallel(node->ss.ss_currentRelation,
1730 								 node->iss_RelationDesc,
1731 								 node->iss_NumScanKeys,
1732 								 node->iss_NumOrderByKeys,
1733 								 piscan);
1734 
1735 	/*
1736 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1737 	 * the scankeys to the index AM.
1738 	 */
1739 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1740 		index_rescan(node->iss_ScanDesc,
1741 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1742 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1743 }
1744