1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexscan.c
4  *	  Routines to support indexed scans of relations
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeIndexscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *		ExecIndexScan			scans a relation using an index
18  *		IndexNext				retrieve next tuple using index
19  *		IndexNextWithReorder	same, but recheck ORDER BY expressions
20  *		ExecInitIndexScan		creates and initializes state info.
21  *		ExecReScanIndexScan		rescans the indexed relation.
22  *		ExecEndIndexScan		releases all storage.
23  *		ExecIndexMarkPos		marks scan position.
24  *		ExecIndexRestrPos		restores scan position.
25  *		ExecIndexScanEstimate	estimates DSM space needed for parallel index scan
26  *		ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
27  *		ExecIndexScanReInitializeDSM reinitialize DSM for fresh scan
28  *		ExecIndexScanInitializeWorker attach to DSM info in parallel worker
29  */
30 #include "postgres.h"
31 
32 #include "access/nbtree.h"
33 #include "access/relscan.h"
34 #include "catalog/pg_am.h"
35 #include "executor/execdebug.h"
36 #include "executor/nodeIndexscan.h"
37 #include "lib/pairingheap.h"
38 #include "miscadmin.h"
39 #include "nodes/nodeFuncs.h"
40 #include "optimizer/clauses.h"
41 #include "utils/array.h"
42 #include "utils/datum.h"
43 #include "utils/lsyscache.h"
44 #include "utils/memutils.h"
45 #include "utils/rel.h"
46 
47 /*
48  * When an ordering operator is used, tuples fetched from the index that
49  * need to be reordered are queued in a pairing heap, as ReorderTuples.
50  */
51 typedef struct
52 {
53 	pairingheap_node ph_node;
54 	HeapTuple	htup;
55 	Datum	   *orderbyvals;
56 	bool	   *orderbynulls;
57 } ReorderTuple;
58 
59 static TupleTableSlot *IndexNext(IndexScanState *node);
60 static TupleTableSlot *IndexNextWithReorder(IndexScanState *node);
61 static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
62 static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot);
63 static int cmp_orderbyvals(const Datum *adist, const bool *anulls,
64 				const Datum *bdist, const bool *bnulls,
65 				IndexScanState *node);
66 static int reorderqueue_cmp(const pairingheap_node *a,
67 				 const pairingheap_node *b, void *arg);
68 static void reorderqueue_push(IndexScanState *node, HeapTuple tuple,
69 				  Datum *orderbyvals, bool *orderbynulls);
70 static HeapTuple reorderqueue_pop(IndexScanState *node);
71 
72 
73 /* ----------------------------------------------------------------
74  *		IndexNext
75  *
76  *		Retrieve a tuple from the IndexScan node's currentRelation
77  *		using the index specified in the IndexScanState information.
78  * ----------------------------------------------------------------
79  */
80 static TupleTableSlot *
IndexNext(IndexScanState * node)81 IndexNext(IndexScanState *node)
82 {
83 	EState	   *estate;
84 	ExprContext *econtext;
85 	ScanDirection direction;
86 	IndexScanDesc scandesc;
87 	HeapTuple	tuple;
88 	TupleTableSlot *slot;
89 
90 	/*
91 	 * extract necessary information from index scan node
92 	 */
93 	estate = node->ss.ps.state;
94 	direction = estate->es_direction;
95 	/* flip direction if this is an overall backward scan */
96 	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
97 	{
98 		if (ScanDirectionIsForward(direction))
99 			direction = BackwardScanDirection;
100 		else if (ScanDirectionIsBackward(direction))
101 			direction = ForwardScanDirection;
102 	}
103 	scandesc = node->iss_ScanDesc;
104 	econtext = node->ss.ps.ps_ExprContext;
105 	slot = node->ss.ss_ScanTupleSlot;
106 
107 	if (scandesc == NULL)
108 	{
109 		/*
110 		 * We reach here if the index scan is not parallel, or if we're
111 		 * serially executing an index scan that was planned to be parallel.
112 		 */
113 		scandesc = index_beginscan(node->ss.ss_currentRelation,
114 								   node->iss_RelationDesc,
115 								   estate->es_snapshot,
116 								   node->iss_NumScanKeys,
117 								   node->iss_NumOrderByKeys);
118 
119 		node->iss_ScanDesc = scandesc;
120 
121 		/*
122 		 * If no run-time keys to calculate or they are ready, go ahead and
123 		 * pass the scankeys to the index AM.
124 		 */
125 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
126 			index_rescan(scandesc,
127 						 node->iss_ScanKeys, node->iss_NumScanKeys,
128 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
129 	}
130 
131 	/*
132 	 * ok, now that we have what we need, fetch the next tuple.
133 	 */
134 	while ((tuple = index_getnext(scandesc, direction)) != NULL)
135 	{
136 		CHECK_FOR_INTERRUPTS();
137 
138 		/*
139 		 * Store the scanned tuple in the scan tuple slot of the scan state.
140 		 * Note: we pass 'false' because tuples returned by amgetnext are
141 		 * pointers onto disk pages and must not be pfree()'d.
142 		 */
143 		ExecStoreTuple(tuple,	/* tuple to store */
144 					   slot,	/* slot to store in */
145 					   scandesc->xs_cbuf,	/* buffer containing tuple */
146 					   false);	/* don't pfree */
147 
148 		/*
149 		 * If the index was lossy, we have to recheck the index quals using
150 		 * the fetched tuple.
151 		 */
152 		if (scandesc->xs_recheck)
153 		{
154 			econtext->ecxt_scantuple = slot;
155 			if (!ExecQualAndReset(node->indexqualorig, econtext))
156 			{
157 				/* Fails recheck, so drop it and loop back for another */
158 				InstrCountFiltered2(node, 1);
159 				continue;
160 			}
161 		}
162 
163 		return slot;
164 	}
165 
166 	/*
167 	 * if we get here it means the index scan failed so we are at the end of
168 	 * the scan..
169 	 */
170 	node->iss_ReachedEnd = true;
171 	return ExecClearTuple(slot);
172 }
173 
174 /* ----------------------------------------------------------------
175  *		IndexNextWithReorder
176  *
177  *		Like IndexNext, but this version can also re-check ORDER BY
178  *		expressions, and reorder the tuples as necessary.
179  * ----------------------------------------------------------------
180  */
181 static TupleTableSlot *
IndexNextWithReorder(IndexScanState * node)182 IndexNextWithReorder(IndexScanState *node)
183 {
184 	EState	   *estate;
185 	ExprContext *econtext;
186 	IndexScanDesc scandesc;
187 	HeapTuple	tuple;
188 	TupleTableSlot *slot;
189 	ReorderTuple *topmost = NULL;
190 	bool		was_exact;
191 	Datum	   *lastfetched_vals;
192 	bool	   *lastfetched_nulls;
193 	int			cmp;
194 
195 	estate = node->ss.ps.state;
196 
197 	/*
198 	 * Only forward scan is supported with reordering.  Note: we can get away
199 	 * with just Asserting here because the system will not try to run the
200 	 * plan backwards if ExecSupportsBackwardScan() says it won't work.
201 	 * Currently, that is guaranteed because no index AMs support both
202 	 * amcanorderbyop and amcanbackward; if any ever do,
203 	 * ExecSupportsBackwardScan() will need to consider indexorderbys
204 	 * explicitly.
205 	 */
206 	Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
207 	Assert(ScanDirectionIsForward(estate->es_direction));
208 
209 	scandesc = node->iss_ScanDesc;
210 	econtext = node->ss.ps.ps_ExprContext;
211 	slot = node->ss.ss_ScanTupleSlot;
212 
213 	if (scandesc == NULL)
214 	{
215 		/*
216 		 * We reach here if the index scan is not parallel, or if we're
217 		 * serially executing an index scan that was planned to be parallel.
218 		 */
219 		scandesc = index_beginscan(node->ss.ss_currentRelation,
220 								   node->iss_RelationDesc,
221 								   estate->es_snapshot,
222 								   node->iss_NumScanKeys,
223 								   node->iss_NumOrderByKeys);
224 
225 		node->iss_ScanDesc = scandesc;
226 
227 		/*
228 		 * If no run-time keys to calculate or they are ready, go ahead and
229 		 * pass the scankeys to the index AM.
230 		 */
231 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
232 			index_rescan(scandesc,
233 						 node->iss_ScanKeys, node->iss_NumScanKeys,
234 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
235 	}
236 
237 	for (;;)
238 	{
239 		CHECK_FOR_INTERRUPTS();
240 
241 		/*
242 		 * Check the reorder queue first.  If the topmost tuple in the queue
243 		 * has an ORDER BY value smaller than (or equal to) the value last
244 		 * returned by the index, we can return it now.
245 		 */
246 		if (!pairingheap_is_empty(node->iss_ReorderQueue))
247 		{
248 			topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue);
249 
250 			if (node->iss_ReachedEnd ||
251 				cmp_orderbyvals(topmost->orderbyvals,
252 								topmost->orderbynulls,
253 								scandesc->xs_orderbyvals,
254 								scandesc->xs_orderbynulls,
255 								node) <= 0)
256 			{
257 				tuple = reorderqueue_pop(node);
258 
259 				/* Pass 'true', as the tuple in the queue is a palloc'd copy */
260 				ExecStoreTuple(tuple, slot, InvalidBuffer, true);
261 				return slot;
262 			}
263 		}
264 		else if (node->iss_ReachedEnd)
265 		{
266 			/* Queue is empty, and no more tuples from index.  We're done. */
267 			return ExecClearTuple(slot);
268 		}
269 
270 		/*
271 		 * Fetch next tuple from the index.
272 		 */
273 next_indextuple:
274 		tuple = index_getnext(scandesc, ForwardScanDirection);
275 		if (!tuple)
276 		{
277 			/*
278 			 * No more tuples from the index.  But we still need to drain any
279 			 * remaining tuples from the queue before we're done.
280 			 */
281 			node->iss_ReachedEnd = true;
282 			continue;
283 		}
284 
285 		/*
286 		 * Store the scanned tuple in the scan tuple slot of the scan state.
287 		 * Note: we pass 'false' because tuples returned by amgetnext are
288 		 * pointers onto disk pages and must not be pfree()'d.
289 		 */
290 		ExecStoreTuple(tuple,	/* tuple to store */
291 					   slot,	/* slot to store in */
292 					   scandesc->xs_cbuf,	/* buffer containing tuple */
293 					   false);	/* don't pfree */
294 
295 		/*
296 		 * If the index was lossy, we have to recheck the index quals and
297 		 * ORDER BY expressions using the fetched tuple.
298 		 */
299 		if (scandesc->xs_recheck)
300 		{
301 			econtext->ecxt_scantuple = slot;
302 			if (!ExecQualAndReset(node->indexqualorig, econtext))
303 			{
304 				/* Fails recheck, so drop it and loop back for another */
305 				InstrCountFiltered2(node, 1);
306 				/* allow this loop to be cancellable */
307 				CHECK_FOR_INTERRUPTS();
308 				goto next_indextuple;
309 			}
310 		}
311 
312 		if (scandesc->xs_recheckorderby)
313 		{
314 			econtext->ecxt_scantuple = slot;
315 			ResetExprContext(econtext);
316 			EvalOrderByExpressions(node, econtext);
317 
318 			/*
319 			 * Was the ORDER BY value returned by the index accurate?  The
320 			 * recheck flag means that the index can return inaccurate values,
321 			 * but then again, the value returned for any particular tuple
322 			 * could also be exactly correct.  Compare the value returned by
323 			 * the index with the recalculated value.  (If the value returned
324 			 * by the index happened to be exact right, we can often avoid
325 			 * pushing the tuple to the queue, just to pop it back out again.)
326 			 */
327 			cmp = cmp_orderbyvals(node->iss_OrderByValues,
328 								  node->iss_OrderByNulls,
329 								  scandesc->xs_orderbyvals,
330 								  scandesc->xs_orderbynulls,
331 								  node);
332 			if (cmp < 0)
333 				elog(ERROR, "index returned tuples in wrong order");
334 			else if (cmp == 0)
335 				was_exact = true;
336 			else
337 				was_exact = false;
338 			lastfetched_vals = node->iss_OrderByValues;
339 			lastfetched_nulls = node->iss_OrderByNulls;
340 		}
341 		else
342 		{
343 			was_exact = true;
344 			lastfetched_vals = scandesc->xs_orderbyvals;
345 			lastfetched_nulls = scandesc->xs_orderbynulls;
346 		}
347 
348 		/*
349 		 * Can we return this tuple immediately, or does it need to be pushed
350 		 * to the reorder queue?  If the ORDER BY expression values returned
351 		 * by the index were inaccurate, we can't return it yet, because the
352 		 * next tuple from the index might need to come before this one. Also,
353 		 * we can't return it yet if there are any smaller tuples in the queue
354 		 * already.
355 		 */
356 		if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals,
357 													  lastfetched_nulls,
358 													  topmost->orderbyvals,
359 													  topmost->orderbynulls,
360 													  node) > 0))
361 		{
362 			/* Put this tuple to the queue */
363 			reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
364 			continue;
365 		}
366 		else
367 		{
368 			/* Can return this tuple immediately. */
369 			return slot;
370 		}
371 	}
372 
373 	/*
374 	 * if we get here it means the index scan failed so we are at the end of
375 	 * the scan..
376 	 */
377 	return ExecClearTuple(slot);
378 }
379 
380 /*
381  * Calculate the expressions in the ORDER BY clause, based on the heap tuple.
382  */
383 static void
EvalOrderByExpressions(IndexScanState * node,ExprContext * econtext)384 EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext)
385 {
386 	int			i;
387 	ListCell   *l;
388 	MemoryContext oldContext;
389 
390 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
391 
392 	i = 0;
393 	foreach(l, node->indexorderbyorig)
394 	{
395 		ExprState  *orderby = (ExprState *) lfirst(l);
396 
397 		node->iss_OrderByValues[i] = ExecEvalExpr(orderby,
398 												  econtext,
399 												  &node->iss_OrderByNulls[i]);
400 		i++;
401 	}
402 
403 	MemoryContextSwitchTo(oldContext);
404 }
405 
406 /*
407  * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
408  */
409 static bool
IndexRecheck(IndexScanState * node,TupleTableSlot * slot)410 IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
411 {
412 	ExprContext *econtext;
413 
414 	/*
415 	 * extract necessary information from index scan node
416 	 */
417 	econtext = node->ss.ps.ps_ExprContext;
418 
419 	/* Does the tuple meet the indexqual condition? */
420 	econtext->ecxt_scantuple = slot;
421 	return ExecQualAndReset(node->indexqualorig, econtext);
422 }
423 
424 
425 /*
426  * Compare ORDER BY expression values.
427  */
428 static int
cmp_orderbyvals(const Datum * adist,const bool * anulls,const Datum * bdist,const bool * bnulls,IndexScanState * node)429 cmp_orderbyvals(const Datum *adist, const bool *anulls,
430 				const Datum *bdist, const bool *bnulls,
431 				IndexScanState *node)
432 {
433 	int			i;
434 	int			result;
435 
436 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
437 	{
438 		SortSupport ssup = &node->iss_SortSupport[i];
439 
440 		/*
441 		 * Handle nulls.  We only need to support NULLS LAST ordering, because
442 		 * match_pathkeys_to_index() doesn't consider indexorderby
443 		 * implementation otherwise.
444 		 */
445 		if (anulls[i] && !bnulls[i])
446 			return 1;
447 		else if (!anulls[i] && bnulls[i])
448 			return -1;
449 		else if (anulls[i] && bnulls[i])
450 			return 0;
451 
452 		result = ssup->comparator(adist[i], bdist[i], ssup);
453 		if (result != 0)
454 			return result;
455 	}
456 
457 	return 0;
458 }
459 
460 /*
461  * Pairing heap provides getting topmost (greatest) element while KNN provides
462  * ascending sort.  That's why we invert the sort order.
463  */
464 static int
reorderqueue_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)465 reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b,
466 				 void *arg)
467 {
468 	ReorderTuple *rta = (ReorderTuple *) a;
469 	ReorderTuple *rtb = (ReorderTuple *) b;
470 	IndexScanState *node = (IndexScanState *) arg;
471 
472 	/* exchange argument order to invert the sort order */
473 	return cmp_orderbyvals(rtb->orderbyvals, rtb->orderbynulls,
474 						   rta->orderbyvals, rta->orderbynulls,
475 						   node);
476 }
477 
478 /*
479  * Helper function to push a tuple to the reorder queue.
480  */
481 static void
reorderqueue_push(IndexScanState * node,HeapTuple tuple,Datum * orderbyvals,bool * orderbynulls)482 reorderqueue_push(IndexScanState *node, HeapTuple tuple,
483 				  Datum *orderbyvals, bool *orderbynulls)
484 {
485 	IndexScanDesc scandesc = node->iss_ScanDesc;
486 	EState	   *estate = node->ss.ps.state;
487 	MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
488 	ReorderTuple *rt;
489 	int			i;
490 
491 	rt = (ReorderTuple *) palloc(sizeof(ReorderTuple));
492 	rt->htup = heap_copytuple(tuple);
493 	rt->orderbyvals =
494 		(Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys);
495 	rt->orderbynulls =
496 		(bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys);
497 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
498 	{
499 		if (!orderbynulls[i])
500 			rt->orderbyvals[i] = datumCopy(orderbyvals[i],
501 										   node->iss_OrderByTypByVals[i],
502 										   node->iss_OrderByTypLens[i]);
503 		else
504 			rt->orderbyvals[i] = (Datum) 0;
505 		rt->orderbynulls[i] = orderbynulls[i];
506 	}
507 	pairingheap_add(node->iss_ReorderQueue, &rt->ph_node);
508 
509 	MemoryContextSwitchTo(oldContext);
510 }
511 
512 /*
513  * Helper function to pop the next tuple from the reorder queue.
514  */
515 static HeapTuple
reorderqueue_pop(IndexScanState * node)516 reorderqueue_pop(IndexScanState *node)
517 {
518 	HeapTuple	result;
519 	ReorderTuple *topmost;
520 	int			i;
521 
522 	topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
523 
524 	result = topmost->htup;
525 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
526 	{
527 		if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
528 			pfree(DatumGetPointer(topmost->orderbyvals[i]));
529 	}
530 	pfree(topmost->orderbyvals);
531 	pfree(topmost->orderbynulls);
532 	pfree(topmost);
533 
534 	return result;
535 }
536 
537 
538 /* ----------------------------------------------------------------
539  *		ExecIndexScan(node)
540  * ----------------------------------------------------------------
541  */
542 static TupleTableSlot *
ExecIndexScan(PlanState * pstate)543 ExecIndexScan(PlanState *pstate)
544 {
545 	IndexScanState *node = castNode(IndexScanState, pstate);
546 
547 	/*
548 	 * If we have runtime keys and they've not already been set up, do it now.
549 	 */
550 	if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
551 		ExecReScan((PlanState *) node);
552 
553 	if (node->iss_NumOrderByKeys > 0)
554 		return ExecScan(&node->ss,
555 						(ExecScanAccessMtd) IndexNextWithReorder,
556 						(ExecScanRecheckMtd) IndexRecheck);
557 	else
558 		return ExecScan(&node->ss,
559 						(ExecScanAccessMtd) IndexNext,
560 						(ExecScanRecheckMtd) IndexRecheck);
561 }
562 
563 /* ----------------------------------------------------------------
564  *		ExecReScanIndexScan(node)
565  *
566  *		Recalculates the values of any scan keys whose value depends on
567  *		information known at runtime, then rescans the indexed relation.
568  *
569  *		Updating the scan key was formerly done separately in
570  *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
571  *		rescans of indices and relations/general streams more uniform.
572  * ----------------------------------------------------------------
573  */
574 void
ExecReScanIndexScan(IndexScanState * node)575 ExecReScanIndexScan(IndexScanState *node)
576 {
577 	/*
578 	 * If we are doing runtime key calculations (ie, any of the index key
579 	 * values weren't simple Consts), compute the new key values.  But first,
580 	 * reset the context so we don't leak memory as each outer tuple is
581 	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
582 	 * on each call.
583 	 */
584 	if (node->iss_NumRuntimeKeys != 0)
585 	{
586 		ExprContext *econtext = node->iss_RuntimeContext;
587 
588 		ResetExprContext(econtext);
589 		ExecIndexEvalRuntimeKeys(econtext,
590 								 node->iss_RuntimeKeys,
591 								 node->iss_NumRuntimeKeys);
592 	}
593 	node->iss_RuntimeKeysReady = true;
594 
595 	/* flush the reorder queue */
596 	if (node->iss_ReorderQueue)
597 	{
598 		while (!pairingheap_is_empty(node->iss_ReorderQueue))
599 			reorderqueue_pop(node);
600 	}
601 
602 	/* reset index scan */
603 	if (node->iss_ScanDesc)
604 		index_rescan(node->iss_ScanDesc,
605 					 node->iss_ScanKeys, node->iss_NumScanKeys,
606 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
607 	node->iss_ReachedEnd = false;
608 
609 	ExecScanReScan(&node->ss);
610 }
611 
612 
613 /*
614  * ExecIndexEvalRuntimeKeys
615  *		Evaluate any runtime key values, and update the scankeys.
616  */
617 void
ExecIndexEvalRuntimeKeys(ExprContext * econtext,IndexRuntimeKeyInfo * runtimeKeys,int numRuntimeKeys)618 ExecIndexEvalRuntimeKeys(ExprContext *econtext,
619 						 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
620 {
621 	int			j;
622 	MemoryContext oldContext;
623 
624 	/* We want to keep the key values in per-tuple memory */
625 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
626 
627 	for (j = 0; j < numRuntimeKeys; j++)
628 	{
629 		ScanKey		scan_key = runtimeKeys[j].scan_key;
630 		ExprState  *key_expr = runtimeKeys[j].key_expr;
631 		Datum		scanvalue;
632 		bool		isNull;
633 
634 		/*
635 		 * For each run-time key, extract the run-time expression and evaluate
636 		 * it with respect to the current context.  We then stick the result
637 		 * into the proper scan key.
638 		 *
639 		 * Note: the result of the eval could be a pass-by-ref value that's
640 		 * stored in some outer scan's tuple, not in
641 		 * econtext->ecxt_per_tuple_memory.  We assume that the outer tuple
642 		 * will stay put throughout our scan.  If this is wrong, we could copy
643 		 * the result into our context explicitly, but I think that's not
644 		 * necessary.
645 		 *
646 		 * It's also entirely possible that the result of the eval is a
647 		 * toasted value.  In this case we should forcibly detoast it, to
648 		 * avoid repeat detoastings each time the value is examined by an
649 		 * index support function.
650 		 */
651 		scanvalue = ExecEvalExpr(key_expr,
652 								 econtext,
653 								 &isNull);
654 		if (isNull)
655 		{
656 			scan_key->sk_argument = scanvalue;
657 			scan_key->sk_flags |= SK_ISNULL;
658 		}
659 		else
660 		{
661 			if (runtimeKeys[j].key_toastable)
662 				scanvalue = PointerGetDatum(PG_DETOAST_DATUM(scanvalue));
663 			scan_key->sk_argument = scanvalue;
664 			scan_key->sk_flags &= ~SK_ISNULL;
665 		}
666 	}
667 
668 	MemoryContextSwitchTo(oldContext);
669 }
670 
671 /*
672  * ExecIndexEvalArrayKeys
673  *		Evaluate any array key values, and set up to iterate through arrays.
674  *
675  * Returns true if there are array elements to consider; false means there
676  * is at least one null or empty array, so no match is possible.  On true
677  * result, the scankeys are initialized with the first elements of the arrays.
678  */
679 bool
ExecIndexEvalArrayKeys(ExprContext * econtext,IndexArrayKeyInfo * arrayKeys,int numArrayKeys)680 ExecIndexEvalArrayKeys(ExprContext *econtext,
681 					   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
682 {
683 	bool		result = true;
684 	int			j;
685 	MemoryContext oldContext;
686 
687 	/* We want to keep the arrays in per-tuple memory */
688 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
689 
690 	for (j = 0; j < numArrayKeys; j++)
691 	{
692 		ScanKey		scan_key = arrayKeys[j].scan_key;
693 		ExprState  *array_expr = arrayKeys[j].array_expr;
694 		Datum		arraydatum;
695 		bool		isNull;
696 		ArrayType  *arrayval;
697 		int16		elmlen;
698 		bool		elmbyval;
699 		char		elmalign;
700 		int			num_elems;
701 		Datum	   *elem_values;
702 		bool	   *elem_nulls;
703 
704 		/*
705 		 * Compute and deconstruct the array expression. (Notes in
706 		 * ExecIndexEvalRuntimeKeys() apply here too.)
707 		 */
708 		arraydatum = ExecEvalExpr(array_expr,
709 								  econtext,
710 								  &isNull);
711 		if (isNull)
712 		{
713 			result = false;
714 			break;				/* no point in evaluating more */
715 		}
716 		arrayval = DatumGetArrayTypeP(arraydatum);
717 		/* We could cache this data, but not clear it's worth it */
718 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
719 							 &elmlen, &elmbyval, &elmalign);
720 		deconstruct_array(arrayval,
721 						  ARR_ELEMTYPE(arrayval),
722 						  elmlen, elmbyval, elmalign,
723 						  &elem_values, &elem_nulls, &num_elems);
724 		if (num_elems <= 0)
725 		{
726 			result = false;
727 			break;				/* no point in evaluating more */
728 		}
729 
730 		/*
731 		 * Note: we expect the previous array data, if any, to be
732 		 * automatically freed by resetting the per-tuple context; hence no
733 		 * pfree's here.
734 		 */
735 		arrayKeys[j].elem_values = elem_values;
736 		arrayKeys[j].elem_nulls = elem_nulls;
737 		arrayKeys[j].num_elems = num_elems;
738 		scan_key->sk_argument = elem_values[0];
739 		if (elem_nulls[0])
740 			scan_key->sk_flags |= SK_ISNULL;
741 		else
742 			scan_key->sk_flags &= ~SK_ISNULL;
743 		arrayKeys[j].next_elem = 1;
744 	}
745 
746 	MemoryContextSwitchTo(oldContext);
747 
748 	return result;
749 }
750 
751 /*
752  * ExecIndexAdvanceArrayKeys
753  *		Advance to the next set of array key values, if any.
754  *
755  * Returns true if there is another set of values to consider, false if not.
756  * On true result, the scankeys are initialized with the next set of values.
757  */
758 bool
ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo * arrayKeys,int numArrayKeys)759 ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
760 {
761 	bool		found = false;
762 	int			j;
763 
764 	/*
765 	 * Note we advance the rightmost array key most quickly, since it will
766 	 * correspond to the lowest-order index column among the available
767 	 * qualifications.  This is hypothesized to result in better locality of
768 	 * access in the index.
769 	 */
770 	for (j = numArrayKeys - 1; j >= 0; j--)
771 	{
772 		ScanKey		scan_key = arrayKeys[j].scan_key;
773 		int			next_elem = arrayKeys[j].next_elem;
774 		int			num_elems = arrayKeys[j].num_elems;
775 		Datum	   *elem_values = arrayKeys[j].elem_values;
776 		bool	   *elem_nulls = arrayKeys[j].elem_nulls;
777 
778 		if (next_elem >= num_elems)
779 		{
780 			next_elem = 0;
781 			found = false;		/* need to advance next array key */
782 		}
783 		else
784 			found = true;
785 		scan_key->sk_argument = elem_values[next_elem];
786 		if (elem_nulls[next_elem])
787 			scan_key->sk_flags |= SK_ISNULL;
788 		else
789 			scan_key->sk_flags &= ~SK_ISNULL;
790 		arrayKeys[j].next_elem = next_elem + 1;
791 		if (found)
792 			break;
793 	}
794 
795 	return found;
796 }
797 
798 
799 /* ----------------------------------------------------------------
800  *		ExecEndIndexScan
801  * ----------------------------------------------------------------
802  */
803 void
ExecEndIndexScan(IndexScanState * node)804 ExecEndIndexScan(IndexScanState *node)
805 {
806 	Relation	indexRelationDesc;
807 	IndexScanDesc indexScanDesc;
808 	Relation	relation;
809 
810 	/*
811 	 * extract information from the node
812 	 */
813 	indexRelationDesc = node->iss_RelationDesc;
814 	indexScanDesc = node->iss_ScanDesc;
815 	relation = node->ss.ss_currentRelation;
816 
817 	/*
818 	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
819 	 */
820 #ifdef NOT_USED
821 	ExecFreeExprContext(&node->ss.ps);
822 	if (node->iss_RuntimeContext)
823 		FreeExprContext(node->iss_RuntimeContext, true);
824 #endif
825 
826 	/*
827 	 * clear out tuple table slots
828 	 */
829 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
830 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
831 
832 	/*
833 	 * close the index relation (no-op if we didn't open it)
834 	 */
835 	if (indexScanDesc)
836 		index_endscan(indexScanDesc);
837 	if (indexRelationDesc)
838 		index_close(indexRelationDesc, NoLock);
839 
840 	/*
841 	 * close the heap relation.
842 	 */
843 	ExecCloseScanRelation(relation);
844 }
845 
846 /* ----------------------------------------------------------------
847  *		ExecIndexMarkPos
848  *
849  * Note: we assume that no caller attempts to set a mark before having read
850  * at least one tuple.  Otherwise, iss_ScanDesc might still be NULL.
851  * ----------------------------------------------------------------
852  */
853 void
ExecIndexMarkPos(IndexScanState * node)854 ExecIndexMarkPos(IndexScanState *node)
855 {
856 	EState	   *estate = node->ss.ps.state;
857 
858 	if (estate->es_epqTuple != NULL)
859 	{
860 		/*
861 		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
862 		 * this relation, then we shouldn't access the index at all.  We would
863 		 * instead need to save, and later restore, the state of the
864 		 * es_epqScanDone flag, so that re-fetching the test tuple is
865 		 * possible.  However, given the assumption that no caller sets a mark
866 		 * at the start of the scan, we can only get here with es_epqScanDone
867 		 * already set, and so no state need be saved.
868 		 */
869 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
870 
871 		Assert(scanrelid > 0);
872 		if (estate->es_epqTupleSet[scanrelid - 1])
873 		{
874 			/* Verify the claim above */
875 			if (!estate->es_epqScanDone[scanrelid - 1])
876 				elog(ERROR, "unexpected ExecIndexMarkPos call in EPQ recheck");
877 			return;
878 		}
879 	}
880 
881 	index_markpos(node->iss_ScanDesc);
882 }
883 
884 /* ----------------------------------------------------------------
885  *		ExecIndexRestrPos
886  * ----------------------------------------------------------------
887  */
888 void
ExecIndexRestrPos(IndexScanState * node)889 ExecIndexRestrPos(IndexScanState *node)
890 {
891 	EState	   *estate = node->ss.ps.state;
892 
893 	if (estate->es_epqTuple != NULL)
894 	{
895 		/* See comments in ExecIndexMarkPos */
896 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
897 
898 		Assert(scanrelid > 0);
899 		if (estate->es_epqTupleSet[scanrelid - 1])
900 		{
901 			/* Verify the claim above */
902 			if (!estate->es_epqScanDone[scanrelid - 1])
903 				elog(ERROR, "unexpected ExecIndexRestrPos call in EPQ recheck");
904 			return;
905 		}
906 	}
907 
908 	index_restrpos(node->iss_ScanDesc);
909 }
910 
911 /* ----------------------------------------------------------------
912  *		ExecInitIndexScan
913  *
914  *		Initializes the index scan's state information, creates
915  *		scan keys, and opens the base and index relations.
916  *
917  *		Note: index scans have 2 sets of state information because
918  *			  we have to keep track of the base relation and the
919  *			  index relation.
920  * ----------------------------------------------------------------
921  */
922 IndexScanState *
ExecInitIndexScan(IndexScan * node,EState * estate,int eflags)923 ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
924 {
925 	IndexScanState *indexstate;
926 	Relation	currentRelation;
927 	bool		relistarget;
928 
929 	/*
930 	 * create state structure
931 	 */
932 	indexstate = makeNode(IndexScanState);
933 	indexstate->ss.ps.plan = (Plan *) node;
934 	indexstate->ss.ps.state = estate;
935 	indexstate->ss.ps.ExecProcNode = ExecIndexScan;
936 
937 	/*
938 	 * Miscellaneous initialization
939 	 *
940 	 * create expression context for node
941 	 */
942 	ExecAssignExprContext(estate, &indexstate->ss.ps);
943 
944 	/*
945 	 * open the base relation and acquire appropriate lock on it.
946 	 */
947 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
948 
949 	indexstate->ss.ss_currentRelation = currentRelation;
950 	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */
951 
952 	/*
953 	 * get the scan type from the relation descriptor.
954 	 */
955 	ExecInitScanTupleSlot(estate, &indexstate->ss,
956 						  RelationGetDescr(currentRelation));
957 
958 	/*
959 	 * Initialize result slot, type and projection.
960 	 */
961 	ExecInitResultTupleSlotTL(estate, &indexstate->ss.ps);
962 	ExecAssignScanProjectionInfo(&indexstate->ss);
963 
964 	/*
965 	 * initialize child expressions
966 	 *
967 	 * Note: we don't initialize all of the indexqual expression, only the
968 	 * sub-parts corresponding to runtime keys (see below).  Likewise for
969 	 * indexorderby, if any.  But the indexqualorig expression is always
970 	 * initialized even though it will only be used in some uncommon cases ---
971 	 * would be nice to improve that.  (Problem is that any SubPlans present
972 	 * in the expression must be found now...)
973 	 */
974 	indexstate->ss.ps.qual =
975 		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
976 	indexstate->indexqualorig =
977 		ExecInitQual(node->indexqualorig, (PlanState *) indexstate);
978 	indexstate->indexorderbyorig =
979 		ExecInitExprList(node->indexorderbyorig, (PlanState *) indexstate);
980 
981 	/*
982 	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
983 	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
984 	 * references to nonexistent indexes.
985 	 */
986 	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
987 		return indexstate;
988 
989 	/*
990 	 * Open the index relation.
991 	 *
992 	 * If the parent table is one of the target relations of the query, then
993 	 * InitPlan already opened and write-locked the index, so we can avoid
994 	 * taking another lock here.  Otherwise we need a normal reader's lock.
995 	 */
996 	relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
997 	indexstate->iss_RelationDesc = index_open(node->indexid,
998 											  relistarget ? NoLock : AccessShareLock);
999 
1000 	/*
1001 	 * Initialize index-specific scan state
1002 	 */
1003 	indexstate->iss_RuntimeKeysReady = false;
1004 	indexstate->iss_RuntimeKeys = NULL;
1005 	indexstate->iss_NumRuntimeKeys = 0;
1006 
1007 	/*
1008 	 * build the index scan keys from the index qualification
1009 	 */
1010 	ExecIndexBuildScanKeys((PlanState *) indexstate,
1011 						   indexstate->iss_RelationDesc,
1012 						   node->indexqual,
1013 						   false,
1014 						   &indexstate->iss_ScanKeys,
1015 						   &indexstate->iss_NumScanKeys,
1016 						   &indexstate->iss_RuntimeKeys,
1017 						   &indexstate->iss_NumRuntimeKeys,
1018 						   NULL,	/* no ArrayKeys */
1019 						   NULL);
1020 
1021 	/*
1022 	 * any ORDER BY exprs have to be turned into scankeys in the same way
1023 	 */
1024 	ExecIndexBuildScanKeys((PlanState *) indexstate,
1025 						   indexstate->iss_RelationDesc,
1026 						   node->indexorderby,
1027 						   true,
1028 						   &indexstate->iss_OrderByKeys,
1029 						   &indexstate->iss_NumOrderByKeys,
1030 						   &indexstate->iss_RuntimeKeys,
1031 						   &indexstate->iss_NumRuntimeKeys,
1032 						   NULL,	/* no ArrayKeys */
1033 						   NULL);
1034 
1035 	/* Initialize sort support, if we need to re-check ORDER BY exprs */
1036 	if (indexstate->iss_NumOrderByKeys > 0)
1037 	{
1038 		int			numOrderByKeys = indexstate->iss_NumOrderByKeys;
1039 		int			i;
1040 		ListCell   *lco;
1041 		ListCell   *lcx;
1042 
1043 		/*
1044 		 * Prepare sort support, and look up the data type for each ORDER BY
1045 		 * expression.
1046 		 */
1047 		Assert(numOrderByKeys == list_length(node->indexorderbyops));
1048 		Assert(numOrderByKeys == list_length(node->indexorderbyorig));
1049 		indexstate->iss_SortSupport = (SortSupportData *)
1050 			palloc0(numOrderByKeys * sizeof(SortSupportData));
1051 		indexstate->iss_OrderByTypByVals = (bool *)
1052 			palloc(numOrderByKeys * sizeof(bool));
1053 		indexstate->iss_OrderByTypLens = (int16 *)
1054 			palloc(numOrderByKeys * sizeof(int16));
1055 		i = 0;
1056 		forboth(lco, node->indexorderbyops, lcx, node->indexorderbyorig)
1057 		{
1058 			Oid			orderbyop = lfirst_oid(lco);
1059 			Node	   *orderbyexpr = (Node *) lfirst(lcx);
1060 			Oid			orderbyType = exprType(orderbyexpr);
1061 			Oid			orderbyColl = exprCollation(orderbyexpr);
1062 			SortSupport orderbysort = &indexstate->iss_SortSupport[i];
1063 
1064 			/* Initialize sort support */
1065 			orderbysort->ssup_cxt = CurrentMemoryContext;
1066 			orderbysort->ssup_collation = orderbyColl;
1067 			/* See cmp_orderbyvals() comments on NULLS LAST */
1068 			orderbysort->ssup_nulls_first = false;
1069 			/* ssup_attno is unused here and elsewhere */
1070 			orderbysort->ssup_attno = 0;
1071 			/* No abbreviation */
1072 			orderbysort->abbreviate = false;
1073 			PrepareSortSupportFromOrderingOp(orderbyop, orderbysort);
1074 
1075 			get_typlenbyval(orderbyType,
1076 							&indexstate->iss_OrderByTypLens[i],
1077 							&indexstate->iss_OrderByTypByVals[i]);
1078 			i++;
1079 		}
1080 
1081 		/* allocate arrays to hold the re-calculated distances */
1082 		indexstate->iss_OrderByValues = (Datum *)
1083 			palloc(numOrderByKeys * sizeof(Datum));
1084 		indexstate->iss_OrderByNulls = (bool *)
1085 			palloc(numOrderByKeys * sizeof(bool));
1086 
1087 		/* and initialize the reorder queue */
1088 		indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp,
1089 															indexstate);
1090 	}
1091 
1092 	/*
1093 	 * If we have runtime keys, we need an ExprContext to evaluate them. The
1094 	 * node's standard context won't do because we want to reset that context
1095 	 * for every tuple.  So, build another context just like the other one...
1096 	 * -tgl 7/11/00
1097 	 */
1098 	if (indexstate->iss_NumRuntimeKeys != 0)
1099 	{
1100 		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
1101 
1102 		ExecAssignExprContext(estate, &indexstate->ss.ps);
1103 		indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
1104 		indexstate->ss.ps.ps_ExprContext = stdecontext;
1105 	}
1106 	else
1107 	{
1108 		indexstate->iss_RuntimeContext = NULL;
1109 	}
1110 
1111 	/*
1112 	 * all done.
1113 	 */
1114 	return indexstate;
1115 }
1116 
1117 
1118 /*
1119  * ExecIndexBuildScanKeys
1120  *		Build the index scan keys from the index qualification expressions
1121  *
1122  * The index quals are passed to the index AM in the form of a ScanKey array.
1123  * This routine sets up the ScanKeys, fills in all constant fields of the
1124  * ScanKeys, and prepares information about the keys that have non-constant
1125  * comparison values.  We divide index qual expressions into five types:
1126  *
1127  * 1. Simple operator with constant comparison value ("indexkey op constant").
1128  * For these, we just fill in a ScanKey containing the constant value.
1129  *
1130  * 2. Simple operator with non-constant value ("indexkey op expression").
1131  * For these, we create a ScanKey with everything filled in except the
1132  * expression value, and set up an IndexRuntimeKeyInfo struct to drive
1133  * evaluation of the expression at the right times.
1134  *
1135  * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
1136  * For these, we create a header ScanKey plus a subsidiary ScanKey array,
1137  * as specified in access/skey.h.  The elements of the row comparison
1138  * can have either constant or non-constant comparison values.
1139  *
1140  * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)").  If the index
1141  * supports amsearcharray, we handle these the same as simple operators,
1142  * setting the SK_SEARCHARRAY flag to tell the AM to handle them.  Otherwise,
1143  * we create a ScanKey with everything filled in except the comparison value,
1144  * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
1145  * (Note that if we use an IndexArrayKeyInfo struct, the array expression is
1146  * always treated as requiring runtime evaluation, even if it's a constant.)
1147  *
1148  * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
1149  * ScanKey properly.
1150  *
1151  * This code is also used to prepare ORDER BY expressions for amcanorderbyop
1152  * indexes.  The behavior is exactly the same, except that we have to look up
1153  * the operator differently.  Note that only cases 1 and 2 are currently
1154  * possible for ORDER BY.
1155  *
1156  * Input params are:
1157  *
1158  * planstate: executor state node we are working for
1159  * index: the index we are building scan keys for
1160  * quals: indexquals (or indexorderbys) expressions
1161  * isorderby: true if processing ORDER BY exprs, false if processing quals
1162  * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
1163  * *numRuntimeKeys: number of pre-existing runtime keys
1164  *
1165  * Output params are:
1166  *
1167  * *scanKeys: receives ptr to array of ScanKeys
1168  * *numScanKeys: receives number of scankeys
1169  * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
1170  * *numRuntimeKeys: receives number of runtime keys
1171  * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
1172  * *numArrayKeys: receives number of array keys
1173  *
1174  * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
1175  * IndexArrayKeyInfos are not supported.
1176  */
1177 void
ExecIndexBuildScanKeys(PlanState * planstate,Relation index,List * quals,bool isorderby,ScanKey * scanKeys,int * numScanKeys,IndexRuntimeKeyInfo ** runtimeKeys,int * numRuntimeKeys,IndexArrayKeyInfo ** arrayKeys,int * numArrayKeys)1178 ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
1179 					   List *quals, bool isorderby,
1180 					   ScanKey *scanKeys, int *numScanKeys,
1181 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
1182 					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
1183 {
1184 	ListCell   *qual_cell;
1185 	ScanKey		scan_keys;
1186 	IndexRuntimeKeyInfo *runtime_keys;
1187 	IndexArrayKeyInfo *array_keys;
1188 	int			n_scan_keys;
1189 	int			n_runtime_keys;
1190 	int			max_runtime_keys;
1191 	int			n_array_keys;
1192 	int			j;
1193 
1194 	/* Allocate array for ScanKey structs: one per qual */
1195 	n_scan_keys = list_length(quals);
1196 	scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
1197 
1198 	/*
1199 	 * runtime_keys array is dynamically resized as needed.  We handle it this
1200 	 * way so that the same runtime keys array can be shared between
1201 	 * indexquals and indexorderbys, which will be processed in separate calls
1202 	 * of this function.  Caller must be sure to pass in NULL/0 for first
1203 	 * call.
1204 	 */
1205 	runtime_keys = *runtimeKeys;
1206 	n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
1207 
1208 	/* Allocate array_keys as large as it could possibly need to be */
1209 	array_keys = (IndexArrayKeyInfo *)
1210 		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
1211 	n_array_keys = 0;
1212 
1213 	/*
1214 	 * for each opclause in the given qual, convert the opclause into a single
1215 	 * scan key
1216 	 */
1217 	j = 0;
1218 	foreach(qual_cell, quals)
1219 	{
1220 		Expr	   *clause = (Expr *) lfirst(qual_cell);
1221 		ScanKey		this_scan_key = &scan_keys[j++];
1222 		Oid			opno;		/* operator's OID */
1223 		RegProcedure opfuncid;	/* operator proc id used in scan */
1224 		Oid			opfamily;	/* opfamily of index column */
1225 		int			op_strategy;	/* operator's strategy number */
1226 		Oid			op_lefttype;	/* operator's declared input types */
1227 		Oid			op_righttype;
1228 		Expr	   *leftop;		/* expr on lhs of operator */
1229 		Expr	   *rightop;	/* expr on rhs ... */
1230 		AttrNumber	varattno;	/* att number used in scan */
1231 		int			indnkeyatts;
1232 
1233 		indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
1234 		if (IsA(clause, OpExpr))
1235 		{
1236 			/* indexkey op const or indexkey op expression */
1237 			int			flags = 0;
1238 			Datum		scanvalue;
1239 
1240 			opno = ((OpExpr *) clause)->opno;
1241 			opfuncid = ((OpExpr *) clause)->opfuncid;
1242 
1243 			/*
1244 			 * leftop should be the index key Var, possibly relabeled
1245 			 */
1246 			leftop = (Expr *) get_leftop(clause);
1247 
1248 			if (leftop && IsA(leftop, RelabelType))
1249 				leftop = ((RelabelType *) leftop)->arg;
1250 
1251 			Assert(leftop != NULL);
1252 
1253 			if (!(IsA(leftop, Var) &&
1254 				  ((Var *) leftop)->varno == INDEX_VAR))
1255 				elog(ERROR, "indexqual doesn't have key on left side");
1256 
1257 			varattno = ((Var *) leftop)->varattno;
1258 			if (varattno < 1 || varattno > indnkeyatts)
1259 				elog(ERROR, "bogus index qualification");
1260 
1261 			/*
1262 			 * We have to look up the operator's strategy number.  This
1263 			 * provides a cross-check that the operator does match the index.
1264 			 */
1265 			opfamily = index->rd_opfamily[varattno - 1];
1266 
1267 			get_op_opfamily_properties(opno, opfamily, isorderby,
1268 									   &op_strategy,
1269 									   &op_lefttype,
1270 									   &op_righttype);
1271 
1272 			if (isorderby)
1273 				flags |= SK_ORDER_BY;
1274 
1275 			/*
1276 			 * rightop is the constant or variable comparison value
1277 			 */
1278 			rightop = (Expr *) get_rightop(clause);
1279 
1280 			if (rightop && IsA(rightop, RelabelType))
1281 				rightop = ((RelabelType *) rightop)->arg;
1282 
1283 			Assert(rightop != NULL);
1284 
1285 			if (IsA(rightop, Const))
1286 			{
1287 				/* OK, simple constant comparison value */
1288 				scanvalue = ((Const *) rightop)->constvalue;
1289 				if (((Const *) rightop)->constisnull)
1290 					flags |= SK_ISNULL;
1291 			}
1292 			else
1293 			{
1294 				/* Need to treat this one as a runtime key */
1295 				if (n_runtime_keys >= max_runtime_keys)
1296 				{
1297 					if (max_runtime_keys == 0)
1298 					{
1299 						max_runtime_keys = 8;
1300 						runtime_keys = (IndexRuntimeKeyInfo *)
1301 							palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1302 					}
1303 					else
1304 					{
1305 						max_runtime_keys *= 2;
1306 						runtime_keys = (IndexRuntimeKeyInfo *)
1307 							repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1308 					}
1309 				}
1310 				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1311 				runtime_keys[n_runtime_keys].key_expr =
1312 					ExecInitExpr(rightop, planstate);
1313 				runtime_keys[n_runtime_keys].key_toastable =
1314 					TypeIsToastable(op_righttype);
1315 				n_runtime_keys++;
1316 				scanvalue = (Datum) 0;
1317 			}
1318 
1319 			/*
1320 			 * initialize the scan key's fields appropriately
1321 			 */
1322 			ScanKeyEntryInitialize(this_scan_key,
1323 								   flags,
1324 								   varattno,	/* attribute number to scan */
1325 								   op_strategy, /* op's strategy */
1326 								   op_righttype,	/* strategy subtype */
1327 								   ((OpExpr *) clause)->inputcollid,	/* collation */
1328 								   opfuncid,	/* reg proc to use */
1329 								   scanvalue);	/* constant */
1330 		}
1331 		else if (IsA(clause, RowCompareExpr))
1332 		{
1333 			/* (indexkey, indexkey, ...) op (expression, expression, ...) */
1334 			RowCompareExpr *rc = (RowCompareExpr *) clause;
1335 			ListCell   *largs_cell = list_head(rc->largs);
1336 			ListCell   *rargs_cell = list_head(rc->rargs);
1337 			ListCell   *opnos_cell = list_head(rc->opnos);
1338 			ListCell   *collids_cell = list_head(rc->inputcollids);
1339 			ScanKey		first_sub_key;
1340 			int			n_sub_key;
1341 
1342 			Assert(!isorderby);
1343 
1344 			first_sub_key = (ScanKey)
1345 				palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
1346 			n_sub_key = 0;
1347 
1348 			/* Scan RowCompare columns and generate subsidiary ScanKey items */
1349 			while (opnos_cell != NULL)
1350 			{
1351 				ScanKey		this_sub_key = &first_sub_key[n_sub_key];
1352 				int			flags = SK_ROW_MEMBER;
1353 				Datum		scanvalue;
1354 				Oid			inputcollation;
1355 
1356 				/*
1357 				 * leftop should be the index key Var, possibly relabeled
1358 				 */
1359 				leftop = (Expr *) lfirst(largs_cell);
1360 				largs_cell = lnext(largs_cell);
1361 
1362 				if (leftop && IsA(leftop, RelabelType))
1363 					leftop = ((RelabelType *) leftop)->arg;
1364 
1365 				Assert(leftop != NULL);
1366 
1367 				if (!(IsA(leftop, Var) &&
1368 					  ((Var *) leftop)->varno == INDEX_VAR))
1369 					elog(ERROR, "indexqual doesn't have key on left side");
1370 
1371 				varattno = ((Var *) leftop)->varattno;
1372 
1373 				/*
1374 				 * We have to look up the operator's associated btree support
1375 				 * function
1376 				 */
1377 				opno = lfirst_oid(opnos_cell);
1378 				opnos_cell = lnext(opnos_cell);
1379 
1380 				if (index->rd_rel->relam != BTREE_AM_OID ||
1381 					varattno < 1 || varattno > indnkeyatts)
1382 					elog(ERROR, "bogus RowCompare index qualification");
1383 				opfamily = index->rd_opfamily[varattno - 1];
1384 
1385 				get_op_opfamily_properties(opno, opfamily, isorderby,
1386 										   &op_strategy,
1387 										   &op_lefttype,
1388 										   &op_righttype);
1389 
1390 				if (op_strategy != rc->rctype)
1391 					elog(ERROR, "RowCompare index qualification contains wrong operator");
1392 
1393 				opfuncid = get_opfamily_proc(opfamily,
1394 											 op_lefttype,
1395 											 op_righttype,
1396 											 BTORDER_PROC);
1397 				if (!RegProcedureIsValid(opfuncid))
1398 					elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
1399 						 BTORDER_PROC, op_lefttype, op_righttype, opfamily);
1400 
1401 				inputcollation = lfirst_oid(collids_cell);
1402 				collids_cell = lnext(collids_cell);
1403 
1404 				/*
1405 				 * rightop is the constant or variable comparison value
1406 				 */
1407 				rightop = (Expr *) lfirst(rargs_cell);
1408 				rargs_cell = lnext(rargs_cell);
1409 
1410 				if (rightop && IsA(rightop, RelabelType))
1411 					rightop = ((RelabelType *) rightop)->arg;
1412 
1413 				Assert(rightop != NULL);
1414 
1415 				if (IsA(rightop, Const))
1416 				{
1417 					/* OK, simple constant comparison value */
1418 					scanvalue = ((Const *) rightop)->constvalue;
1419 					if (((Const *) rightop)->constisnull)
1420 						flags |= SK_ISNULL;
1421 				}
1422 				else
1423 				{
1424 					/* Need to treat this one as a runtime key */
1425 					if (n_runtime_keys >= max_runtime_keys)
1426 					{
1427 						if (max_runtime_keys == 0)
1428 						{
1429 							max_runtime_keys = 8;
1430 							runtime_keys = (IndexRuntimeKeyInfo *)
1431 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1432 						}
1433 						else
1434 						{
1435 							max_runtime_keys *= 2;
1436 							runtime_keys = (IndexRuntimeKeyInfo *)
1437 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1438 						}
1439 					}
1440 					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
1441 					runtime_keys[n_runtime_keys].key_expr =
1442 						ExecInitExpr(rightop, planstate);
1443 					runtime_keys[n_runtime_keys].key_toastable =
1444 						TypeIsToastable(op_righttype);
1445 					n_runtime_keys++;
1446 					scanvalue = (Datum) 0;
1447 				}
1448 
1449 				/*
1450 				 * initialize the subsidiary scan key's fields appropriately
1451 				 */
1452 				ScanKeyEntryInitialize(this_sub_key,
1453 									   flags,
1454 									   varattno,	/* attribute number */
1455 									   op_strategy, /* op's strategy */
1456 									   op_righttype,	/* strategy subtype */
1457 									   inputcollation,	/* collation */
1458 									   opfuncid,	/* reg proc to use */
1459 									   scanvalue);	/* constant */
1460 				n_sub_key++;
1461 			}
1462 
1463 			/* Mark the last subsidiary scankey correctly */
1464 			first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
1465 
1466 			/*
1467 			 * We don't use ScanKeyEntryInitialize for the header because it
1468 			 * isn't going to contain a valid sk_func pointer.
1469 			 */
1470 			MemSet(this_scan_key, 0, sizeof(ScanKeyData));
1471 			this_scan_key->sk_flags = SK_ROW_HEADER;
1472 			this_scan_key->sk_attno = first_sub_key->sk_attno;
1473 			this_scan_key->sk_strategy = rc->rctype;
1474 			/* sk_subtype, sk_collation, sk_func not used in a header */
1475 			this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
1476 		}
1477 		else if (IsA(clause, ScalarArrayOpExpr))
1478 		{
1479 			/* indexkey op ANY (array-expression) */
1480 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
1481 			int			flags = 0;
1482 			Datum		scanvalue;
1483 
1484 			Assert(!isorderby);
1485 
1486 			Assert(saop->useOr);
1487 			opno = saop->opno;
1488 			opfuncid = saop->opfuncid;
1489 
1490 			/*
1491 			 * leftop should be the index key Var, possibly relabeled
1492 			 */
1493 			leftop = (Expr *) linitial(saop->args);
1494 
1495 			if (leftop && IsA(leftop, RelabelType))
1496 				leftop = ((RelabelType *) leftop)->arg;
1497 
1498 			Assert(leftop != NULL);
1499 
1500 			if (!(IsA(leftop, Var) &&
1501 				  ((Var *) leftop)->varno == INDEX_VAR))
1502 				elog(ERROR, "indexqual doesn't have key on left side");
1503 
1504 			varattno = ((Var *) leftop)->varattno;
1505 			if (varattno < 1 || varattno > indnkeyatts)
1506 				elog(ERROR, "bogus index qualification");
1507 
1508 			/*
1509 			 * We have to look up the operator's strategy number.  This
1510 			 * provides a cross-check that the operator does match the index.
1511 			 */
1512 			opfamily = index->rd_opfamily[varattno - 1];
1513 
1514 			get_op_opfamily_properties(opno, opfamily, isorderby,
1515 									   &op_strategy,
1516 									   &op_lefttype,
1517 									   &op_righttype);
1518 
1519 			/*
1520 			 * rightop is the constant or variable array value
1521 			 */
1522 			rightop = (Expr *) lsecond(saop->args);
1523 
1524 			if (rightop && IsA(rightop, RelabelType))
1525 				rightop = ((RelabelType *) rightop)->arg;
1526 
1527 			Assert(rightop != NULL);
1528 
1529 			if (index->rd_amroutine->amsearcharray)
1530 			{
1531 				/* Index AM will handle this like a simple operator */
1532 				flags |= SK_SEARCHARRAY;
1533 				if (IsA(rightop, Const))
1534 				{
1535 					/* OK, simple constant comparison value */
1536 					scanvalue = ((Const *) rightop)->constvalue;
1537 					if (((Const *) rightop)->constisnull)
1538 						flags |= SK_ISNULL;
1539 				}
1540 				else
1541 				{
1542 					/* Need to treat this one as a runtime key */
1543 					if (n_runtime_keys >= max_runtime_keys)
1544 					{
1545 						if (max_runtime_keys == 0)
1546 						{
1547 							max_runtime_keys = 8;
1548 							runtime_keys = (IndexRuntimeKeyInfo *)
1549 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1550 						}
1551 						else
1552 						{
1553 							max_runtime_keys *= 2;
1554 							runtime_keys = (IndexRuntimeKeyInfo *)
1555 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1556 						}
1557 					}
1558 					runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1559 					runtime_keys[n_runtime_keys].key_expr =
1560 						ExecInitExpr(rightop, planstate);
1561 
1562 					/*
1563 					 * Careful here: the runtime expression is not of
1564 					 * op_righttype, but rather is an array of same; so
1565 					 * TypeIsToastable() isn't helpful.  However, we can
1566 					 * assume that all array types are toastable.
1567 					 */
1568 					runtime_keys[n_runtime_keys].key_toastable = true;
1569 					n_runtime_keys++;
1570 					scanvalue = (Datum) 0;
1571 				}
1572 			}
1573 			else
1574 			{
1575 				/* Executor has to expand the array value */
1576 				array_keys[n_array_keys].scan_key = this_scan_key;
1577 				array_keys[n_array_keys].array_expr =
1578 					ExecInitExpr(rightop, planstate);
1579 				/* the remaining fields were zeroed by palloc0 */
1580 				n_array_keys++;
1581 				scanvalue = (Datum) 0;
1582 			}
1583 
1584 			/*
1585 			 * initialize the scan key's fields appropriately
1586 			 */
1587 			ScanKeyEntryInitialize(this_scan_key,
1588 								   flags,
1589 								   varattno,	/* attribute number to scan */
1590 								   op_strategy, /* op's strategy */
1591 								   op_righttype,	/* strategy subtype */
1592 								   saop->inputcollid,	/* collation */
1593 								   opfuncid,	/* reg proc to use */
1594 								   scanvalue);	/* constant */
1595 		}
1596 		else if (IsA(clause, NullTest))
1597 		{
1598 			/* indexkey IS NULL or indexkey IS NOT NULL */
1599 			NullTest   *ntest = (NullTest *) clause;
1600 			int			flags;
1601 
1602 			Assert(!isorderby);
1603 
1604 			/*
1605 			 * argument should be the index key Var, possibly relabeled
1606 			 */
1607 			leftop = ntest->arg;
1608 
1609 			if (leftop && IsA(leftop, RelabelType))
1610 				leftop = ((RelabelType *) leftop)->arg;
1611 
1612 			Assert(leftop != NULL);
1613 
1614 			if (!(IsA(leftop, Var) &&
1615 				  ((Var *) leftop)->varno == INDEX_VAR))
1616 				elog(ERROR, "NullTest indexqual has wrong key");
1617 
1618 			varattno = ((Var *) leftop)->varattno;
1619 
1620 			/*
1621 			 * initialize the scan key's fields appropriately
1622 			 */
1623 			switch (ntest->nulltesttype)
1624 			{
1625 				case IS_NULL:
1626 					flags = SK_ISNULL | SK_SEARCHNULL;
1627 					break;
1628 				case IS_NOT_NULL:
1629 					flags = SK_ISNULL | SK_SEARCHNOTNULL;
1630 					break;
1631 				default:
1632 					elog(ERROR, "unrecognized nulltesttype: %d",
1633 						 (int) ntest->nulltesttype);
1634 					flags = 0;	/* keep compiler quiet */
1635 					break;
1636 			}
1637 
1638 			ScanKeyEntryInitialize(this_scan_key,
1639 								   flags,
1640 								   varattno,	/* attribute number to scan */
1641 								   InvalidStrategy, /* no strategy */
1642 								   InvalidOid,	/* no strategy subtype */
1643 								   InvalidOid,	/* no collation */
1644 								   InvalidOid,	/* no reg proc for this */
1645 								   (Datum) 0);	/* constant */
1646 		}
1647 		else
1648 			elog(ERROR, "unsupported indexqual type: %d",
1649 				 (int) nodeTag(clause));
1650 	}
1651 
1652 	Assert(n_runtime_keys <= max_runtime_keys);
1653 
1654 	/* Get rid of any unused arrays */
1655 	if (n_array_keys == 0)
1656 	{
1657 		pfree(array_keys);
1658 		array_keys = NULL;
1659 	}
1660 
1661 	/*
1662 	 * Return info to our caller.
1663 	 */
1664 	*scanKeys = scan_keys;
1665 	*numScanKeys = n_scan_keys;
1666 	*runtimeKeys = runtime_keys;
1667 	*numRuntimeKeys = n_runtime_keys;
1668 	if (arrayKeys)
1669 	{
1670 		*arrayKeys = array_keys;
1671 		*numArrayKeys = n_array_keys;
1672 	}
1673 	else if (n_array_keys != 0)
1674 		elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
1675 }
1676 
1677 /* ----------------------------------------------------------------
1678  *						Parallel Scan Support
1679  * ----------------------------------------------------------------
1680  */
1681 
1682 /* ----------------------------------------------------------------
1683  *		ExecIndexScanEstimate
1684  *
1685  *		Compute the amount of space we'll need in the parallel
1686  *		query DSM, and inform pcxt->estimator about our needs.
1687  * ----------------------------------------------------------------
1688  */
1689 void
ExecIndexScanEstimate(IndexScanState * node,ParallelContext * pcxt)1690 ExecIndexScanEstimate(IndexScanState *node,
1691 					  ParallelContext *pcxt)
1692 {
1693 	EState	   *estate = node->ss.ps.state;
1694 
1695 	node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
1696 													 estate->es_snapshot);
1697 	shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
1698 	shm_toc_estimate_keys(&pcxt->estimator, 1);
1699 }
1700 
1701 /* ----------------------------------------------------------------
1702  *		ExecIndexScanInitializeDSM
1703  *
1704  *		Set up a parallel index scan descriptor.
1705  * ----------------------------------------------------------------
1706  */
1707 void
ExecIndexScanInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1708 ExecIndexScanInitializeDSM(IndexScanState *node,
1709 						   ParallelContext *pcxt)
1710 {
1711 	EState	   *estate = node->ss.ps.state;
1712 	ParallelIndexScanDesc piscan;
1713 
1714 	piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
1715 	index_parallelscan_initialize(node->ss.ss_currentRelation,
1716 								  node->iss_RelationDesc,
1717 								  estate->es_snapshot,
1718 								  piscan);
1719 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
1720 	node->iss_ScanDesc =
1721 		index_beginscan_parallel(node->ss.ss_currentRelation,
1722 								 node->iss_RelationDesc,
1723 								 node->iss_NumScanKeys,
1724 								 node->iss_NumOrderByKeys,
1725 								 piscan);
1726 
1727 	/*
1728 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1729 	 * the scankeys to the index AM.
1730 	 */
1731 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1732 		index_rescan(node->iss_ScanDesc,
1733 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1734 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1735 }
1736 
1737 /* ----------------------------------------------------------------
1738  *		ExecIndexScanReInitializeDSM
1739  *
1740  *		Reset shared state before beginning a fresh scan.
1741  * ----------------------------------------------------------------
1742  */
1743 void
ExecIndexScanReInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1744 ExecIndexScanReInitializeDSM(IndexScanState *node,
1745 							 ParallelContext *pcxt)
1746 {
1747 	index_parallelrescan(node->iss_ScanDesc);
1748 }
1749 
1750 /* ----------------------------------------------------------------
1751  *		ExecIndexScanInitializeWorker
1752  *
1753  *		Copy relevant information from TOC into planstate.
1754  * ----------------------------------------------------------------
1755  */
1756 void
ExecIndexScanInitializeWorker(IndexScanState * node,ParallelWorkerContext * pwcxt)1757 ExecIndexScanInitializeWorker(IndexScanState *node,
1758 							  ParallelWorkerContext *pwcxt)
1759 {
1760 	ParallelIndexScanDesc piscan;
1761 
1762 	piscan = shm_toc_lookup(pwcxt->toc, node->ss.ps.plan->plan_node_id, false);
1763 	node->iss_ScanDesc =
1764 		index_beginscan_parallel(node->ss.ss_currentRelation,
1765 								 node->iss_RelationDesc,
1766 								 node->iss_NumScanKeys,
1767 								 node->iss_NumOrderByKeys,
1768 								 piscan);
1769 
1770 	/*
1771 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1772 	 * the scankeys to the index AM.
1773 	 */
1774 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1775 		index_rescan(node->iss_ScanDesc,
1776 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1777 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1778 }
1779