1 /*-------------------------------------------------------------------------
2  *
3  * nodeIndexscan.c
4  *	  Routines to support indexed scans of relations
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeIndexscan.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /*
16  * INTERFACE ROUTINES
17  *		ExecIndexScan			scans a relation using an index
18  *		IndexNext				retrieve next tuple using index
19  *		IndexNextWithReorder	same, but recheck ORDER BY expressions
20  *		ExecInitIndexScan		creates and initializes state info.
21  *		ExecReScanIndexScan		rescans the indexed relation.
22  *		ExecEndIndexScan		releases all storage.
23  *		ExecIndexMarkPos		marks scan position.
24  *		ExecIndexRestrPos		restores scan position.
25  *		ExecIndexScanEstimate	estimates DSM space needed for parallel index scan
26  *		ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
27  *		ExecIndexScanReInitializeDSM reinitialize DSM for fresh scan
28  *		ExecIndexScanInitializeWorker attach to DSM info in parallel worker
29  */
30 #include "postgres.h"
31 
32 #include "access/nbtree.h"
33 #include "access/relscan.h"
34 #include "catalog/pg_am.h"
35 #include "executor/execdebug.h"
36 #include "executor/nodeIndexscan.h"
37 #include "lib/pairingheap.h"
38 #include "miscadmin.h"
39 #include "nodes/nodeFuncs.h"
40 #include "optimizer/clauses.h"
41 #include "utils/array.h"
42 #include "utils/datum.h"
43 #include "utils/lsyscache.h"
44 #include "utils/memutils.h"
45 #include "utils/rel.h"
46 
47 /*
48  * When an ordering operator is used, tuples fetched from the index that
49  * need to be reordered are queued in a pairing heap, as ReorderTuples.
50  */
51 typedef struct
52 {
53 	pairingheap_node ph_node;
54 	HeapTuple	htup;
55 	Datum	   *orderbyvals;
56 	bool	   *orderbynulls;
57 } ReorderTuple;
58 
59 static TupleTableSlot *IndexNext(IndexScanState *node);
60 static TupleTableSlot *IndexNextWithReorder(IndexScanState *node);
61 static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
62 static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot);
63 static int cmp_orderbyvals(const Datum *adist, const bool *anulls,
64 				const Datum *bdist, const bool *bnulls,
65 				IndexScanState *node);
66 static int reorderqueue_cmp(const pairingheap_node *a,
67 				 const pairingheap_node *b, void *arg);
68 static void reorderqueue_push(IndexScanState *node, HeapTuple tuple,
69 				  Datum *orderbyvals, bool *orderbynulls);
70 static HeapTuple reorderqueue_pop(IndexScanState *node);
71 
72 
73 /* ----------------------------------------------------------------
74  *		IndexNext
75  *
76  *		Retrieve a tuple from the IndexScan node's currentRelation
77  *		using the index specified in the IndexScanState information.
78  * ----------------------------------------------------------------
79  */
80 static TupleTableSlot *
IndexNext(IndexScanState * node)81 IndexNext(IndexScanState *node)
82 {
83 	EState	   *estate;
84 	ExprContext *econtext;
85 	ScanDirection direction;
86 	IndexScanDesc scandesc;
87 	HeapTuple	tuple;
88 	TupleTableSlot *slot;
89 
90 	/*
91 	 * extract necessary information from index scan node
92 	 */
93 	estate = node->ss.ps.state;
94 	direction = estate->es_direction;
95 	/* flip direction if this is an overall backward scan */
96 	if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
97 	{
98 		if (ScanDirectionIsForward(direction))
99 			direction = BackwardScanDirection;
100 		else if (ScanDirectionIsBackward(direction))
101 			direction = ForwardScanDirection;
102 	}
103 	scandesc = node->iss_ScanDesc;
104 	econtext = node->ss.ps.ps_ExprContext;
105 	slot = node->ss.ss_ScanTupleSlot;
106 
107 	if (scandesc == NULL)
108 	{
109 		/*
110 		 * We reach here if the index scan is not parallel, or if we're
111 		 * serially executing an index scan that was planned to be parallel.
112 		 */
113 		scandesc = index_beginscan(node->ss.ss_currentRelation,
114 								   node->iss_RelationDesc,
115 								   estate->es_snapshot,
116 								   node->iss_NumScanKeys,
117 								   node->iss_NumOrderByKeys);
118 
119 		node->iss_ScanDesc = scandesc;
120 
121 		/*
122 		 * If no run-time keys to calculate or they are ready, go ahead and
123 		 * pass the scankeys to the index AM.
124 		 */
125 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
126 			index_rescan(scandesc,
127 						 node->iss_ScanKeys, node->iss_NumScanKeys,
128 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
129 	}
130 
131 	/*
132 	 * ok, now that we have what we need, fetch the next tuple.
133 	 */
134 	while ((tuple = index_getnext(scandesc, direction)) != NULL)
135 	{
136 		CHECK_FOR_INTERRUPTS();
137 
138 		/*
139 		 * Store the scanned tuple in the scan tuple slot of the scan state.
140 		 * Note: we pass 'false' because tuples returned by amgetnext are
141 		 * pointers onto disk pages and must not be pfree()'d.
142 		 */
143 		ExecStoreTuple(tuple,	/* tuple to store */
144 					   slot,	/* slot to store in */
145 					   scandesc->xs_cbuf,	/* buffer containing tuple */
146 					   false);	/* don't pfree */
147 
148 		/*
149 		 * If the index was lossy, we have to recheck the index quals using
150 		 * the fetched tuple.
151 		 */
152 		if (scandesc->xs_recheck)
153 		{
154 			econtext->ecxt_scantuple = slot;
155 			ResetExprContext(econtext);
156 			if (!ExecQual(node->indexqualorig, econtext))
157 			{
158 				/* Fails recheck, so drop it and loop back for another */
159 				InstrCountFiltered2(node, 1);
160 				continue;
161 			}
162 		}
163 
164 		return slot;
165 	}
166 
167 	/*
168 	 * if we get here it means the index scan failed so we are at the end of
169 	 * the scan..
170 	 */
171 	node->iss_ReachedEnd = true;
172 	return ExecClearTuple(slot);
173 }
174 
175 /* ----------------------------------------------------------------
176  *		IndexNextWithReorder
177  *
178  *		Like IndexNext, but this version can also re-check ORDER BY
179  *		expressions, and reorder the tuples as necessary.
180  * ----------------------------------------------------------------
181  */
182 static TupleTableSlot *
IndexNextWithReorder(IndexScanState * node)183 IndexNextWithReorder(IndexScanState *node)
184 {
185 	EState	   *estate;
186 	ExprContext *econtext;
187 	IndexScanDesc scandesc;
188 	HeapTuple	tuple;
189 	TupleTableSlot *slot;
190 	ReorderTuple *topmost = NULL;
191 	bool		was_exact;
192 	Datum	   *lastfetched_vals;
193 	bool	   *lastfetched_nulls;
194 	int			cmp;
195 
196 	estate = node->ss.ps.state;
197 
198 	/*
199 	 * Only forward scan is supported with reordering.  Note: we can get away
200 	 * with just Asserting here because the system will not try to run the
201 	 * plan backwards if ExecSupportsBackwardScan() says it won't work.
202 	 * Currently, that is guaranteed because no index AMs support both
203 	 * amcanorderbyop and amcanbackward; if any ever do,
204 	 * ExecSupportsBackwardScan() will need to consider indexorderbys
205 	 * explicitly.
206 	 */
207 	Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
208 	Assert(ScanDirectionIsForward(estate->es_direction));
209 
210 	scandesc = node->iss_ScanDesc;
211 	econtext = node->ss.ps.ps_ExprContext;
212 	slot = node->ss.ss_ScanTupleSlot;
213 
214 	if (scandesc == NULL)
215 	{
216 		/*
217 		 * We reach here if the index scan is not parallel, or if we're
218 		 * serially executing an index scan that was planned to be parallel.
219 		 */
220 		scandesc = index_beginscan(node->ss.ss_currentRelation,
221 								   node->iss_RelationDesc,
222 								   estate->es_snapshot,
223 								   node->iss_NumScanKeys,
224 								   node->iss_NumOrderByKeys);
225 
226 		node->iss_ScanDesc = scandesc;
227 
228 		/*
229 		 * If no run-time keys to calculate or they are ready, go ahead and
230 		 * pass the scankeys to the index AM.
231 		 */
232 		if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
233 			index_rescan(scandesc,
234 						 node->iss_ScanKeys, node->iss_NumScanKeys,
235 						 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
236 	}
237 
238 	for (;;)
239 	{
240 		CHECK_FOR_INTERRUPTS();
241 
242 		/*
243 		 * Check the reorder queue first.  If the topmost tuple in the queue
244 		 * has an ORDER BY value smaller than (or equal to) the value last
245 		 * returned by the index, we can return it now.
246 		 */
247 		if (!pairingheap_is_empty(node->iss_ReorderQueue))
248 		{
249 			topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue);
250 
251 			if (node->iss_ReachedEnd ||
252 				cmp_orderbyvals(topmost->orderbyvals,
253 								topmost->orderbynulls,
254 								scandesc->xs_orderbyvals,
255 								scandesc->xs_orderbynulls,
256 								node) <= 0)
257 			{
258 				tuple = reorderqueue_pop(node);
259 
260 				/* Pass 'true', as the tuple in the queue is a palloc'd copy */
261 				ExecStoreTuple(tuple, slot, InvalidBuffer, true);
262 				return slot;
263 			}
264 		}
265 		else if (node->iss_ReachedEnd)
266 		{
267 			/* Queue is empty, and no more tuples from index.  We're done. */
268 			return ExecClearTuple(slot);
269 		}
270 
271 		/*
272 		 * Fetch next tuple from the index.
273 		 */
274 next_indextuple:
275 		tuple = index_getnext(scandesc, ForwardScanDirection);
276 		if (!tuple)
277 		{
278 			/*
279 			 * No more tuples from the index.  But we still need to drain any
280 			 * remaining tuples from the queue before we're done.
281 			 */
282 			node->iss_ReachedEnd = true;
283 			continue;
284 		}
285 
286 		/*
287 		 * Store the scanned tuple in the scan tuple slot of the scan state.
288 		 * Note: we pass 'false' because tuples returned by amgetnext are
289 		 * pointers onto disk pages and must not be pfree()'d.
290 		 */
291 		ExecStoreTuple(tuple,	/* tuple to store */
292 					   slot,	/* slot to store in */
293 					   scandesc->xs_cbuf,	/* buffer containing tuple */
294 					   false);	/* don't pfree */
295 
296 		/*
297 		 * If the index was lossy, we have to recheck the index quals and
298 		 * ORDER BY expressions using the fetched tuple.
299 		 */
300 		if (scandesc->xs_recheck)
301 		{
302 			econtext->ecxt_scantuple = slot;
303 			ResetExprContext(econtext);
304 			if (!ExecQual(node->indexqualorig, econtext))
305 			{
306 				/* Fails recheck, so drop it and loop back for another */
307 				InstrCountFiltered2(node, 1);
308 				/* allow this loop to be cancellable */
309 				CHECK_FOR_INTERRUPTS();
310 				goto next_indextuple;
311 			}
312 		}
313 
314 		if (scandesc->xs_recheckorderby)
315 		{
316 			econtext->ecxt_scantuple = slot;
317 			ResetExprContext(econtext);
318 			EvalOrderByExpressions(node, econtext);
319 
320 			/*
321 			 * Was the ORDER BY value returned by the index accurate?  The
322 			 * recheck flag means that the index can return inaccurate values,
323 			 * but then again, the value returned for any particular tuple
324 			 * could also be exactly correct.  Compare the value returned by
325 			 * the index with the recalculated value.  (If the value returned
326 			 * by the index happened to be exact right, we can often avoid
327 			 * pushing the tuple to the queue, just to pop it back out again.)
328 			 */
329 			cmp = cmp_orderbyvals(node->iss_OrderByValues,
330 								  node->iss_OrderByNulls,
331 								  scandesc->xs_orderbyvals,
332 								  scandesc->xs_orderbynulls,
333 								  node);
334 			if (cmp < 0)
335 				elog(ERROR, "index returned tuples in wrong order");
336 			else if (cmp == 0)
337 				was_exact = true;
338 			else
339 				was_exact = false;
340 			lastfetched_vals = node->iss_OrderByValues;
341 			lastfetched_nulls = node->iss_OrderByNulls;
342 		}
343 		else
344 		{
345 			was_exact = true;
346 			lastfetched_vals = scandesc->xs_orderbyvals;
347 			lastfetched_nulls = scandesc->xs_orderbynulls;
348 		}
349 
350 		/*
351 		 * Can we return this tuple immediately, or does it need to be pushed
352 		 * to the reorder queue?  If the ORDER BY expression values returned
353 		 * by the index were inaccurate, we can't return it yet, because the
354 		 * next tuple from the index might need to come before this one. Also,
355 		 * we can't return it yet if there are any smaller tuples in the queue
356 		 * already.
357 		 */
358 		if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals,
359 													  lastfetched_nulls,
360 													  topmost->orderbyvals,
361 													  topmost->orderbynulls,
362 													  node) > 0))
363 		{
364 			/* Put this tuple to the queue */
365 			reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
366 			continue;
367 		}
368 		else
369 		{
370 			/* Can return this tuple immediately. */
371 			return slot;
372 		}
373 	}
374 
375 	/*
376 	 * if we get here it means the index scan failed so we are at the end of
377 	 * the scan..
378 	 */
379 	return ExecClearTuple(slot);
380 }
381 
382 /*
383  * Calculate the expressions in the ORDER BY clause, based on the heap tuple.
384  */
385 static void
EvalOrderByExpressions(IndexScanState * node,ExprContext * econtext)386 EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext)
387 {
388 	int			i;
389 	ListCell   *l;
390 	MemoryContext oldContext;
391 
392 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
393 
394 	i = 0;
395 	foreach(l, node->indexorderbyorig)
396 	{
397 		ExprState  *orderby = (ExprState *) lfirst(l);
398 
399 		node->iss_OrderByValues[i] = ExecEvalExpr(orderby,
400 												  econtext,
401 												  &node->iss_OrderByNulls[i]);
402 		i++;
403 	}
404 
405 	MemoryContextSwitchTo(oldContext);
406 }
407 
408 /*
409  * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
410  */
411 static bool
IndexRecheck(IndexScanState * node,TupleTableSlot * slot)412 IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
413 {
414 	ExprContext *econtext;
415 
416 	/*
417 	 * extract necessary information from index scan node
418 	 */
419 	econtext = node->ss.ps.ps_ExprContext;
420 
421 	/* Does the tuple meet the indexqual condition? */
422 	econtext->ecxt_scantuple = slot;
423 
424 	ResetExprContext(econtext);
425 
426 	return ExecQual(node->indexqualorig, econtext);
427 }
428 
429 
430 /*
431  * Compare ORDER BY expression values.
432  */
433 static int
cmp_orderbyvals(const Datum * adist,const bool * anulls,const Datum * bdist,const bool * bnulls,IndexScanState * node)434 cmp_orderbyvals(const Datum *adist, const bool *anulls,
435 				const Datum *bdist, const bool *bnulls,
436 				IndexScanState *node)
437 {
438 	int			i;
439 	int			result;
440 
441 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
442 	{
443 		SortSupport ssup = &node->iss_SortSupport[i];
444 
445 		/*
446 		 * Handle nulls.  We only need to support NULLS LAST ordering, because
447 		 * match_pathkeys_to_index() doesn't consider indexorderby
448 		 * implementation otherwise.
449 		 */
450 		if (anulls[i] && !bnulls[i])
451 			return 1;
452 		else if (!anulls[i] && bnulls[i])
453 			return -1;
454 		else if (anulls[i] && bnulls[i])
455 			return 0;
456 
457 		result = ssup->comparator(adist[i], bdist[i], ssup);
458 		if (result != 0)
459 			return result;
460 	}
461 
462 	return 0;
463 }
464 
465 /*
466  * Pairing heap provides getting topmost (greatest) element while KNN provides
467  * ascending sort.  That's why we invert the sort order.
468  */
469 static int
reorderqueue_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)470 reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b,
471 				 void *arg)
472 {
473 	ReorderTuple *rta = (ReorderTuple *) a;
474 	ReorderTuple *rtb = (ReorderTuple *) b;
475 	IndexScanState *node = (IndexScanState *) arg;
476 
477 	/* exchange argument order to invert the sort order */
478 	return cmp_orderbyvals(rtb->orderbyvals, rtb->orderbynulls,
479 						   rta->orderbyvals, rta->orderbynulls,
480 						   node);
481 }
482 
483 /*
484  * Helper function to push a tuple to the reorder queue.
485  */
486 static void
reorderqueue_push(IndexScanState * node,HeapTuple tuple,Datum * orderbyvals,bool * orderbynulls)487 reorderqueue_push(IndexScanState *node, HeapTuple tuple,
488 				  Datum *orderbyvals, bool *orderbynulls)
489 {
490 	IndexScanDesc scandesc = node->iss_ScanDesc;
491 	EState	   *estate = node->ss.ps.state;
492 	MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
493 	ReorderTuple *rt;
494 	int			i;
495 
496 	rt = (ReorderTuple *) palloc(sizeof(ReorderTuple));
497 	rt->htup = heap_copytuple(tuple);
498 	rt->orderbyvals =
499 		(Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys);
500 	rt->orderbynulls =
501 		(bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys);
502 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
503 	{
504 		if (!orderbynulls[i])
505 			rt->orderbyvals[i] = datumCopy(orderbyvals[i],
506 										   node->iss_OrderByTypByVals[i],
507 										   node->iss_OrderByTypLens[i]);
508 		else
509 			rt->orderbyvals[i] = (Datum) 0;
510 		rt->orderbynulls[i] = orderbynulls[i];
511 	}
512 	pairingheap_add(node->iss_ReorderQueue, &rt->ph_node);
513 
514 	MemoryContextSwitchTo(oldContext);
515 }
516 
517 /*
518  * Helper function to pop the next tuple from the reorder queue.
519  */
520 static HeapTuple
reorderqueue_pop(IndexScanState * node)521 reorderqueue_pop(IndexScanState *node)
522 {
523 	HeapTuple	result;
524 	ReorderTuple *topmost;
525 	int			i;
526 
527 	topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
528 
529 	result = topmost->htup;
530 	for (i = 0; i < node->iss_NumOrderByKeys; i++)
531 	{
532 		if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
533 			pfree(DatumGetPointer(topmost->orderbyvals[i]));
534 	}
535 	pfree(topmost->orderbyvals);
536 	pfree(topmost->orderbynulls);
537 	pfree(topmost);
538 
539 	return result;
540 }
541 
542 
543 /* ----------------------------------------------------------------
544  *		ExecIndexScan(node)
545  * ----------------------------------------------------------------
546  */
547 static TupleTableSlot *
ExecIndexScan(PlanState * pstate)548 ExecIndexScan(PlanState *pstate)
549 {
550 	IndexScanState *node = castNode(IndexScanState, pstate);
551 
552 	/*
553 	 * If we have runtime keys and they've not already been set up, do it now.
554 	 */
555 	if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
556 		ExecReScan((PlanState *) node);
557 
558 	if (node->iss_NumOrderByKeys > 0)
559 		return ExecScan(&node->ss,
560 						(ExecScanAccessMtd) IndexNextWithReorder,
561 						(ExecScanRecheckMtd) IndexRecheck);
562 	else
563 		return ExecScan(&node->ss,
564 						(ExecScanAccessMtd) IndexNext,
565 						(ExecScanRecheckMtd) IndexRecheck);
566 }
567 
568 /* ----------------------------------------------------------------
569  *		ExecReScanIndexScan(node)
570  *
571  *		Recalculates the values of any scan keys whose value depends on
572  *		information known at runtime, then rescans the indexed relation.
573  *
574  *		Updating the scan key was formerly done separately in
575  *		ExecUpdateIndexScanKeys. Integrating it into ReScan makes
576  *		rescans of indices and relations/general streams more uniform.
577  * ----------------------------------------------------------------
578  */
579 void
ExecReScanIndexScan(IndexScanState * node)580 ExecReScanIndexScan(IndexScanState *node)
581 {
582 	/*
583 	 * If we are doing runtime key calculations (ie, any of the index key
584 	 * values weren't simple Consts), compute the new key values.  But first,
585 	 * reset the context so we don't leak memory as each outer tuple is
586 	 * scanned.  Note this assumes that we will recalculate *all* runtime keys
587 	 * on each call.
588 	 */
589 	if (node->iss_NumRuntimeKeys != 0)
590 	{
591 		ExprContext *econtext = node->iss_RuntimeContext;
592 
593 		ResetExprContext(econtext);
594 		ExecIndexEvalRuntimeKeys(econtext,
595 								 node->iss_RuntimeKeys,
596 								 node->iss_NumRuntimeKeys);
597 	}
598 	node->iss_RuntimeKeysReady = true;
599 
600 	/* flush the reorder queue */
601 	if (node->iss_ReorderQueue)
602 	{
603 		while (!pairingheap_is_empty(node->iss_ReorderQueue))
604 			reorderqueue_pop(node);
605 	}
606 
607 	/* reset index scan */
608 	if (node->iss_ScanDesc)
609 		index_rescan(node->iss_ScanDesc,
610 					 node->iss_ScanKeys, node->iss_NumScanKeys,
611 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
612 	node->iss_ReachedEnd = false;
613 
614 	ExecScanReScan(&node->ss);
615 }
616 
617 
618 /*
619  * ExecIndexEvalRuntimeKeys
620  *		Evaluate any runtime key values, and update the scankeys.
621  */
622 void
ExecIndexEvalRuntimeKeys(ExprContext * econtext,IndexRuntimeKeyInfo * runtimeKeys,int numRuntimeKeys)623 ExecIndexEvalRuntimeKeys(ExprContext *econtext,
624 						 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
625 {
626 	int			j;
627 	MemoryContext oldContext;
628 
629 	/* We want to keep the key values in per-tuple memory */
630 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
631 
632 	for (j = 0; j < numRuntimeKeys; j++)
633 	{
634 		ScanKey		scan_key = runtimeKeys[j].scan_key;
635 		ExprState  *key_expr = runtimeKeys[j].key_expr;
636 		Datum		scanvalue;
637 		bool		isNull;
638 
639 		/*
640 		 * For each run-time key, extract the run-time expression and evaluate
641 		 * it with respect to the current context.  We then stick the result
642 		 * into the proper scan key.
643 		 *
644 		 * Note: the result of the eval could be a pass-by-ref value that's
645 		 * stored in some outer scan's tuple, not in
646 		 * econtext->ecxt_per_tuple_memory.  We assume that the outer tuple
647 		 * will stay put throughout our scan.  If this is wrong, we could copy
648 		 * the result into our context explicitly, but I think that's not
649 		 * necessary.
650 		 *
651 		 * It's also entirely possible that the result of the eval is a
652 		 * toasted value.  In this case we should forcibly detoast it, to
653 		 * avoid repeat detoastings each time the value is examined by an
654 		 * index support function.
655 		 */
656 		scanvalue = ExecEvalExpr(key_expr,
657 								 econtext,
658 								 &isNull);
659 		if (isNull)
660 		{
661 			scan_key->sk_argument = scanvalue;
662 			scan_key->sk_flags |= SK_ISNULL;
663 		}
664 		else
665 		{
666 			if (runtimeKeys[j].key_toastable)
667 				scanvalue = PointerGetDatum(PG_DETOAST_DATUM(scanvalue));
668 			scan_key->sk_argument = scanvalue;
669 			scan_key->sk_flags &= ~SK_ISNULL;
670 		}
671 	}
672 
673 	MemoryContextSwitchTo(oldContext);
674 }
675 
676 /*
677  * ExecIndexEvalArrayKeys
678  *		Evaluate any array key values, and set up to iterate through arrays.
679  *
680  * Returns TRUE if there are array elements to consider; FALSE means there
681  * is at least one null or empty array, so no match is possible.  On TRUE
682  * result, the scankeys are initialized with the first elements of the arrays.
683  */
684 bool
ExecIndexEvalArrayKeys(ExprContext * econtext,IndexArrayKeyInfo * arrayKeys,int numArrayKeys)685 ExecIndexEvalArrayKeys(ExprContext *econtext,
686 					   IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
687 {
688 	bool		result = true;
689 	int			j;
690 	MemoryContext oldContext;
691 
692 	/* We want to keep the arrays in per-tuple memory */
693 	oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
694 
695 	for (j = 0; j < numArrayKeys; j++)
696 	{
697 		ScanKey		scan_key = arrayKeys[j].scan_key;
698 		ExprState  *array_expr = arrayKeys[j].array_expr;
699 		Datum		arraydatum;
700 		bool		isNull;
701 		ArrayType  *arrayval;
702 		int16		elmlen;
703 		bool		elmbyval;
704 		char		elmalign;
705 		int			num_elems;
706 		Datum	   *elem_values;
707 		bool	   *elem_nulls;
708 
709 		/*
710 		 * Compute and deconstruct the array expression. (Notes in
711 		 * ExecIndexEvalRuntimeKeys() apply here too.)
712 		 */
713 		arraydatum = ExecEvalExpr(array_expr,
714 								  econtext,
715 								  &isNull);
716 		if (isNull)
717 		{
718 			result = false;
719 			break;				/* no point in evaluating more */
720 		}
721 		arrayval = DatumGetArrayTypeP(arraydatum);
722 		/* We could cache this data, but not clear it's worth it */
723 		get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
724 							 &elmlen, &elmbyval, &elmalign);
725 		deconstruct_array(arrayval,
726 						  ARR_ELEMTYPE(arrayval),
727 						  elmlen, elmbyval, elmalign,
728 						  &elem_values, &elem_nulls, &num_elems);
729 		if (num_elems <= 0)
730 		{
731 			result = false;
732 			break;				/* no point in evaluating more */
733 		}
734 
735 		/*
736 		 * Note: we expect the previous array data, if any, to be
737 		 * automatically freed by resetting the per-tuple context; hence no
738 		 * pfree's here.
739 		 */
740 		arrayKeys[j].elem_values = elem_values;
741 		arrayKeys[j].elem_nulls = elem_nulls;
742 		arrayKeys[j].num_elems = num_elems;
743 		scan_key->sk_argument = elem_values[0];
744 		if (elem_nulls[0])
745 			scan_key->sk_flags |= SK_ISNULL;
746 		else
747 			scan_key->sk_flags &= ~SK_ISNULL;
748 		arrayKeys[j].next_elem = 1;
749 	}
750 
751 	MemoryContextSwitchTo(oldContext);
752 
753 	return result;
754 }
755 
756 /*
757  * ExecIndexAdvanceArrayKeys
758  *		Advance to the next set of array key values, if any.
759  *
760  * Returns TRUE if there is another set of values to consider, FALSE if not.
761  * On TRUE result, the scankeys are initialized with the next set of values.
762  */
763 bool
ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo * arrayKeys,int numArrayKeys)764 ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
765 {
766 	bool		found = false;
767 	int			j;
768 
769 	/*
770 	 * Note we advance the rightmost array key most quickly, since it will
771 	 * correspond to the lowest-order index column among the available
772 	 * qualifications.  This is hypothesized to result in better locality of
773 	 * access in the index.
774 	 */
775 	for (j = numArrayKeys - 1; j >= 0; j--)
776 	{
777 		ScanKey		scan_key = arrayKeys[j].scan_key;
778 		int			next_elem = arrayKeys[j].next_elem;
779 		int			num_elems = arrayKeys[j].num_elems;
780 		Datum	   *elem_values = arrayKeys[j].elem_values;
781 		bool	   *elem_nulls = arrayKeys[j].elem_nulls;
782 
783 		if (next_elem >= num_elems)
784 		{
785 			next_elem = 0;
786 			found = false;		/* need to advance next array key */
787 		}
788 		else
789 			found = true;
790 		scan_key->sk_argument = elem_values[next_elem];
791 		if (elem_nulls[next_elem])
792 			scan_key->sk_flags |= SK_ISNULL;
793 		else
794 			scan_key->sk_flags &= ~SK_ISNULL;
795 		arrayKeys[j].next_elem = next_elem + 1;
796 		if (found)
797 			break;
798 	}
799 
800 	return found;
801 }
802 
803 
804 /* ----------------------------------------------------------------
805  *		ExecEndIndexScan
806  * ----------------------------------------------------------------
807  */
808 void
ExecEndIndexScan(IndexScanState * node)809 ExecEndIndexScan(IndexScanState *node)
810 {
811 	Relation	indexRelationDesc;
812 	IndexScanDesc indexScanDesc;
813 	Relation	relation;
814 
815 	/*
816 	 * extract information from the node
817 	 */
818 	indexRelationDesc = node->iss_RelationDesc;
819 	indexScanDesc = node->iss_ScanDesc;
820 	relation = node->ss.ss_currentRelation;
821 
822 	/*
823 	 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
824 	 */
825 #ifdef NOT_USED
826 	ExecFreeExprContext(&node->ss.ps);
827 	if (node->iss_RuntimeContext)
828 		FreeExprContext(node->iss_RuntimeContext, true);
829 #endif
830 
831 	/*
832 	 * clear out tuple table slots
833 	 */
834 	ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
835 	ExecClearTuple(node->ss.ss_ScanTupleSlot);
836 
837 	/*
838 	 * close the index relation (no-op if we didn't open it)
839 	 */
840 	if (indexScanDesc)
841 		index_endscan(indexScanDesc);
842 	if (indexRelationDesc)
843 		index_close(indexRelationDesc, NoLock);
844 
845 	/*
846 	 * close the heap relation.
847 	 */
848 	ExecCloseScanRelation(relation);
849 }
850 
851 /* ----------------------------------------------------------------
852  *		ExecIndexMarkPos
853  *
854  * Note: we assume that no caller attempts to set a mark before having read
855  * at least one tuple.  Otherwise, iss_ScanDesc might still be NULL.
856  * ----------------------------------------------------------------
857  */
858 void
ExecIndexMarkPos(IndexScanState * node)859 ExecIndexMarkPos(IndexScanState *node)
860 {
861 	EState	   *estate = node->ss.ps.state;
862 
863 	if (estate->es_epqTuple != NULL)
864 	{
865 		/*
866 		 * We are inside an EvalPlanQual recheck.  If a test tuple exists for
867 		 * this relation, then we shouldn't access the index at all.  We would
868 		 * instead need to save, and later restore, the state of the
869 		 * es_epqScanDone flag, so that re-fetching the test tuple is
870 		 * possible.  However, given the assumption that no caller sets a mark
871 		 * at the start of the scan, we can only get here with es_epqScanDone
872 		 * already set, and so no state need be saved.
873 		 */
874 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
875 
876 		Assert(scanrelid > 0);
877 		if (estate->es_epqTupleSet[scanrelid - 1])
878 		{
879 			/* Verify the claim above */
880 			if (!estate->es_epqScanDone[scanrelid - 1])
881 				elog(ERROR, "unexpected ExecIndexMarkPos call in EPQ recheck");
882 			return;
883 		}
884 	}
885 
886 	index_markpos(node->iss_ScanDesc);
887 }
888 
889 /* ----------------------------------------------------------------
890  *		ExecIndexRestrPos
891  * ----------------------------------------------------------------
892  */
893 void
ExecIndexRestrPos(IndexScanState * node)894 ExecIndexRestrPos(IndexScanState *node)
895 {
896 	EState	   *estate = node->ss.ps.state;
897 
898 	if (estate->es_epqTuple != NULL)
899 	{
900 		/* See comments in ExecIndexMarkPos */
901 		Index		scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
902 
903 		Assert(scanrelid > 0);
904 		if (estate->es_epqTupleSet[scanrelid - 1])
905 		{
906 			/* Verify the claim above */
907 			if (!estate->es_epqScanDone[scanrelid - 1])
908 				elog(ERROR, "unexpected ExecIndexRestrPos call in EPQ recheck");
909 			return;
910 		}
911 	}
912 
913 	index_restrpos(node->iss_ScanDesc);
914 }
915 
916 /* ----------------------------------------------------------------
917  *		ExecInitIndexScan
918  *
919  *		Initializes the index scan's state information, creates
920  *		scan keys, and opens the base and index relations.
921  *
922  *		Note: index scans have 2 sets of state information because
923  *			  we have to keep track of the base relation and the
924  *			  index relation.
925  * ----------------------------------------------------------------
926  */
927 IndexScanState *
ExecInitIndexScan(IndexScan * node,EState * estate,int eflags)928 ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
929 {
930 	IndexScanState *indexstate;
931 	Relation	currentRelation;
932 	bool		relistarget;
933 
934 	/*
935 	 * create state structure
936 	 */
937 	indexstate = makeNode(IndexScanState);
938 	indexstate->ss.ps.plan = (Plan *) node;
939 	indexstate->ss.ps.state = estate;
940 	indexstate->ss.ps.ExecProcNode = ExecIndexScan;
941 
942 	/*
943 	 * Miscellaneous initialization
944 	 *
945 	 * create expression context for node
946 	 */
947 	ExecAssignExprContext(estate, &indexstate->ss.ps);
948 
949 	/*
950 	 * initialize child expressions
951 	 *
952 	 * Note: we don't initialize all of the indexqual expression, only the
953 	 * sub-parts corresponding to runtime keys (see below).  Likewise for
954 	 * indexorderby, if any.  But the indexqualorig expression is always
955 	 * initialized even though it will only be used in some uncommon cases ---
956 	 * would be nice to improve that.  (Problem is that any SubPlans present
957 	 * in the expression must be found now...)
958 	 */
959 	indexstate->ss.ps.qual =
960 		ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
961 	indexstate->indexqualorig =
962 		ExecInitQual(node->indexqualorig, (PlanState *) indexstate);
963 	indexstate->indexorderbyorig =
964 		ExecInitExprList(node->indexorderbyorig, (PlanState *) indexstate);
965 
966 	/*
967 	 * tuple table initialization
968 	 */
969 	ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
970 	ExecInitScanTupleSlot(estate, &indexstate->ss);
971 
972 	/*
973 	 * open the base relation and acquire appropriate lock on it.
974 	 */
975 	currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
976 
977 	indexstate->ss.ss_currentRelation = currentRelation;
978 	indexstate->ss.ss_currentScanDesc = NULL;	/* no heap scan here */
979 
980 	/*
981 	 * get the scan type from the relation descriptor.
982 	 */
983 	ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation));
984 
985 	/*
986 	 * Initialize result tuple type and projection info.
987 	 */
988 	ExecAssignResultTypeFromTL(&indexstate->ss.ps);
989 	ExecAssignScanProjectionInfo(&indexstate->ss);
990 
991 	/*
992 	 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
993 	 * here.  This allows an index-advisor plugin to EXPLAIN a plan containing
994 	 * references to nonexistent indexes.
995 	 */
996 	if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
997 		return indexstate;
998 
999 	/*
1000 	 * Open the index relation.
1001 	 *
1002 	 * If the parent table is one of the target relations of the query, then
1003 	 * InitPlan already opened and write-locked the index, so we can avoid
1004 	 * taking another lock here.  Otherwise we need a normal reader's lock.
1005 	 */
1006 	relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
1007 	indexstate->iss_RelationDesc = index_open(node->indexid,
1008 											  relistarget ? NoLock : AccessShareLock);
1009 
1010 	/*
1011 	 * Initialize index-specific scan state
1012 	 */
1013 	indexstate->iss_RuntimeKeysReady = false;
1014 	indexstate->iss_RuntimeKeys = NULL;
1015 	indexstate->iss_NumRuntimeKeys = 0;
1016 
1017 	/*
1018 	 * build the index scan keys from the index qualification
1019 	 */
1020 	ExecIndexBuildScanKeys((PlanState *) indexstate,
1021 						   indexstate->iss_RelationDesc,
1022 						   node->indexqual,
1023 						   false,
1024 						   &indexstate->iss_ScanKeys,
1025 						   &indexstate->iss_NumScanKeys,
1026 						   &indexstate->iss_RuntimeKeys,
1027 						   &indexstate->iss_NumRuntimeKeys,
1028 						   NULL,	/* no ArrayKeys */
1029 						   NULL);
1030 
1031 	/*
1032 	 * any ORDER BY exprs have to be turned into scankeys in the same way
1033 	 */
1034 	ExecIndexBuildScanKeys((PlanState *) indexstate,
1035 						   indexstate->iss_RelationDesc,
1036 						   node->indexorderby,
1037 						   true,
1038 						   &indexstate->iss_OrderByKeys,
1039 						   &indexstate->iss_NumOrderByKeys,
1040 						   &indexstate->iss_RuntimeKeys,
1041 						   &indexstate->iss_NumRuntimeKeys,
1042 						   NULL,	/* no ArrayKeys */
1043 						   NULL);
1044 
1045 	/* Initialize sort support, if we need to re-check ORDER BY exprs */
1046 	if (indexstate->iss_NumOrderByKeys > 0)
1047 	{
1048 		int			numOrderByKeys = indexstate->iss_NumOrderByKeys;
1049 		int			i;
1050 		ListCell   *lco;
1051 		ListCell   *lcx;
1052 
1053 		/*
1054 		 * Prepare sort support, and look up the data type for each ORDER BY
1055 		 * expression.
1056 		 */
1057 		Assert(numOrderByKeys == list_length(node->indexorderbyops));
1058 		Assert(numOrderByKeys == list_length(node->indexorderbyorig));
1059 		indexstate->iss_SortSupport = (SortSupportData *)
1060 			palloc0(numOrderByKeys * sizeof(SortSupportData));
1061 		indexstate->iss_OrderByTypByVals = (bool *)
1062 			palloc(numOrderByKeys * sizeof(bool));
1063 		indexstate->iss_OrderByTypLens = (int16 *)
1064 			palloc(numOrderByKeys * sizeof(int16));
1065 		i = 0;
1066 		forboth(lco, node->indexorderbyops, lcx, node->indexorderbyorig)
1067 		{
1068 			Oid			orderbyop = lfirst_oid(lco);
1069 			Node	   *orderbyexpr = (Node *) lfirst(lcx);
1070 			Oid			orderbyType = exprType(orderbyexpr);
1071 			Oid			orderbyColl = exprCollation(orderbyexpr);
1072 			SortSupport orderbysort = &indexstate->iss_SortSupport[i];
1073 
1074 			/* Initialize sort support */
1075 			orderbysort->ssup_cxt = CurrentMemoryContext;
1076 			orderbysort->ssup_collation = orderbyColl;
1077 			/* See cmp_orderbyvals() comments on NULLS LAST */
1078 			orderbysort->ssup_nulls_first = false;
1079 			/* ssup_attno is unused here and elsewhere */
1080 			orderbysort->ssup_attno = 0;
1081 			/* No abbreviation */
1082 			orderbysort->abbreviate = false;
1083 			PrepareSortSupportFromOrderingOp(orderbyop, orderbysort);
1084 
1085 			get_typlenbyval(orderbyType,
1086 							&indexstate->iss_OrderByTypLens[i],
1087 							&indexstate->iss_OrderByTypByVals[i]);
1088 			i++;
1089 		}
1090 
1091 		/* allocate arrays to hold the re-calculated distances */
1092 		indexstate->iss_OrderByValues = (Datum *)
1093 			palloc(numOrderByKeys * sizeof(Datum));
1094 		indexstate->iss_OrderByNulls = (bool *)
1095 			palloc(numOrderByKeys * sizeof(bool));
1096 
1097 		/* and initialize the reorder queue */
1098 		indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp,
1099 															indexstate);
1100 	}
1101 
1102 	/*
1103 	 * If we have runtime keys, we need an ExprContext to evaluate them. The
1104 	 * node's standard context won't do because we want to reset that context
1105 	 * for every tuple.  So, build another context just like the other one...
1106 	 * -tgl 7/11/00
1107 	 */
1108 	if (indexstate->iss_NumRuntimeKeys != 0)
1109 	{
1110 		ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
1111 
1112 		ExecAssignExprContext(estate, &indexstate->ss.ps);
1113 		indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
1114 		indexstate->ss.ps.ps_ExprContext = stdecontext;
1115 	}
1116 	else
1117 	{
1118 		indexstate->iss_RuntimeContext = NULL;
1119 	}
1120 
1121 	/*
1122 	 * all done.
1123 	 */
1124 	return indexstate;
1125 }
1126 
1127 
1128 /*
1129  * ExecIndexBuildScanKeys
1130  *		Build the index scan keys from the index qualification expressions
1131  *
1132  * The index quals are passed to the index AM in the form of a ScanKey array.
1133  * This routine sets up the ScanKeys, fills in all constant fields of the
1134  * ScanKeys, and prepares information about the keys that have non-constant
1135  * comparison values.  We divide index qual expressions into five types:
1136  *
1137  * 1. Simple operator with constant comparison value ("indexkey op constant").
1138  * For these, we just fill in a ScanKey containing the constant value.
1139  *
1140  * 2. Simple operator with non-constant value ("indexkey op expression").
1141  * For these, we create a ScanKey with everything filled in except the
1142  * expression value, and set up an IndexRuntimeKeyInfo struct to drive
1143  * evaluation of the expression at the right times.
1144  *
1145  * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
1146  * For these, we create a header ScanKey plus a subsidiary ScanKey array,
1147  * as specified in access/skey.h.  The elements of the row comparison
1148  * can have either constant or non-constant comparison values.
1149  *
1150  * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)").  If the index
1151  * supports amsearcharray, we handle these the same as simple operators,
1152  * setting the SK_SEARCHARRAY flag to tell the AM to handle them.  Otherwise,
1153  * we create a ScanKey with everything filled in except the comparison value,
1154  * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
1155  * (Note that if we use an IndexArrayKeyInfo struct, the array expression is
1156  * always treated as requiring runtime evaluation, even if it's a constant.)
1157  *
1158  * 5. NullTest ("indexkey IS NULL/IS NOT NULL").  We just fill in the
1159  * ScanKey properly.
1160  *
1161  * This code is also used to prepare ORDER BY expressions for amcanorderbyop
1162  * indexes.  The behavior is exactly the same, except that we have to look up
1163  * the operator differently.  Note that only cases 1 and 2 are currently
1164  * possible for ORDER BY.
1165  *
1166  * Input params are:
1167  *
1168  * planstate: executor state node we are working for
1169  * index: the index we are building scan keys for
1170  * quals: indexquals (or indexorderbys) expressions
1171  * isorderby: true if processing ORDER BY exprs, false if processing quals
1172  * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
1173  * *numRuntimeKeys: number of pre-existing runtime keys
1174  *
1175  * Output params are:
1176  *
1177  * *scanKeys: receives ptr to array of ScanKeys
1178  * *numScanKeys: receives number of scankeys
1179  * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
1180  * *numRuntimeKeys: receives number of runtime keys
1181  * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
1182  * *numArrayKeys: receives number of array keys
1183  *
1184  * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
1185  * IndexArrayKeyInfos are not supported.
1186  */
1187 void
ExecIndexBuildScanKeys(PlanState * planstate,Relation index,List * quals,bool isorderby,ScanKey * scanKeys,int * numScanKeys,IndexRuntimeKeyInfo ** runtimeKeys,int * numRuntimeKeys,IndexArrayKeyInfo ** arrayKeys,int * numArrayKeys)1188 ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
1189 					   List *quals, bool isorderby,
1190 					   ScanKey *scanKeys, int *numScanKeys,
1191 					   IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
1192 					   IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
1193 {
1194 	ListCell   *qual_cell;
1195 	ScanKey		scan_keys;
1196 	IndexRuntimeKeyInfo *runtime_keys;
1197 	IndexArrayKeyInfo *array_keys;
1198 	int			n_scan_keys;
1199 	int			n_runtime_keys;
1200 	int			max_runtime_keys;
1201 	int			n_array_keys;
1202 	int			j;
1203 
1204 	/* Allocate array for ScanKey structs: one per qual */
1205 	n_scan_keys = list_length(quals);
1206 	scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
1207 
1208 	/*
1209 	 * runtime_keys array is dynamically resized as needed.  We handle it this
1210 	 * way so that the same runtime keys array can be shared between
1211 	 * indexquals and indexorderbys, which will be processed in separate calls
1212 	 * of this function.  Caller must be sure to pass in NULL/0 for first
1213 	 * call.
1214 	 */
1215 	runtime_keys = *runtimeKeys;
1216 	n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
1217 
1218 	/* Allocate array_keys as large as it could possibly need to be */
1219 	array_keys = (IndexArrayKeyInfo *)
1220 		palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
1221 	n_array_keys = 0;
1222 
1223 	/*
1224 	 * for each opclause in the given qual, convert the opclause into a single
1225 	 * scan key
1226 	 */
1227 	j = 0;
1228 	foreach(qual_cell, quals)
1229 	{
1230 		Expr	   *clause = (Expr *) lfirst(qual_cell);
1231 		ScanKey		this_scan_key = &scan_keys[j++];
1232 		Oid			opno;		/* operator's OID */
1233 		RegProcedure opfuncid;	/* operator proc id used in scan */
1234 		Oid			opfamily;	/* opfamily of index column */
1235 		int			op_strategy;	/* operator's strategy number */
1236 		Oid			op_lefttype;	/* operator's declared input types */
1237 		Oid			op_righttype;
1238 		Expr	   *leftop;		/* expr on lhs of operator */
1239 		Expr	   *rightop;	/* expr on rhs ... */
1240 		AttrNumber	varattno;	/* att number used in scan */
1241 
1242 		if (IsA(clause, OpExpr))
1243 		{
1244 			/* indexkey op const or indexkey op expression */
1245 			int			flags = 0;
1246 			Datum		scanvalue;
1247 
1248 			opno = ((OpExpr *) clause)->opno;
1249 			opfuncid = ((OpExpr *) clause)->opfuncid;
1250 
1251 			/*
1252 			 * leftop should be the index key Var, possibly relabeled
1253 			 */
1254 			leftop = (Expr *) get_leftop(clause);
1255 
1256 			if (leftop && IsA(leftop, RelabelType))
1257 				leftop = ((RelabelType *) leftop)->arg;
1258 
1259 			Assert(leftop != NULL);
1260 
1261 			if (!(IsA(leftop, Var) &&
1262 				  ((Var *) leftop)->varno == INDEX_VAR))
1263 				elog(ERROR, "indexqual doesn't have key on left side");
1264 
1265 			varattno = ((Var *) leftop)->varattno;
1266 			if (varattno < 1 || varattno > index->rd_index->indnatts)
1267 				elog(ERROR, "bogus index qualification");
1268 
1269 			/*
1270 			 * We have to look up the operator's strategy number.  This
1271 			 * provides a cross-check that the operator does match the index.
1272 			 */
1273 			opfamily = index->rd_opfamily[varattno - 1];
1274 
1275 			get_op_opfamily_properties(opno, opfamily, isorderby,
1276 									   &op_strategy,
1277 									   &op_lefttype,
1278 									   &op_righttype);
1279 
1280 			if (isorderby)
1281 				flags |= SK_ORDER_BY;
1282 
1283 			/*
1284 			 * rightop is the constant or variable comparison value
1285 			 */
1286 			rightop = (Expr *) get_rightop(clause);
1287 
1288 			if (rightop && IsA(rightop, RelabelType))
1289 				rightop = ((RelabelType *) rightop)->arg;
1290 
1291 			Assert(rightop != NULL);
1292 
1293 			if (IsA(rightop, Const))
1294 			{
1295 				/* OK, simple constant comparison value */
1296 				scanvalue = ((Const *) rightop)->constvalue;
1297 				if (((Const *) rightop)->constisnull)
1298 					flags |= SK_ISNULL;
1299 			}
1300 			else
1301 			{
1302 				/* Need to treat this one as a runtime key */
1303 				if (n_runtime_keys >= max_runtime_keys)
1304 				{
1305 					if (max_runtime_keys == 0)
1306 					{
1307 						max_runtime_keys = 8;
1308 						runtime_keys = (IndexRuntimeKeyInfo *)
1309 							palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1310 					}
1311 					else
1312 					{
1313 						max_runtime_keys *= 2;
1314 						runtime_keys = (IndexRuntimeKeyInfo *)
1315 							repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1316 					}
1317 				}
1318 				runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1319 				runtime_keys[n_runtime_keys].key_expr =
1320 					ExecInitExpr(rightop, planstate);
1321 				runtime_keys[n_runtime_keys].key_toastable =
1322 					TypeIsToastable(op_righttype);
1323 				n_runtime_keys++;
1324 				scanvalue = (Datum) 0;
1325 			}
1326 
1327 			/*
1328 			 * initialize the scan key's fields appropriately
1329 			 */
1330 			ScanKeyEntryInitialize(this_scan_key,
1331 								   flags,
1332 								   varattno,	/* attribute number to scan */
1333 								   op_strategy, /* op's strategy */
1334 								   op_righttype,	/* strategy subtype */
1335 								   ((OpExpr *) clause)->inputcollid,	/* collation */
1336 								   opfuncid,	/* reg proc to use */
1337 								   scanvalue);	/* constant */
1338 		}
1339 		else if (IsA(clause, RowCompareExpr))
1340 		{
1341 			/* (indexkey, indexkey, ...) op (expression, expression, ...) */
1342 			RowCompareExpr *rc = (RowCompareExpr *) clause;
1343 			ListCell   *largs_cell = list_head(rc->largs);
1344 			ListCell   *rargs_cell = list_head(rc->rargs);
1345 			ListCell   *opnos_cell = list_head(rc->opnos);
1346 			ListCell   *collids_cell = list_head(rc->inputcollids);
1347 			ScanKey		first_sub_key;
1348 			int			n_sub_key;
1349 
1350 			Assert(!isorderby);
1351 
1352 			first_sub_key = (ScanKey)
1353 				palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
1354 			n_sub_key = 0;
1355 
1356 			/* Scan RowCompare columns and generate subsidiary ScanKey items */
1357 			while (opnos_cell != NULL)
1358 			{
1359 				ScanKey		this_sub_key = &first_sub_key[n_sub_key];
1360 				int			flags = SK_ROW_MEMBER;
1361 				Datum		scanvalue;
1362 				Oid			inputcollation;
1363 
1364 				/*
1365 				 * leftop should be the index key Var, possibly relabeled
1366 				 */
1367 				leftop = (Expr *) lfirst(largs_cell);
1368 				largs_cell = lnext(largs_cell);
1369 
1370 				if (leftop && IsA(leftop, RelabelType))
1371 					leftop = ((RelabelType *) leftop)->arg;
1372 
1373 				Assert(leftop != NULL);
1374 
1375 				if (!(IsA(leftop, Var) &&
1376 					  ((Var *) leftop)->varno == INDEX_VAR))
1377 					elog(ERROR, "indexqual doesn't have key on left side");
1378 
1379 				varattno = ((Var *) leftop)->varattno;
1380 
1381 				/*
1382 				 * We have to look up the operator's associated btree support
1383 				 * function
1384 				 */
1385 				opno = lfirst_oid(opnos_cell);
1386 				opnos_cell = lnext(opnos_cell);
1387 
1388 				if (index->rd_rel->relam != BTREE_AM_OID ||
1389 					varattno < 1 || varattno > index->rd_index->indnatts)
1390 					elog(ERROR, "bogus RowCompare index qualification");
1391 				opfamily = index->rd_opfamily[varattno - 1];
1392 
1393 				get_op_opfamily_properties(opno, opfamily, isorderby,
1394 										   &op_strategy,
1395 										   &op_lefttype,
1396 										   &op_righttype);
1397 
1398 				if (op_strategy != rc->rctype)
1399 					elog(ERROR, "RowCompare index qualification contains wrong operator");
1400 
1401 				opfuncid = get_opfamily_proc(opfamily,
1402 											 op_lefttype,
1403 											 op_righttype,
1404 											 BTORDER_PROC);
1405 				if (!RegProcedureIsValid(opfuncid))
1406 					elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
1407 						 BTORDER_PROC, op_lefttype, op_righttype, opfamily);
1408 
1409 				inputcollation = lfirst_oid(collids_cell);
1410 				collids_cell = lnext(collids_cell);
1411 
1412 				/*
1413 				 * rightop is the constant or variable comparison value
1414 				 */
1415 				rightop = (Expr *) lfirst(rargs_cell);
1416 				rargs_cell = lnext(rargs_cell);
1417 
1418 				if (rightop && IsA(rightop, RelabelType))
1419 					rightop = ((RelabelType *) rightop)->arg;
1420 
1421 				Assert(rightop != NULL);
1422 
1423 				if (IsA(rightop, Const))
1424 				{
1425 					/* OK, simple constant comparison value */
1426 					scanvalue = ((Const *) rightop)->constvalue;
1427 					if (((Const *) rightop)->constisnull)
1428 						flags |= SK_ISNULL;
1429 				}
1430 				else
1431 				{
1432 					/* Need to treat this one as a runtime key */
1433 					if (n_runtime_keys >= max_runtime_keys)
1434 					{
1435 						if (max_runtime_keys == 0)
1436 						{
1437 							max_runtime_keys = 8;
1438 							runtime_keys = (IndexRuntimeKeyInfo *)
1439 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1440 						}
1441 						else
1442 						{
1443 							max_runtime_keys *= 2;
1444 							runtime_keys = (IndexRuntimeKeyInfo *)
1445 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1446 						}
1447 					}
1448 					runtime_keys[n_runtime_keys].scan_key = this_sub_key;
1449 					runtime_keys[n_runtime_keys].key_expr =
1450 						ExecInitExpr(rightop, planstate);
1451 					runtime_keys[n_runtime_keys].key_toastable =
1452 						TypeIsToastable(op_righttype);
1453 					n_runtime_keys++;
1454 					scanvalue = (Datum) 0;
1455 				}
1456 
1457 				/*
1458 				 * initialize the subsidiary scan key's fields appropriately
1459 				 */
1460 				ScanKeyEntryInitialize(this_sub_key,
1461 									   flags,
1462 									   varattno,	/* attribute number */
1463 									   op_strategy, /* op's strategy */
1464 									   op_righttype,	/* strategy subtype */
1465 									   inputcollation,	/* collation */
1466 									   opfuncid,	/* reg proc to use */
1467 									   scanvalue);	/* constant */
1468 				n_sub_key++;
1469 			}
1470 
1471 			/* Mark the last subsidiary scankey correctly */
1472 			first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
1473 
1474 			/*
1475 			 * We don't use ScanKeyEntryInitialize for the header because it
1476 			 * isn't going to contain a valid sk_func pointer.
1477 			 */
1478 			MemSet(this_scan_key, 0, sizeof(ScanKeyData));
1479 			this_scan_key->sk_flags = SK_ROW_HEADER;
1480 			this_scan_key->sk_attno = first_sub_key->sk_attno;
1481 			this_scan_key->sk_strategy = rc->rctype;
1482 			/* sk_subtype, sk_collation, sk_func not used in a header */
1483 			this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
1484 		}
1485 		else if (IsA(clause, ScalarArrayOpExpr))
1486 		{
1487 			/* indexkey op ANY (array-expression) */
1488 			ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
1489 			int			flags = 0;
1490 			Datum		scanvalue;
1491 
1492 			Assert(!isorderby);
1493 
1494 			Assert(saop->useOr);
1495 			opno = saop->opno;
1496 			opfuncid = saop->opfuncid;
1497 
1498 			/*
1499 			 * leftop should be the index key Var, possibly relabeled
1500 			 */
1501 			leftop = (Expr *) linitial(saop->args);
1502 
1503 			if (leftop && IsA(leftop, RelabelType))
1504 				leftop = ((RelabelType *) leftop)->arg;
1505 
1506 			Assert(leftop != NULL);
1507 
1508 			if (!(IsA(leftop, Var) &&
1509 				  ((Var *) leftop)->varno == INDEX_VAR))
1510 				elog(ERROR, "indexqual doesn't have key on left side");
1511 
1512 			varattno = ((Var *) leftop)->varattno;
1513 			if (varattno < 1 || varattno > index->rd_index->indnatts)
1514 				elog(ERROR, "bogus index qualification");
1515 
1516 			/*
1517 			 * We have to look up the operator's strategy number.  This
1518 			 * provides a cross-check that the operator does match the index.
1519 			 */
1520 			opfamily = index->rd_opfamily[varattno - 1];
1521 
1522 			get_op_opfamily_properties(opno, opfamily, isorderby,
1523 									   &op_strategy,
1524 									   &op_lefttype,
1525 									   &op_righttype);
1526 
1527 			/*
1528 			 * rightop is the constant or variable array value
1529 			 */
1530 			rightop = (Expr *) lsecond(saop->args);
1531 
1532 			if (rightop && IsA(rightop, RelabelType))
1533 				rightop = ((RelabelType *) rightop)->arg;
1534 
1535 			Assert(rightop != NULL);
1536 
1537 			if (index->rd_amroutine->amsearcharray)
1538 			{
1539 				/* Index AM will handle this like a simple operator */
1540 				flags |= SK_SEARCHARRAY;
1541 				if (IsA(rightop, Const))
1542 				{
1543 					/* OK, simple constant comparison value */
1544 					scanvalue = ((Const *) rightop)->constvalue;
1545 					if (((Const *) rightop)->constisnull)
1546 						flags |= SK_ISNULL;
1547 				}
1548 				else
1549 				{
1550 					/* Need to treat this one as a runtime key */
1551 					if (n_runtime_keys >= max_runtime_keys)
1552 					{
1553 						if (max_runtime_keys == 0)
1554 						{
1555 							max_runtime_keys = 8;
1556 							runtime_keys = (IndexRuntimeKeyInfo *)
1557 								palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1558 						}
1559 						else
1560 						{
1561 							max_runtime_keys *= 2;
1562 							runtime_keys = (IndexRuntimeKeyInfo *)
1563 								repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1564 						}
1565 					}
1566 					runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1567 					runtime_keys[n_runtime_keys].key_expr =
1568 						ExecInitExpr(rightop, planstate);
1569 
1570 					/*
1571 					 * Careful here: the runtime expression is not of
1572 					 * op_righttype, but rather is an array of same; so
1573 					 * TypeIsToastable() isn't helpful.  However, we can
1574 					 * assume that all array types are toastable.
1575 					 */
1576 					runtime_keys[n_runtime_keys].key_toastable = true;
1577 					n_runtime_keys++;
1578 					scanvalue = (Datum) 0;
1579 				}
1580 			}
1581 			else
1582 			{
1583 				/* Executor has to expand the array value */
1584 				array_keys[n_array_keys].scan_key = this_scan_key;
1585 				array_keys[n_array_keys].array_expr =
1586 					ExecInitExpr(rightop, planstate);
1587 				/* the remaining fields were zeroed by palloc0 */
1588 				n_array_keys++;
1589 				scanvalue = (Datum) 0;
1590 			}
1591 
1592 			/*
1593 			 * initialize the scan key's fields appropriately
1594 			 */
1595 			ScanKeyEntryInitialize(this_scan_key,
1596 								   flags,
1597 								   varattno,	/* attribute number to scan */
1598 								   op_strategy, /* op's strategy */
1599 								   op_righttype,	/* strategy subtype */
1600 								   saop->inputcollid,	/* collation */
1601 								   opfuncid,	/* reg proc to use */
1602 								   scanvalue);	/* constant */
1603 		}
1604 		else if (IsA(clause, NullTest))
1605 		{
1606 			/* indexkey IS NULL or indexkey IS NOT NULL */
1607 			NullTest   *ntest = (NullTest *) clause;
1608 			int			flags;
1609 
1610 			Assert(!isorderby);
1611 
1612 			/*
1613 			 * argument should be the index key Var, possibly relabeled
1614 			 */
1615 			leftop = ntest->arg;
1616 
1617 			if (leftop && IsA(leftop, RelabelType))
1618 				leftop = ((RelabelType *) leftop)->arg;
1619 
1620 			Assert(leftop != NULL);
1621 
1622 			if (!(IsA(leftop, Var) &&
1623 				  ((Var *) leftop)->varno == INDEX_VAR))
1624 				elog(ERROR, "NullTest indexqual has wrong key");
1625 
1626 			varattno = ((Var *) leftop)->varattno;
1627 
1628 			/*
1629 			 * initialize the scan key's fields appropriately
1630 			 */
1631 			switch (ntest->nulltesttype)
1632 			{
1633 				case IS_NULL:
1634 					flags = SK_ISNULL | SK_SEARCHNULL;
1635 					break;
1636 				case IS_NOT_NULL:
1637 					flags = SK_ISNULL | SK_SEARCHNOTNULL;
1638 					break;
1639 				default:
1640 					elog(ERROR, "unrecognized nulltesttype: %d",
1641 						 (int) ntest->nulltesttype);
1642 					flags = 0;	/* keep compiler quiet */
1643 					break;
1644 			}
1645 
1646 			ScanKeyEntryInitialize(this_scan_key,
1647 								   flags,
1648 								   varattno,	/* attribute number to scan */
1649 								   InvalidStrategy, /* no strategy */
1650 								   InvalidOid,	/* no strategy subtype */
1651 								   InvalidOid,	/* no collation */
1652 								   InvalidOid,	/* no reg proc for this */
1653 								   (Datum) 0);	/* constant */
1654 		}
1655 		else
1656 			elog(ERROR, "unsupported indexqual type: %d",
1657 				 (int) nodeTag(clause));
1658 	}
1659 
1660 	Assert(n_runtime_keys <= max_runtime_keys);
1661 
1662 	/* Get rid of any unused arrays */
1663 	if (n_array_keys == 0)
1664 	{
1665 		pfree(array_keys);
1666 		array_keys = NULL;
1667 	}
1668 
1669 	/*
1670 	 * Return info to our caller.
1671 	 */
1672 	*scanKeys = scan_keys;
1673 	*numScanKeys = n_scan_keys;
1674 	*runtimeKeys = runtime_keys;
1675 	*numRuntimeKeys = n_runtime_keys;
1676 	if (arrayKeys)
1677 	{
1678 		*arrayKeys = array_keys;
1679 		*numArrayKeys = n_array_keys;
1680 	}
1681 	else if (n_array_keys != 0)
1682 		elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
1683 }
1684 
1685 /* ----------------------------------------------------------------
1686  *						Parallel Scan Support
1687  * ----------------------------------------------------------------
1688  */
1689 
1690 /* ----------------------------------------------------------------
1691  *		ExecIndexScanEstimate
1692  *
1693  *		estimates the space required to serialize indexscan node.
1694  * ----------------------------------------------------------------
1695  */
1696 void
ExecIndexScanEstimate(IndexScanState * node,ParallelContext * pcxt)1697 ExecIndexScanEstimate(IndexScanState *node,
1698 					  ParallelContext *pcxt)
1699 {
1700 	EState	   *estate = node->ss.ps.state;
1701 
1702 	node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
1703 													 estate->es_snapshot);
1704 	shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
1705 	shm_toc_estimate_keys(&pcxt->estimator, 1);
1706 }
1707 
1708 /* ----------------------------------------------------------------
1709  *		ExecIndexScanInitializeDSM
1710  *
1711  *		Set up a parallel index scan descriptor.
1712  * ----------------------------------------------------------------
1713  */
1714 void
ExecIndexScanInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1715 ExecIndexScanInitializeDSM(IndexScanState *node,
1716 						   ParallelContext *pcxt)
1717 {
1718 	EState	   *estate = node->ss.ps.state;
1719 	ParallelIndexScanDesc piscan;
1720 
1721 	piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
1722 	index_parallelscan_initialize(node->ss.ss_currentRelation,
1723 								  node->iss_RelationDesc,
1724 								  estate->es_snapshot,
1725 								  piscan);
1726 	shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
1727 	node->iss_ScanDesc =
1728 		index_beginscan_parallel(node->ss.ss_currentRelation,
1729 								 node->iss_RelationDesc,
1730 								 node->iss_NumScanKeys,
1731 								 node->iss_NumOrderByKeys,
1732 								 piscan);
1733 
1734 	/*
1735 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1736 	 * the scankeys to the index AM.
1737 	 */
1738 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1739 		index_rescan(node->iss_ScanDesc,
1740 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1741 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1742 }
1743 
1744 /* ----------------------------------------------------------------
1745  *		ExecIndexScanReInitializeDSM
1746  *
1747  *		Reset shared state before beginning a fresh scan.
1748  * ----------------------------------------------------------------
1749  */
1750 void
ExecIndexScanReInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1751 ExecIndexScanReInitializeDSM(IndexScanState *node,
1752 							 ParallelContext *pcxt)
1753 {
1754 	index_parallelrescan(node->iss_ScanDesc);
1755 }
1756 
1757 /* ----------------------------------------------------------------
1758  *		ExecIndexScanInitializeWorker
1759  *
1760  *		Copy relevant information from TOC into planstate.
1761  * ----------------------------------------------------------------
1762  */
1763 void
ExecIndexScanInitializeWorker(IndexScanState * node,shm_toc * toc)1764 ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
1765 {
1766 	ParallelIndexScanDesc piscan;
1767 
1768 	piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
1769 	node->iss_ScanDesc =
1770 		index_beginscan_parallel(node->ss.ss_currentRelation,
1771 								 node->iss_RelationDesc,
1772 								 node->iss_NumScanKeys,
1773 								 node->iss_NumOrderByKeys,
1774 								 piscan);
1775 
1776 	/*
1777 	 * If no run-time keys to calculate or they are ready, go ahead and pass
1778 	 * the scankeys to the index AM.
1779 	 */
1780 	if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1781 		index_rescan(node->iss_ScanDesc,
1782 					 node->iss_ScanKeys, node->iss_NumScanKeys,
1783 					 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1784 }
1785