1 /*-------------------------------------------------------------------------
2 *
3 * nodeIndexscan.c
4 * Routines to support indexed scans of relations
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/executor/nodeIndexscan.c
12 *
13 *-------------------------------------------------------------------------
14 */
15 /*
16 * INTERFACE ROUTINES
17 * ExecIndexScan scans a relation using an index
18 * IndexNext retrieve next tuple using index
19 * IndexNextWithReorder same, but recheck ORDER BY expressions
20 * ExecInitIndexScan creates and initializes state info.
21 * ExecReScanIndexScan rescans the indexed relation.
22 * ExecEndIndexScan releases all storage.
23 * ExecIndexMarkPos marks scan position.
24 * ExecIndexRestrPos restores scan position.
25 * ExecIndexScanEstimate estimates DSM space needed for parallel index scan
26 * ExecIndexScanInitializeDSM initialize DSM for parallel indexscan
27 * ExecIndexScanReInitializeDSM reinitialize DSM for fresh scan
28 * ExecIndexScanInitializeWorker attach to DSM info in parallel worker
29 */
30 #include "postgres.h"
31
32 #include "access/nbtree.h"
33 #include "access/relscan.h"
34 #include "catalog/pg_am.h"
35 #include "executor/execdebug.h"
36 #include "executor/nodeIndexscan.h"
37 #include "lib/pairingheap.h"
38 #include "miscadmin.h"
39 #include "nodes/nodeFuncs.h"
40 #include "optimizer/clauses.h"
41 #include "utils/array.h"
42 #include "utils/datum.h"
43 #include "utils/lsyscache.h"
44 #include "utils/memutils.h"
45 #include "utils/rel.h"
46
47 /*
48 * When an ordering operator is used, tuples fetched from the index that
49 * need to be reordered are queued in a pairing heap, as ReorderTuples.
50 */
51 typedef struct
52 {
53 pairingheap_node ph_node;
54 HeapTuple htup;
55 Datum *orderbyvals;
56 bool *orderbynulls;
57 } ReorderTuple;
58
59 static TupleTableSlot *IndexNext(IndexScanState *node);
60 static TupleTableSlot *IndexNextWithReorder(IndexScanState *node);
61 static void EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext);
62 static bool IndexRecheck(IndexScanState *node, TupleTableSlot *slot);
63 static int cmp_orderbyvals(const Datum *adist, const bool *anulls,
64 const Datum *bdist, const bool *bnulls,
65 IndexScanState *node);
66 static int reorderqueue_cmp(const pairingheap_node *a,
67 const pairingheap_node *b, void *arg);
68 static void reorderqueue_push(IndexScanState *node, HeapTuple tuple,
69 Datum *orderbyvals, bool *orderbynulls);
70 static HeapTuple reorderqueue_pop(IndexScanState *node);
71
72
73 /* ----------------------------------------------------------------
74 * IndexNext
75 *
76 * Retrieve a tuple from the IndexScan node's currentRelation
77 * using the index specified in the IndexScanState information.
78 * ----------------------------------------------------------------
79 */
80 static TupleTableSlot *
IndexNext(IndexScanState * node)81 IndexNext(IndexScanState *node)
82 {
83 EState *estate;
84 ExprContext *econtext;
85 ScanDirection direction;
86 IndexScanDesc scandesc;
87 HeapTuple tuple;
88 TupleTableSlot *slot;
89
90 /*
91 * extract necessary information from index scan node
92 */
93 estate = node->ss.ps.state;
94 direction = estate->es_direction;
95 /* flip direction if this is an overall backward scan */
96 if (ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir))
97 {
98 if (ScanDirectionIsForward(direction))
99 direction = BackwardScanDirection;
100 else if (ScanDirectionIsBackward(direction))
101 direction = ForwardScanDirection;
102 }
103 scandesc = node->iss_ScanDesc;
104 econtext = node->ss.ps.ps_ExprContext;
105 slot = node->ss.ss_ScanTupleSlot;
106
107 if (scandesc == NULL)
108 {
109 /*
110 * We reach here if the index scan is not parallel, or if we're
111 * serially executing an index scan that was planned to be parallel.
112 */
113 scandesc = index_beginscan(node->ss.ss_currentRelation,
114 node->iss_RelationDesc,
115 estate->es_snapshot,
116 node->iss_NumScanKeys,
117 node->iss_NumOrderByKeys);
118
119 node->iss_ScanDesc = scandesc;
120
121 /*
122 * If no run-time keys to calculate or they are ready, go ahead and
123 * pass the scankeys to the index AM.
124 */
125 if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
126 index_rescan(scandesc,
127 node->iss_ScanKeys, node->iss_NumScanKeys,
128 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
129 }
130
131 /*
132 * ok, now that we have what we need, fetch the next tuple.
133 */
134 while ((tuple = index_getnext(scandesc, direction)) != NULL)
135 {
136 CHECK_FOR_INTERRUPTS();
137
138 /*
139 * Store the scanned tuple in the scan tuple slot of the scan state.
140 * Note: we pass 'false' because tuples returned by amgetnext are
141 * pointers onto disk pages and must not be pfree()'d.
142 */
143 ExecStoreTuple(tuple, /* tuple to store */
144 slot, /* slot to store in */
145 scandesc->xs_cbuf, /* buffer containing tuple */
146 false); /* don't pfree */
147
148 /*
149 * If the index was lossy, we have to recheck the index quals using
150 * the fetched tuple.
151 */
152 if (scandesc->xs_recheck)
153 {
154 econtext->ecxt_scantuple = slot;
155 ResetExprContext(econtext);
156 if (!ExecQual(node->indexqualorig, econtext))
157 {
158 /* Fails recheck, so drop it and loop back for another */
159 InstrCountFiltered2(node, 1);
160 continue;
161 }
162 }
163
164 return slot;
165 }
166
167 /*
168 * if we get here it means the index scan failed so we are at the end of
169 * the scan..
170 */
171 node->iss_ReachedEnd = true;
172 return ExecClearTuple(slot);
173 }
174
175 /* ----------------------------------------------------------------
176 * IndexNextWithReorder
177 *
178 * Like IndexNext, but this version can also re-check ORDER BY
179 * expressions, and reorder the tuples as necessary.
180 * ----------------------------------------------------------------
181 */
182 static TupleTableSlot *
IndexNextWithReorder(IndexScanState * node)183 IndexNextWithReorder(IndexScanState *node)
184 {
185 EState *estate;
186 ExprContext *econtext;
187 IndexScanDesc scandesc;
188 HeapTuple tuple;
189 TupleTableSlot *slot;
190 ReorderTuple *topmost = NULL;
191 bool was_exact;
192 Datum *lastfetched_vals;
193 bool *lastfetched_nulls;
194 int cmp;
195
196 estate = node->ss.ps.state;
197
198 /*
199 * Only forward scan is supported with reordering. Note: we can get away
200 * with just Asserting here because the system will not try to run the
201 * plan backwards if ExecSupportsBackwardScan() says it won't work.
202 * Currently, that is guaranteed because no index AMs support both
203 * amcanorderbyop and amcanbackward; if any ever do,
204 * ExecSupportsBackwardScan() will need to consider indexorderbys
205 * explicitly.
206 */
207 Assert(!ScanDirectionIsBackward(((IndexScan *) node->ss.ps.plan)->indexorderdir));
208 Assert(ScanDirectionIsForward(estate->es_direction));
209
210 scandesc = node->iss_ScanDesc;
211 econtext = node->ss.ps.ps_ExprContext;
212 slot = node->ss.ss_ScanTupleSlot;
213
214 if (scandesc == NULL)
215 {
216 /*
217 * We reach here if the index scan is not parallel, or if we're
218 * serially executing an index scan that was planned to be parallel.
219 */
220 scandesc = index_beginscan(node->ss.ss_currentRelation,
221 node->iss_RelationDesc,
222 estate->es_snapshot,
223 node->iss_NumScanKeys,
224 node->iss_NumOrderByKeys);
225
226 node->iss_ScanDesc = scandesc;
227
228 /*
229 * If no run-time keys to calculate or they are ready, go ahead and
230 * pass the scankeys to the index AM.
231 */
232 if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
233 index_rescan(scandesc,
234 node->iss_ScanKeys, node->iss_NumScanKeys,
235 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
236 }
237
238 for (;;)
239 {
240 CHECK_FOR_INTERRUPTS();
241
242 /*
243 * Check the reorder queue first. If the topmost tuple in the queue
244 * has an ORDER BY value smaller than (or equal to) the value last
245 * returned by the index, we can return it now.
246 */
247 if (!pairingheap_is_empty(node->iss_ReorderQueue))
248 {
249 topmost = (ReorderTuple *) pairingheap_first(node->iss_ReorderQueue);
250
251 if (node->iss_ReachedEnd ||
252 cmp_orderbyvals(topmost->orderbyvals,
253 topmost->orderbynulls,
254 scandesc->xs_orderbyvals,
255 scandesc->xs_orderbynulls,
256 node) <= 0)
257 {
258 tuple = reorderqueue_pop(node);
259
260 /* Pass 'true', as the tuple in the queue is a palloc'd copy */
261 ExecStoreTuple(tuple, slot, InvalidBuffer, true);
262 return slot;
263 }
264 }
265 else if (node->iss_ReachedEnd)
266 {
267 /* Queue is empty, and no more tuples from index. We're done. */
268 return ExecClearTuple(slot);
269 }
270
271 /*
272 * Fetch next tuple from the index.
273 */
274 next_indextuple:
275 tuple = index_getnext(scandesc, ForwardScanDirection);
276 if (!tuple)
277 {
278 /*
279 * No more tuples from the index. But we still need to drain any
280 * remaining tuples from the queue before we're done.
281 */
282 node->iss_ReachedEnd = true;
283 continue;
284 }
285
286 /*
287 * Store the scanned tuple in the scan tuple slot of the scan state.
288 * Note: we pass 'false' because tuples returned by amgetnext are
289 * pointers onto disk pages and must not be pfree()'d.
290 */
291 ExecStoreTuple(tuple, /* tuple to store */
292 slot, /* slot to store in */
293 scandesc->xs_cbuf, /* buffer containing tuple */
294 false); /* don't pfree */
295
296 /*
297 * If the index was lossy, we have to recheck the index quals and
298 * ORDER BY expressions using the fetched tuple.
299 */
300 if (scandesc->xs_recheck)
301 {
302 econtext->ecxt_scantuple = slot;
303 ResetExprContext(econtext);
304 if (!ExecQual(node->indexqualorig, econtext))
305 {
306 /* Fails recheck, so drop it and loop back for another */
307 InstrCountFiltered2(node, 1);
308 /* allow this loop to be cancellable */
309 CHECK_FOR_INTERRUPTS();
310 goto next_indextuple;
311 }
312 }
313
314 if (scandesc->xs_recheckorderby)
315 {
316 econtext->ecxt_scantuple = slot;
317 ResetExprContext(econtext);
318 EvalOrderByExpressions(node, econtext);
319
320 /*
321 * Was the ORDER BY value returned by the index accurate? The
322 * recheck flag means that the index can return inaccurate values,
323 * but then again, the value returned for any particular tuple
324 * could also be exactly correct. Compare the value returned by
325 * the index with the recalculated value. (If the value returned
326 * by the index happened to be exact right, we can often avoid
327 * pushing the tuple to the queue, just to pop it back out again.)
328 */
329 cmp = cmp_orderbyvals(node->iss_OrderByValues,
330 node->iss_OrderByNulls,
331 scandesc->xs_orderbyvals,
332 scandesc->xs_orderbynulls,
333 node);
334 if (cmp < 0)
335 elog(ERROR, "index returned tuples in wrong order");
336 else if (cmp == 0)
337 was_exact = true;
338 else
339 was_exact = false;
340 lastfetched_vals = node->iss_OrderByValues;
341 lastfetched_nulls = node->iss_OrderByNulls;
342 }
343 else
344 {
345 was_exact = true;
346 lastfetched_vals = scandesc->xs_orderbyvals;
347 lastfetched_nulls = scandesc->xs_orderbynulls;
348 }
349
350 /*
351 * Can we return this tuple immediately, or does it need to be pushed
352 * to the reorder queue? If the ORDER BY expression values returned
353 * by the index were inaccurate, we can't return it yet, because the
354 * next tuple from the index might need to come before this one. Also,
355 * we can't return it yet if there are any smaller tuples in the queue
356 * already.
357 */
358 if (!was_exact || (topmost && cmp_orderbyvals(lastfetched_vals,
359 lastfetched_nulls,
360 topmost->orderbyvals,
361 topmost->orderbynulls,
362 node) > 0))
363 {
364 /* Put this tuple to the queue */
365 reorderqueue_push(node, tuple, lastfetched_vals, lastfetched_nulls);
366 continue;
367 }
368 else
369 {
370 /* Can return this tuple immediately. */
371 return slot;
372 }
373 }
374
375 /*
376 * if we get here it means the index scan failed so we are at the end of
377 * the scan..
378 */
379 return ExecClearTuple(slot);
380 }
381
382 /*
383 * Calculate the expressions in the ORDER BY clause, based on the heap tuple.
384 */
385 static void
EvalOrderByExpressions(IndexScanState * node,ExprContext * econtext)386 EvalOrderByExpressions(IndexScanState *node, ExprContext *econtext)
387 {
388 int i;
389 ListCell *l;
390 MemoryContext oldContext;
391
392 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
393
394 i = 0;
395 foreach(l, node->indexorderbyorig)
396 {
397 ExprState *orderby = (ExprState *) lfirst(l);
398
399 node->iss_OrderByValues[i] = ExecEvalExpr(orderby,
400 econtext,
401 &node->iss_OrderByNulls[i]);
402 i++;
403 }
404
405 MemoryContextSwitchTo(oldContext);
406 }
407
408 /*
409 * IndexRecheck -- access method routine to recheck a tuple in EvalPlanQual
410 */
411 static bool
IndexRecheck(IndexScanState * node,TupleTableSlot * slot)412 IndexRecheck(IndexScanState *node, TupleTableSlot *slot)
413 {
414 ExprContext *econtext;
415
416 /*
417 * extract necessary information from index scan node
418 */
419 econtext = node->ss.ps.ps_ExprContext;
420
421 /* Does the tuple meet the indexqual condition? */
422 econtext->ecxt_scantuple = slot;
423
424 ResetExprContext(econtext);
425
426 return ExecQual(node->indexqualorig, econtext);
427 }
428
429
430 /*
431 * Compare ORDER BY expression values.
432 */
433 static int
cmp_orderbyvals(const Datum * adist,const bool * anulls,const Datum * bdist,const bool * bnulls,IndexScanState * node)434 cmp_orderbyvals(const Datum *adist, const bool *anulls,
435 const Datum *bdist, const bool *bnulls,
436 IndexScanState *node)
437 {
438 int i;
439 int result;
440
441 for (i = 0; i < node->iss_NumOrderByKeys; i++)
442 {
443 SortSupport ssup = &node->iss_SortSupport[i];
444
445 /*
446 * Handle nulls. We only need to support NULLS LAST ordering, because
447 * match_pathkeys_to_index() doesn't consider indexorderby
448 * implementation otherwise.
449 */
450 if (anulls[i] && !bnulls[i])
451 return 1;
452 else if (!anulls[i] && bnulls[i])
453 return -1;
454 else if (anulls[i] && bnulls[i])
455 return 0;
456
457 result = ssup->comparator(adist[i], bdist[i], ssup);
458 if (result != 0)
459 return result;
460 }
461
462 return 0;
463 }
464
465 /*
466 * Pairing heap provides getting topmost (greatest) element while KNN provides
467 * ascending sort. That's why we invert the sort order.
468 */
469 static int
reorderqueue_cmp(const pairingheap_node * a,const pairingheap_node * b,void * arg)470 reorderqueue_cmp(const pairingheap_node *a, const pairingheap_node *b,
471 void *arg)
472 {
473 ReorderTuple *rta = (ReorderTuple *) a;
474 ReorderTuple *rtb = (ReorderTuple *) b;
475 IndexScanState *node = (IndexScanState *) arg;
476
477 /* exchange argument order to invert the sort order */
478 return cmp_orderbyvals(rtb->orderbyvals, rtb->orderbynulls,
479 rta->orderbyvals, rta->orderbynulls,
480 node);
481 }
482
483 /*
484 * Helper function to push a tuple to the reorder queue.
485 */
486 static void
reorderqueue_push(IndexScanState * node,HeapTuple tuple,Datum * orderbyvals,bool * orderbynulls)487 reorderqueue_push(IndexScanState *node, HeapTuple tuple,
488 Datum *orderbyvals, bool *orderbynulls)
489 {
490 IndexScanDesc scandesc = node->iss_ScanDesc;
491 EState *estate = node->ss.ps.state;
492 MemoryContext oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
493 ReorderTuple *rt;
494 int i;
495
496 rt = (ReorderTuple *) palloc(sizeof(ReorderTuple));
497 rt->htup = heap_copytuple(tuple);
498 rt->orderbyvals =
499 (Datum *) palloc(sizeof(Datum) * scandesc->numberOfOrderBys);
500 rt->orderbynulls =
501 (bool *) palloc(sizeof(bool) * scandesc->numberOfOrderBys);
502 for (i = 0; i < node->iss_NumOrderByKeys; i++)
503 {
504 if (!orderbynulls[i])
505 rt->orderbyvals[i] = datumCopy(orderbyvals[i],
506 node->iss_OrderByTypByVals[i],
507 node->iss_OrderByTypLens[i]);
508 else
509 rt->orderbyvals[i] = (Datum) 0;
510 rt->orderbynulls[i] = orderbynulls[i];
511 }
512 pairingheap_add(node->iss_ReorderQueue, &rt->ph_node);
513
514 MemoryContextSwitchTo(oldContext);
515 }
516
517 /*
518 * Helper function to pop the next tuple from the reorder queue.
519 */
520 static HeapTuple
reorderqueue_pop(IndexScanState * node)521 reorderqueue_pop(IndexScanState *node)
522 {
523 HeapTuple result;
524 ReorderTuple *topmost;
525 int i;
526
527 topmost = (ReorderTuple *) pairingheap_remove_first(node->iss_ReorderQueue);
528
529 result = topmost->htup;
530 for (i = 0; i < node->iss_NumOrderByKeys; i++)
531 {
532 if (!node->iss_OrderByTypByVals[i] && !topmost->orderbynulls[i])
533 pfree(DatumGetPointer(topmost->orderbyvals[i]));
534 }
535 pfree(topmost->orderbyvals);
536 pfree(topmost->orderbynulls);
537 pfree(topmost);
538
539 return result;
540 }
541
542
543 /* ----------------------------------------------------------------
544 * ExecIndexScan(node)
545 * ----------------------------------------------------------------
546 */
547 static TupleTableSlot *
ExecIndexScan(PlanState * pstate)548 ExecIndexScan(PlanState *pstate)
549 {
550 IndexScanState *node = castNode(IndexScanState, pstate);
551
552 /*
553 * If we have runtime keys and they've not already been set up, do it now.
554 */
555 if (node->iss_NumRuntimeKeys != 0 && !node->iss_RuntimeKeysReady)
556 ExecReScan((PlanState *) node);
557
558 if (node->iss_NumOrderByKeys > 0)
559 return ExecScan(&node->ss,
560 (ExecScanAccessMtd) IndexNextWithReorder,
561 (ExecScanRecheckMtd) IndexRecheck);
562 else
563 return ExecScan(&node->ss,
564 (ExecScanAccessMtd) IndexNext,
565 (ExecScanRecheckMtd) IndexRecheck);
566 }
567
568 /* ----------------------------------------------------------------
569 * ExecReScanIndexScan(node)
570 *
571 * Recalculates the values of any scan keys whose value depends on
572 * information known at runtime, then rescans the indexed relation.
573 *
574 * Updating the scan key was formerly done separately in
575 * ExecUpdateIndexScanKeys. Integrating it into ReScan makes
576 * rescans of indices and relations/general streams more uniform.
577 * ----------------------------------------------------------------
578 */
579 void
ExecReScanIndexScan(IndexScanState * node)580 ExecReScanIndexScan(IndexScanState *node)
581 {
582 /*
583 * If we are doing runtime key calculations (ie, any of the index key
584 * values weren't simple Consts), compute the new key values. But first,
585 * reset the context so we don't leak memory as each outer tuple is
586 * scanned. Note this assumes that we will recalculate *all* runtime keys
587 * on each call.
588 */
589 if (node->iss_NumRuntimeKeys != 0)
590 {
591 ExprContext *econtext = node->iss_RuntimeContext;
592
593 ResetExprContext(econtext);
594 ExecIndexEvalRuntimeKeys(econtext,
595 node->iss_RuntimeKeys,
596 node->iss_NumRuntimeKeys);
597 }
598 node->iss_RuntimeKeysReady = true;
599
600 /* flush the reorder queue */
601 if (node->iss_ReorderQueue)
602 {
603 while (!pairingheap_is_empty(node->iss_ReorderQueue))
604 reorderqueue_pop(node);
605 }
606
607 /* reset index scan */
608 if (node->iss_ScanDesc)
609 index_rescan(node->iss_ScanDesc,
610 node->iss_ScanKeys, node->iss_NumScanKeys,
611 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
612 node->iss_ReachedEnd = false;
613
614 ExecScanReScan(&node->ss);
615 }
616
617
618 /*
619 * ExecIndexEvalRuntimeKeys
620 * Evaluate any runtime key values, and update the scankeys.
621 */
622 void
ExecIndexEvalRuntimeKeys(ExprContext * econtext,IndexRuntimeKeyInfo * runtimeKeys,int numRuntimeKeys)623 ExecIndexEvalRuntimeKeys(ExprContext *econtext,
624 IndexRuntimeKeyInfo *runtimeKeys, int numRuntimeKeys)
625 {
626 int j;
627 MemoryContext oldContext;
628
629 /* We want to keep the key values in per-tuple memory */
630 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
631
632 for (j = 0; j < numRuntimeKeys; j++)
633 {
634 ScanKey scan_key = runtimeKeys[j].scan_key;
635 ExprState *key_expr = runtimeKeys[j].key_expr;
636 Datum scanvalue;
637 bool isNull;
638
639 /*
640 * For each run-time key, extract the run-time expression and evaluate
641 * it with respect to the current context. We then stick the result
642 * into the proper scan key.
643 *
644 * Note: the result of the eval could be a pass-by-ref value that's
645 * stored in some outer scan's tuple, not in
646 * econtext->ecxt_per_tuple_memory. We assume that the outer tuple
647 * will stay put throughout our scan. If this is wrong, we could copy
648 * the result into our context explicitly, but I think that's not
649 * necessary.
650 *
651 * It's also entirely possible that the result of the eval is a
652 * toasted value. In this case we should forcibly detoast it, to
653 * avoid repeat detoastings each time the value is examined by an
654 * index support function.
655 */
656 scanvalue = ExecEvalExpr(key_expr,
657 econtext,
658 &isNull);
659 if (isNull)
660 {
661 scan_key->sk_argument = scanvalue;
662 scan_key->sk_flags |= SK_ISNULL;
663 }
664 else
665 {
666 if (runtimeKeys[j].key_toastable)
667 scanvalue = PointerGetDatum(PG_DETOAST_DATUM(scanvalue));
668 scan_key->sk_argument = scanvalue;
669 scan_key->sk_flags &= ~SK_ISNULL;
670 }
671 }
672
673 MemoryContextSwitchTo(oldContext);
674 }
675
676 /*
677 * ExecIndexEvalArrayKeys
678 * Evaluate any array key values, and set up to iterate through arrays.
679 *
680 * Returns TRUE if there are array elements to consider; FALSE means there
681 * is at least one null or empty array, so no match is possible. On TRUE
682 * result, the scankeys are initialized with the first elements of the arrays.
683 */
684 bool
ExecIndexEvalArrayKeys(ExprContext * econtext,IndexArrayKeyInfo * arrayKeys,int numArrayKeys)685 ExecIndexEvalArrayKeys(ExprContext *econtext,
686 IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
687 {
688 bool result = true;
689 int j;
690 MemoryContext oldContext;
691
692 /* We want to keep the arrays in per-tuple memory */
693 oldContext = MemoryContextSwitchTo(econtext->ecxt_per_tuple_memory);
694
695 for (j = 0; j < numArrayKeys; j++)
696 {
697 ScanKey scan_key = arrayKeys[j].scan_key;
698 ExprState *array_expr = arrayKeys[j].array_expr;
699 Datum arraydatum;
700 bool isNull;
701 ArrayType *arrayval;
702 int16 elmlen;
703 bool elmbyval;
704 char elmalign;
705 int num_elems;
706 Datum *elem_values;
707 bool *elem_nulls;
708
709 /*
710 * Compute and deconstruct the array expression. (Notes in
711 * ExecIndexEvalRuntimeKeys() apply here too.)
712 */
713 arraydatum = ExecEvalExpr(array_expr,
714 econtext,
715 &isNull);
716 if (isNull)
717 {
718 result = false;
719 break; /* no point in evaluating more */
720 }
721 arrayval = DatumGetArrayTypeP(arraydatum);
722 /* We could cache this data, but not clear it's worth it */
723 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
724 &elmlen, &elmbyval, &elmalign);
725 deconstruct_array(arrayval,
726 ARR_ELEMTYPE(arrayval),
727 elmlen, elmbyval, elmalign,
728 &elem_values, &elem_nulls, &num_elems);
729 if (num_elems <= 0)
730 {
731 result = false;
732 break; /* no point in evaluating more */
733 }
734
735 /*
736 * Note: we expect the previous array data, if any, to be
737 * automatically freed by resetting the per-tuple context; hence no
738 * pfree's here.
739 */
740 arrayKeys[j].elem_values = elem_values;
741 arrayKeys[j].elem_nulls = elem_nulls;
742 arrayKeys[j].num_elems = num_elems;
743 scan_key->sk_argument = elem_values[0];
744 if (elem_nulls[0])
745 scan_key->sk_flags |= SK_ISNULL;
746 else
747 scan_key->sk_flags &= ~SK_ISNULL;
748 arrayKeys[j].next_elem = 1;
749 }
750
751 MemoryContextSwitchTo(oldContext);
752
753 return result;
754 }
755
756 /*
757 * ExecIndexAdvanceArrayKeys
758 * Advance to the next set of array key values, if any.
759 *
760 * Returns TRUE if there is another set of values to consider, FALSE if not.
761 * On TRUE result, the scankeys are initialized with the next set of values.
762 */
763 bool
ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo * arrayKeys,int numArrayKeys)764 ExecIndexAdvanceArrayKeys(IndexArrayKeyInfo *arrayKeys, int numArrayKeys)
765 {
766 bool found = false;
767 int j;
768
769 /*
770 * Note we advance the rightmost array key most quickly, since it will
771 * correspond to the lowest-order index column among the available
772 * qualifications. This is hypothesized to result in better locality of
773 * access in the index.
774 */
775 for (j = numArrayKeys - 1; j >= 0; j--)
776 {
777 ScanKey scan_key = arrayKeys[j].scan_key;
778 int next_elem = arrayKeys[j].next_elem;
779 int num_elems = arrayKeys[j].num_elems;
780 Datum *elem_values = arrayKeys[j].elem_values;
781 bool *elem_nulls = arrayKeys[j].elem_nulls;
782
783 if (next_elem >= num_elems)
784 {
785 next_elem = 0;
786 found = false; /* need to advance next array key */
787 }
788 else
789 found = true;
790 scan_key->sk_argument = elem_values[next_elem];
791 if (elem_nulls[next_elem])
792 scan_key->sk_flags |= SK_ISNULL;
793 else
794 scan_key->sk_flags &= ~SK_ISNULL;
795 arrayKeys[j].next_elem = next_elem + 1;
796 if (found)
797 break;
798 }
799
800 return found;
801 }
802
803
804 /* ----------------------------------------------------------------
805 * ExecEndIndexScan
806 * ----------------------------------------------------------------
807 */
808 void
ExecEndIndexScan(IndexScanState * node)809 ExecEndIndexScan(IndexScanState *node)
810 {
811 Relation indexRelationDesc;
812 IndexScanDesc indexScanDesc;
813 Relation relation;
814
815 /*
816 * extract information from the node
817 */
818 indexRelationDesc = node->iss_RelationDesc;
819 indexScanDesc = node->iss_ScanDesc;
820 relation = node->ss.ss_currentRelation;
821
822 /*
823 * Free the exprcontext(s) ... now dead code, see ExecFreeExprContext
824 */
825 #ifdef NOT_USED
826 ExecFreeExprContext(&node->ss.ps);
827 if (node->iss_RuntimeContext)
828 FreeExprContext(node->iss_RuntimeContext, true);
829 #endif
830
831 /*
832 * clear out tuple table slots
833 */
834 ExecClearTuple(node->ss.ps.ps_ResultTupleSlot);
835 ExecClearTuple(node->ss.ss_ScanTupleSlot);
836
837 /*
838 * close the index relation (no-op if we didn't open it)
839 */
840 if (indexScanDesc)
841 index_endscan(indexScanDesc);
842 if (indexRelationDesc)
843 index_close(indexRelationDesc, NoLock);
844
845 /*
846 * close the heap relation.
847 */
848 ExecCloseScanRelation(relation);
849 }
850
851 /* ----------------------------------------------------------------
852 * ExecIndexMarkPos
853 *
854 * Note: we assume that no caller attempts to set a mark before having read
855 * at least one tuple. Otherwise, iss_ScanDesc might still be NULL.
856 * ----------------------------------------------------------------
857 */
858 void
ExecIndexMarkPos(IndexScanState * node)859 ExecIndexMarkPos(IndexScanState *node)
860 {
861 EState *estate = node->ss.ps.state;
862
863 if (estate->es_epqTuple != NULL)
864 {
865 /*
866 * We are inside an EvalPlanQual recheck. If a test tuple exists for
867 * this relation, then we shouldn't access the index at all. We would
868 * instead need to save, and later restore, the state of the
869 * es_epqScanDone flag, so that re-fetching the test tuple is
870 * possible. However, given the assumption that no caller sets a mark
871 * at the start of the scan, we can only get here with es_epqScanDone
872 * already set, and so no state need be saved.
873 */
874 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
875
876 Assert(scanrelid > 0);
877 if (estate->es_epqTupleSet[scanrelid - 1])
878 {
879 /* Verify the claim above */
880 if (!estate->es_epqScanDone[scanrelid - 1])
881 elog(ERROR, "unexpected ExecIndexMarkPos call in EPQ recheck");
882 return;
883 }
884 }
885
886 index_markpos(node->iss_ScanDesc);
887 }
888
889 /* ----------------------------------------------------------------
890 * ExecIndexRestrPos
891 * ----------------------------------------------------------------
892 */
893 void
ExecIndexRestrPos(IndexScanState * node)894 ExecIndexRestrPos(IndexScanState *node)
895 {
896 EState *estate = node->ss.ps.state;
897
898 if (estate->es_epqTuple != NULL)
899 {
900 /* See comments in ExecIndexMarkPos */
901 Index scanrelid = ((Scan *) node->ss.ps.plan)->scanrelid;
902
903 Assert(scanrelid > 0);
904 if (estate->es_epqTupleSet[scanrelid - 1])
905 {
906 /* Verify the claim above */
907 if (!estate->es_epqScanDone[scanrelid - 1])
908 elog(ERROR, "unexpected ExecIndexRestrPos call in EPQ recheck");
909 return;
910 }
911 }
912
913 index_restrpos(node->iss_ScanDesc);
914 }
915
916 /* ----------------------------------------------------------------
917 * ExecInitIndexScan
918 *
919 * Initializes the index scan's state information, creates
920 * scan keys, and opens the base and index relations.
921 *
922 * Note: index scans have 2 sets of state information because
923 * we have to keep track of the base relation and the
924 * index relation.
925 * ----------------------------------------------------------------
926 */
927 IndexScanState *
ExecInitIndexScan(IndexScan * node,EState * estate,int eflags)928 ExecInitIndexScan(IndexScan *node, EState *estate, int eflags)
929 {
930 IndexScanState *indexstate;
931 Relation currentRelation;
932 bool relistarget;
933
934 /*
935 * create state structure
936 */
937 indexstate = makeNode(IndexScanState);
938 indexstate->ss.ps.plan = (Plan *) node;
939 indexstate->ss.ps.state = estate;
940 indexstate->ss.ps.ExecProcNode = ExecIndexScan;
941
942 /*
943 * Miscellaneous initialization
944 *
945 * create expression context for node
946 */
947 ExecAssignExprContext(estate, &indexstate->ss.ps);
948
949 /*
950 * initialize child expressions
951 *
952 * Note: we don't initialize all of the indexqual expression, only the
953 * sub-parts corresponding to runtime keys (see below). Likewise for
954 * indexorderby, if any. But the indexqualorig expression is always
955 * initialized even though it will only be used in some uncommon cases ---
956 * would be nice to improve that. (Problem is that any SubPlans present
957 * in the expression must be found now...)
958 */
959 indexstate->ss.ps.qual =
960 ExecInitQual(node->scan.plan.qual, (PlanState *) indexstate);
961 indexstate->indexqualorig =
962 ExecInitQual(node->indexqualorig, (PlanState *) indexstate);
963 indexstate->indexorderbyorig =
964 ExecInitExprList(node->indexorderbyorig, (PlanState *) indexstate);
965
966 /*
967 * tuple table initialization
968 */
969 ExecInitResultTupleSlot(estate, &indexstate->ss.ps);
970 ExecInitScanTupleSlot(estate, &indexstate->ss);
971
972 /*
973 * open the base relation and acquire appropriate lock on it.
974 */
975 currentRelation = ExecOpenScanRelation(estate, node->scan.scanrelid, eflags);
976
977 indexstate->ss.ss_currentRelation = currentRelation;
978 indexstate->ss.ss_currentScanDesc = NULL; /* no heap scan here */
979
980 /*
981 * get the scan type from the relation descriptor.
982 */
983 ExecAssignScanType(&indexstate->ss, RelationGetDescr(currentRelation));
984
985 /*
986 * Initialize result tuple type and projection info.
987 */
988 ExecAssignResultTypeFromTL(&indexstate->ss.ps);
989 ExecAssignScanProjectionInfo(&indexstate->ss);
990
991 /*
992 * If we are just doing EXPLAIN (ie, aren't going to run the plan), stop
993 * here. This allows an index-advisor plugin to EXPLAIN a plan containing
994 * references to nonexistent indexes.
995 */
996 if (eflags & EXEC_FLAG_EXPLAIN_ONLY)
997 return indexstate;
998
999 /*
1000 * Open the index relation.
1001 *
1002 * If the parent table is one of the target relations of the query, then
1003 * InitPlan already opened and write-locked the index, so we can avoid
1004 * taking another lock here. Otherwise we need a normal reader's lock.
1005 */
1006 relistarget = ExecRelationIsTargetRelation(estate, node->scan.scanrelid);
1007 indexstate->iss_RelationDesc = index_open(node->indexid,
1008 relistarget ? NoLock : AccessShareLock);
1009
1010 /*
1011 * Initialize index-specific scan state
1012 */
1013 indexstate->iss_RuntimeKeysReady = false;
1014 indexstate->iss_RuntimeKeys = NULL;
1015 indexstate->iss_NumRuntimeKeys = 0;
1016
1017 /*
1018 * build the index scan keys from the index qualification
1019 */
1020 ExecIndexBuildScanKeys((PlanState *) indexstate,
1021 indexstate->iss_RelationDesc,
1022 node->indexqual,
1023 false,
1024 &indexstate->iss_ScanKeys,
1025 &indexstate->iss_NumScanKeys,
1026 &indexstate->iss_RuntimeKeys,
1027 &indexstate->iss_NumRuntimeKeys,
1028 NULL, /* no ArrayKeys */
1029 NULL);
1030
1031 /*
1032 * any ORDER BY exprs have to be turned into scankeys in the same way
1033 */
1034 ExecIndexBuildScanKeys((PlanState *) indexstate,
1035 indexstate->iss_RelationDesc,
1036 node->indexorderby,
1037 true,
1038 &indexstate->iss_OrderByKeys,
1039 &indexstate->iss_NumOrderByKeys,
1040 &indexstate->iss_RuntimeKeys,
1041 &indexstate->iss_NumRuntimeKeys,
1042 NULL, /* no ArrayKeys */
1043 NULL);
1044
1045 /* Initialize sort support, if we need to re-check ORDER BY exprs */
1046 if (indexstate->iss_NumOrderByKeys > 0)
1047 {
1048 int numOrderByKeys = indexstate->iss_NumOrderByKeys;
1049 int i;
1050 ListCell *lco;
1051 ListCell *lcx;
1052
1053 /*
1054 * Prepare sort support, and look up the data type for each ORDER BY
1055 * expression.
1056 */
1057 Assert(numOrderByKeys == list_length(node->indexorderbyops));
1058 Assert(numOrderByKeys == list_length(node->indexorderbyorig));
1059 indexstate->iss_SortSupport = (SortSupportData *)
1060 palloc0(numOrderByKeys * sizeof(SortSupportData));
1061 indexstate->iss_OrderByTypByVals = (bool *)
1062 palloc(numOrderByKeys * sizeof(bool));
1063 indexstate->iss_OrderByTypLens = (int16 *)
1064 palloc(numOrderByKeys * sizeof(int16));
1065 i = 0;
1066 forboth(lco, node->indexorderbyops, lcx, node->indexorderbyorig)
1067 {
1068 Oid orderbyop = lfirst_oid(lco);
1069 Node *orderbyexpr = (Node *) lfirst(lcx);
1070 Oid orderbyType = exprType(orderbyexpr);
1071 Oid orderbyColl = exprCollation(orderbyexpr);
1072 SortSupport orderbysort = &indexstate->iss_SortSupport[i];
1073
1074 /* Initialize sort support */
1075 orderbysort->ssup_cxt = CurrentMemoryContext;
1076 orderbysort->ssup_collation = orderbyColl;
1077 /* See cmp_orderbyvals() comments on NULLS LAST */
1078 orderbysort->ssup_nulls_first = false;
1079 /* ssup_attno is unused here and elsewhere */
1080 orderbysort->ssup_attno = 0;
1081 /* No abbreviation */
1082 orderbysort->abbreviate = false;
1083 PrepareSortSupportFromOrderingOp(orderbyop, orderbysort);
1084
1085 get_typlenbyval(orderbyType,
1086 &indexstate->iss_OrderByTypLens[i],
1087 &indexstate->iss_OrderByTypByVals[i]);
1088 i++;
1089 }
1090
1091 /* allocate arrays to hold the re-calculated distances */
1092 indexstate->iss_OrderByValues = (Datum *)
1093 palloc(numOrderByKeys * sizeof(Datum));
1094 indexstate->iss_OrderByNulls = (bool *)
1095 palloc(numOrderByKeys * sizeof(bool));
1096
1097 /* and initialize the reorder queue */
1098 indexstate->iss_ReorderQueue = pairingheap_allocate(reorderqueue_cmp,
1099 indexstate);
1100 }
1101
1102 /*
1103 * If we have runtime keys, we need an ExprContext to evaluate them. The
1104 * node's standard context won't do because we want to reset that context
1105 * for every tuple. So, build another context just like the other one...
1106 * -tgl 7/11/00
1107 */
1108 if (indexstate->iss_NumRuntimeKeys != 0)
1109 {
1110 ExprContext *stdecontext = indexstate->ss.ps.ps_ExprContext;
1111
1112 ExecAssignExprContext(estate, &indexstate->ss.ps);
1113 indexstate->iss_RuntimeContext = indexstate->ss.ps.ps_ExprContext;
1114 indexstate->ss.ps.ps_ExprContext = stdecontext;
1115 }
1116 else
1117 {
1118 indexstate->iss_RuntimeContext = NULL;
1119 }
1120
1121 /*
1122 * all done.
1123 */
1124 return indexstate;
1125 }
1126
1127
1128 /*
1129 * ExecIndexBuildScanKeys
1130 * Build the index scan keys from the index qualification expressions
1131 *
1132 * The index quals are passed to the index AM in the form of a ScanKey array.
1133 * This routine sets up the ScanKeys, fills in all constant fields of the
1134 * ScanKeys, and prepares information about the keys that have non-constant
1135 * comparison values. We divide index qual expressions into five types:
1136 *
1137 * 1. Simple operator with constant comparison value ("indexkey op constant").
1138 * For these, we just fill in a ScanKey containing the constant value.
1139 *
1140 * 2. Simple operator with non-constant value ("indexkey op expression").
1141 * For these, we create a ScanKey with everything filled in except the
1142 * expression value, and set up an IndexRuntimeKeyInfo struct to drive
1143 * evaluation of the expression at the right times.
1144 *
1145 * 3. RowCompareExpr ("(indexkey, indexkey, ...) op (expr, expr, ...)").
1146 * For these, we create a header ScanKey plus a subsidiary ScanKey array,
1147 * as specified in access/skey.h. The elements of the row comparison
1148 * can have either constant or non-constant comparison values.
1149 *
1150 * 4. ScalarArrayOpExpr ("indexkey op ANY (array-expression)"). If the index
1151 * supports amsearcharray, we handle these the same as simple operators,
1152 * setting the SK_SEARCHARRAY flag to tell the AM to handle them. Otherwise,
1153 * we create a ScanKey with everything filled in except the comparison value,
1154 * and set up an IndexArrayKeyInfo struct to drive processing of the qual.
1155 * (Note that if we use an IndexArrayKeyInfo struct, the array expression is
1156 * always treated as requiring runtime evaluation, even if it's a constant.)
1157 *
1158 * 5. NullTest ("indexkey IS NULL/IS NOT NULL"). We just fill in the
1159 * ScanKey properly.
1160 *
1161 * This code is also used to prepare ORDER BY expressions for amcanorderbyop
1162 * indexes. The behavior is exactly the same, except that we have to look up
1163 * the operator differently. Note that only cases 1 and 2 are currently
1164 * possible for ORDER BY.
1165 *
1166 * Input params are:
1167 *
1168 * planstate: executor state node we are working for
1169 * index: the index we are building scan keys for
1170 * quals: indexquals (or indexorderbys) expressions
1171 * isorderby: true if processing ORDER BY exprs, false if processing quals
1172 * *runtimeKeys: ptr to pre-existing IndexRuntimeKeyInfos, or NULL if none
1173 * *numRuntimeKeys: number of pre-existing runtime keys
1174 *
1175 * Output params are:
1176 *
1177 * *scanKeys: receives ptr to array of ScanKeys
1178 * *numScanKeys: receives number of scankeys
1179 * *runtimeKeys: receives ptr to array of IndexRuntimeKeyInfos, or NULL if none
1180 * *numRuntimeKeys: receives number of runtime keys
1181 * *arrayKeys: receives ptr to array of IndexArrayKeyInfos, or NULL if none
1182 * *numArrayKeys: receives number of array keys
1183 *
1184 * Caller may pass NULL for arrayKeys and numArrayKeys to indicate that
1185 * IndexArrayKeyInfos are not supported.
1186 */
1187 void
ExecIndexBuildScanKeys(PlanState * planstate,Relation index,List * quals,bool isorderby,ScanKey * scanKeys,int * numScanKeys,IndexRuntimeKeyInfo ** runtimeKeys,int * numRuntimeKeys,IndexArrayKeyInfo ** arrayKeys,int * numArrayKeys)1188 ExecIndexBuildScanKeys(PlanState *planstate, Relation index,
1189 List *quals, bool isorderby,
1190 ScanKey *scanKeys, int *numScanKeys,
1191 IndexRuntimeKeyInfo **runtimeKeys, int *numRuntimeKeys,
1192 IndexArrayKeyInfo **arrayKeys, int *numArrayKeys)
1193 {
1194 ListCell *qual_cell;
1195 ScanKey scan_keys;
1196 IndexRuntimeKeyInfo *runtime_keys;
1197 IndexArrayKeyInfo *array_keys;
1198 int n_scan_keys;
1199 int n_runtime_keys;
1200 int max_runtime_keys;
1201 int n_array_keys;
1202 int j;
1203
1204 /* Allocate array for ScanKey structs: one per qual */
1205 n_scan_keys = list_length(quals);
1206 scan_keys = (ScanKey) palloc(n_scan_keys * sizeof(ScanKeyData));
1207
1208 /*
1209 * runtime_keys array is dynamically resized as needed. We handle it this
1210 * way so that the same runtime keys array can be shared between
1211 * indexquals and indexorderbys, which will be processed in separate calls
1212 * of this function. Caller must be sure to pass in NULL/0 for first
1213 * call.
1214 */
1215 runtime_keys = *runtimeKeys;
1216 n_runtime_keys = max_runtime_keys = *numRuntimeKeys;
1217
1218 /* Allocate array_keys as large as it could possibly need to be */
1219 array_keys = (IndexArrayKeyInfo *)
1220 palloc0(n_scan_keys * sizeof(IndexArrayKeyInfo));
1221 n_array_keys = 0;
1222
1223 /*
1224 * for each opclause in the given qual, convert the opclause into a single
1225 * scan key
1226 */
1227 j = 0;
1228 foreach(qual_cell, quals)
1229 {
1230 Expr *clause = (Expr *) lfirst(qual_cell);
1231 ScanKey this_scan_key = &scan_keys[j++];
1232 Oid opno; /* operator's OID */
1233 RegProcedure opfuncid; /* operator proc id used in scan */
1234 Oid opfamily; /* opfamily of index column */
1235 int op_strategy; /* operator's strategy number */
1236 Oid op_lefttype; /* operator's declared input types */
1237 Oid op_righttype;
1238 Expr *leftop; /* expr on lhs of operator */
1239 Expr *rightop; /* expr on rhs ... */
1240 AttrNumber varattno; /* att number used in scan */
1241
1242 if (IsA(clause, OpExpr))
1243 {
1244 /* indexkey op const or indexkey op expression */
1245 int flags = 0;
1246 Datum scanvalue;
1247
1248 opno = ((OpExpr *) clause)->opno;
1249 opfuncid = ((OpExpr *) clause)->opfuncid;
1250
1251 /*
1252 * leftop should be the index key Var, possibly relabeled
1253 */
1254 leftop = (Expr *) get_leftop(clause);
1255
1256 if (leftop && IsA(leftop, RelabelType))
1257 leftop = ((RelabelType *) leftop)->arg;
1258
1259 Assert(leftop != NULL);
1260
1261 if (!(IsA(leftop, Var) &&
1262 ((Var *) leftop)->varno == INDEX_VAR))
1263 elog(ERROR, "indexqual doesn't have key on left side");
1264
1265 varattno = ((Var *) leftop)->varattno;
1266 if (varattno < 1 || varattno > index->rd_index->indnatts)
1267 elog(ERROR, "bogus index qualification");
1268
1269 /*
1270 * We have to look up the operator's strategy number. This
1271 * provides a cross-check that the operator does match the index.
1272 */
1273 opfamily = index->rd_opfamily[varattno - 1];
1274
1275 get_op_opfamily_properties(opno, opfamily, isorderby,
1276 &op_strategy,
1277 &op_lefttype,
1278 &op_righttype);
1279
1280 if (isorderby)
1281 flags |= SK_ORDER_BY;
1282
1283 /*
1284 * rightop is the constant or variable comparison value
1285 */
1286 rightop = (Expr *) get_rightop(clause);
1287
1288 if (rightop && IsA(rightop, RelabelType))
1289 rightop = ((RelabelType *) rightop)->arg;
1290
1291 Assert(rightop != NULL);
1292
1293 if (IsA(rightop, Const))
1294 {
1295 /* OK, simple constant comparison value */
1296 scanvalue = ((Const *) rightop)->constvalue;
1297 if (((Const *) rightop)->constisnull)
1298 flags |= SK_ISNULL;
1299 }
1300 else
1301 {
1302 /* Need to treat this one as a runtime key */
1303 if (n_runtime_keys >= max_runtime_keys)
1304 {
1305 if (max_runtime_keys == 0)
1306 {
1307 max_runtime_keys = 8;
1308 runtime_keys = (IndexRuntimeKeyInfo *)
1309 palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1310 }
1311 else
1312 {
1313 max_runtime_keys *= 2;
1314 runtime_keys = (IndexRuntimeKeyInfo *)
1315 repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1316 }
1317 }
1318 runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1319 runtime_keys[n_runtime_keys].key_expr =
1320 ExecInitExpr(rightop, planstate);
1321 runtime_keys[n_runtime_keys].key_toastable =
1322 TypeIsToastable(op_righttype);
1323 n_runtime_keys++;
1324 scanvalue = (Datum) 0;
1325 }
1326
1327 /*
1328 * initialize the scan key's fields appropriately
1329 */
1330 ScanKeyEntryInitialize(this_scan_key,
1331 flags,
1332 varattno, /* attribute number to scan */
1333 op_strategy, /* op's strategy */
1334 op_righttype, /* strategy subtype */
1335 ((OpExpr *) clause)->inputcollid, /* collation */
1336 opfuncid, /* reg proc to use */
1337 scanvalue); /* constant */
1338 }
1339 else if (IsA(clause, RowCompareExpr))
1340 {
1341 /* (indexkey, indexkey, ...) op (expression, expression, ...) */
1342 RowCompareExpr *rc = (RowCompareExpr *) clause;
1343 ListCell *largs_cell = list_head(rc->largs);
1344 ListCell *rargs_cell = list_head(rc->rargs);
1345 ListCell *opnos_cell = list_head(rc->opnos);
1346 ListCell *collids_cell = list_head(rc->inputcollids);
1347 ScanKey first_sub_key;
1348 int n_sub_key;
1349
1350 Assert(!isorderby);
1351
1352 first_sub_key = (ScanKey)
1353 palloc(list_length(rc->opnos) * sizeof(ScanKeyData));
1354 n_sub_key = 0;
1355
1356 /* Scan RowCompare columns and generate subsidiary ScanKey items */
1357 while (opnos_cell != NULL)
1358 {
1359 ScanKey this_sub_key = &first_sub_key[n_sub_key];
1360 int flags = SK_ROW_MEMBER;
1361 Datum scanvalue;
1362 Oid inputcollation;
1363
1364 /*
1365 * leftop should be the index key Var, possibly relabeled
1366 */
1367 leftop = (Expr *) lfirst(largs_cell);
1368 largs_cell = lnext(largs_cell);
1369
1370 if (leftop && IsA(leftop, RelabelType))
1371 leftop = ((RelabelType *) leftop)->arg;
1372
1373 Assert(leftop != NULL);
1374
1375 if (!(IsA(leftop, Var) &&
1376 ((Var *) leftop)->varno == INDEX_VAR))
1377 elog(ERROR, "indexqual doesn't have key on left side");
1378
1379 varattno = ((Var *) leftop)->varattno;
1380
1381 /*
1382 * We have to look up the operator's associated btree support
1383 * function
1384 */
1385 opno = lfirst_oid(opnos_cell);
1386 opnos_cell = lnext(opnos_cell);
1387
1388 if (index->rd_rel->relam != BTREE_AM_OID ||
1389 varattno < 1 || varattno > index->rd_index->indnatts)
1390 elog(ERROR, "bogus RowCompare index qualification");
1391 opfamily = index->rd_opfamily[varattno - 1];
1392
1393 get_op_opfamily_properties(opno, opfamily, isorderby,
1394 &op_strategy,
1395 &op_lefttype,
1396 &op_righttype);
1397
1398 if (op_strategy != rc->rctype)
1399 elog(ERROR, "RowCompare index qualification contains wrong operator");
1400
1401 opfuncid = get_opfamily_proc(opfamily,
1402 op_lefttype,
1403 op_righttype,
1404 BTORDER_PROC);
1405 if (!RegProcedureIsValid(opfuncid))
1406 elog(ERROR, "missing support function %d(%u,%u) in opfamily %u",
1407 BTORDER_PROC, op_lefttype, op_righttype, opfamily);
1408
1409 inputcollation = lfirst_oid(collids_cell);
1410 collids_cell = lnext(collids_cell);
1411
1412 /*
1413 * rightop is the constant or variable comparison value
1414 */
1415 rightop = (Expr *) lfirst(rargs_cell);
1416 rargs_cell = lnext(rargs_cell);
1417
1418 if (rightop && IsA(rightop, RelabelType))
1419 rightop = ((RelabelType *) rightop)->arg;
1420
1421 Assert(rightop != NULL);
1422
1423 if (IsA(rightop, Const))
1424 {
1425 /* OK, simple constant comparison value */
1426 scanvalue = ((Const *) rightop)->constvalue;
1427 if (((Const *) rightop)->constisnull)
1428 flags |= SK_ISNULL;
1429 }
1430 else
1431 {
1432 /* Need to treat this one as a runtime key */
1433 if (n_runtime_keys >= max_runtime_keys)
1434 {
1435 if (max_runtime_keys == 0)
1436 {
1437 max_runtime_keys = 8;
1438 runtime_keys = (IndexRuntimeKeyInfo *)
1439 palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1440 }
1441 else
1442 {
1443 max_runtime_keys *= 2;
1444 runtime_keys = (IndexRuntimeKeyInfo *)
1445 repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1446 }
1447 }
1448 runtime_keys[n_runtime_keys].scan_key = this_sub_key;
1449 runtime_keys[n_runtime_keys].key_expr =
1450 ExecInitExpr(rightop, planstate);
1451 runtime_keys[n_runtime_keys].key_toastable =
1452 TypeIsToastable(op_righttype);
1453 n_runtime_keys++;
1454 scanvalue = (Datum) 0;
1455 }
1456
1457 /*
1458 * initialize the subsidiary scan key's fields appropriately
1459 */
1460 ScanKeyEntryInitialize(this_sub_key,
1461 flags,
1462 varattno, /* attribute number */
1463 op_strategy, /* op's strategy */
1464 op_righttype, /* strategy subtype */
1465 inputcollation, /* collation */
1466 opfuncid, /* reg proc to use */
1467 scanvalue); /* constant */
1468 n_sub_key++;
1469 }
1470
1471 /* Mark the last subsidiary scankey correctly */
1472 first_sub_key[n_sub_key - 1].sk_flags |= SK_ROW_END;
1473
1474 /*
1475 * We don't use ScanKeyEntryInitialize for the header because it
1476 * isn't going to contain a valid sk_func pointer.
1477 */
1478 MemSet(this_scan_key, 0, sizeof(ScanKeyData));
1479 this_scan_key->sk_flags = SK_ROW_HEADER;
1480 this_scan_key->sk_attno = first_sub_key->sk_attno;
1481 this_scan_key->sk_strategy = rc->rctype;
1482 /* sk_subtype, sk_collation, sk_func not used in a header */
1483 this_scan_key->sk_argument = PointerGetDatum(first_sub_key);
1484 }
1485 else if (IsA(clause, ScalarArrayOpExpr))
1486 {
1487 /* indexkey op ANY (array-expression) */
1488 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
1489 int flags = 0;
1490 Datum scanvalue;
1491
1492 Assert(!isorderby);
1493
1494 Assert(saop->useOr);
1495 opno = saop->opno;
1496 opfuncid = saop->opfuncid;
1497
1498 /*
1499 * leftop should be the index key Var, possibly relabeled
1500 */
1501 leftop = (Expr *) linitial(saop->args);
1502
1503 if (leftop && IsA(leftop, RelabelType))
1504 leftop = ((RelabelType *) leftop)->arg;
1505
1506 Assert(leftop != NULL);
1507
1508 if (!(IsA(leftop, Var) &&
1509 ((Var *) leftop)->varno == INDEX_VAR))
1510 elog(ERROR, "indexqual doesn't have key on left side");
1511
1512 varattno = ((Var *) leftop)->varattno;
1513 if (varattno < 1 || varattno > index->rd_index->indnatts)
1514 elog(ERROR, "bogus index qualification");
1515
1516 /*
1517 * We have to look up the operator's strategy number. This
1518 * provides a cross-check that the operator does match the index.
1519 */
1520 opfamily = index->rd_opfamily[varattno - 1];
1521
1522 get_op_opfamily_properties(opno, opfamily, isorderby,
1523 &op_strategy,
1524 &op_lefttype,
1525 &op_righttype);
1526
1527 /*
1528 * rightop is the constant or variable array value
1529 */
1530 rightop = (Expr *) lsecond(saop->args);
1531
1532 if (rightop && IsA(rightop, RelabelType))
1533 rightop = ((RelabelType *) rightop)->arg;
1534
1535 Assert(rightop != NULL);
1536
1537 if (index->rd_amroutine->amsearcharray)
1538 {
1539 /* Index AM will handle this like a simple operator */
1540 flags |= SK_SEARCHARRAY;
1541 if (IsA(rightop, Const))
1542 {
1543 /* OK, simple constant comparison value */
1544 scanvalue = ((Const *) rightop)->constvalue;
1545 if (((Const *) rightop)->constisnull)
1546 flags |= SK_ISNULL;
1547 }
1548 else
1549 {
1550 /* Need to treat this one as a runtime key */
1551 if (n_runtime_keys >= max_runtime_keys)
1552 {
1553 if (max_runtime_keys == 0)
1554 {
1555 max_runtime_keys = 8;
1556 runtime_keys = (IndexRuntimeKeyInfo *)
1557 palloc(max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1558 }
1559 else
1560 {
1561 max_runtime_keys *= 2;
1562 runtime_keys = (IndexRuntimeKeyInfo *)
1563 repalloc(runtime_keys, max_runtime_keys * sizeof(IndexRuntimeKeyInfo));
1564 }
1565 }
1566 runtime_keys[n_runtime_keys].scan_key = this_scan_key;
1567 runtime_keys[n_runtime_keys].key_expr =
1568 ExecInitExpr(rightop, planstate);
1569
1570 /*
1571 * Careful here: the runtime expression is not of
1572 * op_righttype, but rather is an array of same; so
1573 * TypeIsToastable() isn't helpful. However, we can
1574 * assume that all array types are toastable.
1575 */
1576 runtime_keys[n_runtime_keys].key_toastable = true;
1577 n_runtime_keys++;
1578 scanvalue = (Datum) 0;
1579 }
1580 }
1581 else
1582 {
1583 /* Executor has to expand the array value */
1584 array_keys[n_array_keys].scan_key = this_scan_key;
1585 array_keys[n_array_keys].array_expr =
1586 ExecInitExpr(rightop, planstate);
1587 /* the remaining fields were zeroed by palloc0 */
1588 n_array_keys++;
1589 scanvalue = (Datum) 0;
1590 }
1591
1592 /*
1593 * initialize the scan key's fields appropriately
1594 */
1595 ScanKeyEntryInitialize(this_scan_key,
1596 flags,
1597 varattno, /* attribute number to scan */
1598 op_strategy, /* op's strategy */
1599 op_righttype, /* strategy subtype */
1600 saop->inputcollid, /* collation */
1601 opfuncid, /* reg proc to use */
1602 scanvalue); /* constant */
1603 }
1604 else if (IsA(clause, NullTest))
1605 {
1606 /* indexkey IS NULL or indexkey IS NOT NULL */
1607 NullTest *ntest = (NullTest *) clause;
1608 int flags;
1609
1610 Assert(!isorderby);
1611
1612 /*
1613 * argument should be the index key Var, possibly relabeled
1614 */
1615 leftop = ntest->arg;
1616
1617 if (leftop && IsA(leftop, RelabelType))
1618 leftop = ((RelabelType *) leftop)->arg;
1619
1620 Assert(leftop != NULL);
1621
1622 if (!(IsA(leftop, Var) &&
1623 ((Var *) leftop)->varno == INDEX_VAR))
1624 elog(ERROR, "NullTest indexqual has wrong key");
1625
1626 varattno = ((Var *) leftop)->varattno;
1627
1628 /*
1629 * initialize the scan key's fields appropriately
1630 */
1631 switch (ntest->nulltesttype)
1632 {
1633 case IS_NULL:
1634 flags = SK_ISNULL | SK_SEARCHNULL;
1635 break;
1636 case IS_NOT_NULL:
1637 flags = SK_ISNULL | SK_SEARCHNOTNULL;
1638 break;
1639 default:
1640 elog(ERROR, "unrecognized nulltesttype: %d",
1641 (int) ntest->nulltesttype);
1642 flags = 0; /* keep compiler quiet */
1643 break;
1644 }
1645
1646 ScanKeyEntryInitialize(this_scan_key,
1647 flags,
1648 varattno, /* attribute number to scan */
1649 InvalidStrategy, /* no strategy */
1650 InvalidOid, /* no strategy subtype */
1651 InvalidOid, /* no collation */
1652 InvalidOid, /* no reg proc for this */
1653 (Datum) 0); /* constant */
1654 }
1655 else
1656 elog(ERROR, "unsupported indexqual type: %d",
1657 (int) nodeTag(clause));
1658 }
1659
1660 Assert(n_runtime_keys <= max_runtime_keys);
1661
1662 /* Get rid of any unused arrays */
1663 if (n_array_keys == 0)
1664 {
1665 pfree(array_keys);
1666 array_keys = NULL;
1667 }
1668
1669 /*
1670 * Return info to our caller.
1671 */
1672 *scanKeys = scan_keys;
1673 *numScanKeys = n_scan_keys;
1674 *runtimeKeys = runtime_keys;
1675 *numRuntimeKeys = n_runtime_keys;
1676 if (arrayKeys)
1677 {
1678 *arrayKeys = array_keys;
1679 *numArrayKeys = n_array_keys;
1680 }
1681 else if (n_array_keys != 0)
1682 elog(ERROR, "ScalarArrayOpExpr index qual found where not allowed");
1683 }
1684
1685 /* ----------------------------------------------------------------
1686 * Parallel Scan Support
1687 * ----------------------------------------------------------------
1688 */
1689
1690 /* ----------------------------------------------------------------
1691 * ExecIndexScanEstimate
1692 *
1693 * estimates the space required to serialize indexscan node.
1694 * ----------------------------------------------------------------
1695 */
1696 void
ExecIndexScanEstimate(IndexScanState * node,ParallelContext * pcxt)1697 ExecIndexScanEstimate(IndexScanState *node,
1698 ParallelContext *pcxt)
1699 {
1700 EState *estate = node->ss.ps.state;
1701
1702 node->iss_PscanLen = index_parallelscan_estimate(node->iss_RelationDesc,
1703 estate->es_snapshot);
1704 shm_toc_estimate_chunk(&pcxt->estimator, node->iss_PscanLen);
1705 shm_toc_estimate_keys(&pcxt->estimator, 1);
1706 }
1707
1708 /* ----------------------------------------------------------------
1709 * ExecIndexScanInitializeDSM
1710 *
1711 * Set up a parallel index scan descriptor.
1712 * ----------------------------------------------------------------
1713 */
1714 void
ExecIndexScanInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1715 ExecIndexScanInitializeDSM(IndexScanState *node,
1716 ParallelContext *pcxt)
1717 {
1718 EState *estate = node->ss.ps.state;
1719 ParallelIndexScanDesc piscan;
1720
1721 piscan = shm_toc_allocate(pcxt->toc, node->iss_PscanLen);
1722 index_parallelscan_initialize(node->ss.ss_currentRelation,
1723 node->iss_RelationDesc,
1724 estate->es_snapshot,
1725 piscan);
1726 shm_toc_insert(pcxt->toc, node->ss.ps.plan->plan_node_id, piscan);
1727 node->iss_ScanDesc =
1728 index_beginscan_parallel(node->ss.ss_currentRelation,
1729 node->iss_RelationDesc,
1730 node->iss_NumScanKeys,
1731 node->iss_NumOrderByKeys,
1732 piscan);
1733
1734 /*
1735 * If no run-time keys to calculate or they are ready, go ahead and pass
1736 * the scankeys to the index AM.
1737 */
1738 if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1739 index_rescan(node->iss_ScanDesc,
1740 node->iss_ScanKeys, node->iss_NumScanKeys,
1741 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1742 }
1743
1744 /* ----------------------------------------------------------------
1745 * ExecIndexScanReInitializeDSM
1746 *
1747 * Reset shared state before beginning a fresh scan.
1748 * ----------------------------------------------------------------
1749 */
1750 void
ExecIndexScanReInitializeDSM(IndexScanState * node,ParallelContext * pcxt)1751 ExecIndexScanReInitializeDSM(IndexScanState *node,
1752 ParallelContext *pcxt)
1753 {
1754 index_parallelrescan(node->iss_ScanDesc);
1755 }
1756
1757 /* ----------------------------------------------------------------
1758 * ExecIndexScanInitializeWorker
1759 *
1760 * Copy relevant information from TOC into planstate.
1761 * ----------------------------------------------------------------
1762 */
1763 void
ExecIndexScanInitializeWorker(IndexScanState * node,shm_toc * toc)1764 ExecIndexScanInitializeWorker(IndexScanState *node, shm_toc *toc)
1765 {
1766 ParallelIndexScanDesc piscan;
1767
1768 piscan = shm_toc_lookup(toc, node->ss.ps.plan->plan_node_id, false);
1769 node->iss_ScanDesc =
1770 index_beginscan_parallel(node->ss.ss_currentRelation,
1771 node->iss_RelationDesc,
1772 node->iss_NumScanKeys,
1773 node->iss_NumOrderByKeys,
1774 piscan);
1775
1776 /*
1777 * If no run-time keys to calculate or they are ready, go ahead and pass
1778 * the scankeys to the index AM.
1779 */
1780 if (node->iss_NumRuntimeKeys == 0 || node->iss_RuntimeKeysReady)
1781 index_rescan(node->iss_ScanDesc,
1782 node->iss_ScanKeys, node->iss_NumScanKeys,
1783 node->iss_OrderByKeys, node->iss_NumOrderByKeys);
1784 }
1785