/*-------------------------------------------------------------------------
 *
 * genam.c
 *	  general index access method routines
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/index/genam.c
 *
 * NOTES
 *	  many of the old access method routines have been turned into
 *	  macros and moved to genam.h -cim 4/30/91
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/genam.h"
#include "access/heapam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/transam.h"
#include "catalog/index.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "storage/bufmgr.h"
#include "storage/procarray.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/rls.h"
#include "utils/ruleutils.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"


/* ----------------------------------------------------------------
 *		general access method routines
 *
 *		All indexed access methods use an identical scan structure.
 *		We don't know how the various AMs do locking, however, so we don't
 *		do anything about that here.
 *
 *		The intent is that an AM implementor will define a beginscan routine
 *		that calls RelationGetIndexScan, to fill in the scan, and then does
 *		whatever kind of locking he wants.
 *
 *		At the end of a scan, the AM's endscan routine undoes the locking,
 *		but does *not* call IndexScanEnd --- the higher-level index_endscan
 *		routine does that.  (We can't do it in the AM because index_endscan
 *		still needs to touch the IndexScanDesc after calling the AM.)
 *
 *		Because of this, the AM does not have a choice whether to call
 *		RelationGetIndexScan or not; its beginscan routine must return an
 *		object made by RelationGetIndexScan.  This is kinda ugly but not
 *		worth cleaning up now.
 * ----------------------------------------------------------------
 */
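
/*
 * For illustration only: a minimal sketch of what an AM beginscan routine
 * built on RelationGetIndexScan might look like, assuming a hypothetical
 * "foo" AM with a FooScanOpaqueData struct for its private state (these
 * names are invented here and are not part of any real AM):
 *
 *		IndexScanDesc
 *		foobeginscan(Relation index, int nkeys, int norderbys)
 *		{
 *			IndexScanDesc scan;
 *
 *			scan = RelationGetIndexScan(index, nkeys, norderbys);
 *			scan->opaque = palloc(sizeof(FooScanOpaqueData));
 *			return scan;
 *		}
 *
 * The matching fooendscan() would free the AM's private state but leave
 * the IndexScanDesc itself for index_endscan/IndexScanEnd to release.
 */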

/* ----------------
 *	RelationGetIndexScan -- Create and fill an IndexScanDesc.
 *
 *		This routine creates an index scan structure and sets up initial
 *		contents for it.
 *
 *		Parameters:
 *				indexRelation -- index relation for scan.
 *				nkeys -- count of scan keys (index qual conditions).
 *				norderbys -- count of index order-by operators.
 *
 *		Returns:
 *				An initialized IndexScanDesc.
 * ----------------
 */
IndexScanDesc
RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
{
	IndexScanDesc scan;

	scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));

	scan->heapRelation = NULL;	/* may be set later */
	scan->xs_heapfetch = NULL;
	scan->indexRelation = indexRelation;
	scan->xs_snapshot = InvalidSnapshot;	/* caller must initialize this */
	scan->numberOfKeys = nkeys;
	scan->numberOfOrderBys = norderbys;

	/*
	 * We allocate key workspace here, but it won't get filled until amrescan.
	 */
	if (nkeys > 0)
		scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
	else
		scan->keyData = NULL;
	if (norderbys > 0)
		scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
	else
		scan->orderByData = NULL;

	scan->xs_want_itup = false; /* may be set later */

	/*
	 * During recovery we ignore killed tuples and don't bother to kill them
	 * either. We do this because the xmin on the primary node could easily be
	 * later than the xmin on the standby node, so that what the primary
	 * thinks is killed is supposed to be visible on standby. So for correct
	 * MVCC for queries during recovery we must ignore these hints and check
	 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
	 * transaction that was started during recovery. xactStartedInRecovery
	 * should not be altered by index AMs.
	 */
	scan->kill_prior_tuple = false;
	scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
	scan->ignore_killed_tuples = !scan->xactStartedInRecovery;

	scan->opaque = NULL;

	scan->xs_itup = NULL;
	scan->xs_itupdesc = NULL;
	scan->xs_hitup = NULL;
	scan->xs_hitupdesc = NULL;

	return scan;
}

/* ----------------
 *	IndexScanEnd -- End an index scan.
 *
 *		This routine just releases the storage acquired by
 *		RelationGetIndexScan().  Any AM-level resources are
 *		assumed to already have been released by the AM's
 *		endscan routine.
 *
 *	Returns:
 *		None.
 * ----------------
 */
void
IndexScanEnd(IndexScanDesc scan)
{
	if (scan->keyData != NULL)
		pfree(scan->keyData);
	if (scan->orderByData != NULL)
		pfree(scan->orderByData);

	pfree(scan);
}

/*
 * BuildIndexValueDescription
 *
 * Construct a string describing the contents of an index entry, in the
 * form "(key_name, ...)=(key_value, ...)".  This is currently used
 * for building unique-constraint and exclusion-constraint error messages,
 * so only key columns of the index are checked and printed.
 *
 * Note that if the user does not have permissions to view all of the
 * columns involved then a NULL is returned.  Returning a partial key seems
 * unlikely to be useful and we have no way to know which of the columns the
 * user provided (unlike in ExecBuildSlotValueDescription).
 *
 * The passed-in values/nulls arrays are the "raw" input to the index AM,
 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
 * in the index, but it's what the user perceives to be stored.
 *
 * Note: if you change anything here, check whether
 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
 * change.
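 *
 * For illustration, the sort of call site this serves (a sketch of a
 * typical unique-violation report, not a verbatim copy of any caller):
 *
 *		key_desc = BuildIndexValueDescription(rel, values, isnull);
 *		ereport(ERROR,
 *				(errcode(ERRCODE_UNIQUE_VIOLATION),
 *				 errmsg("duplicate key value violates unique constraint \"%s\"",
 *						RelationGetRelationName(rel)),
 *				 key_desc ? errdetail("Key %s already exists.",
 *									  key_desc) : 0));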
 */
char *
BuildIndexValueDescription(Relation indexRelation,
						   Datum *values, bool *isnull)
{
	StringInfoData buf;
	Form_pg_index idxrec;
	int			indnkeyatts;
	int			i;
	int			keyno;
	Oid			indexrelid = RelationGetRelid(indexRelation);
	Oid			indrelid;
	AclResult	aclresult;

	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);

	/*
	 * Check permissions: if the user does not have access to view all of the
	 * key columns then return NULL to avoid leaking data.
	 *
	 * First check if RLS is enabled for the relation.  If so, return NULL to
	 * avoid leaking data.
	 *
	 * Next we need to check table-level SELECT access and then, if there is
	 * no access there, check column-level permissions.
	 */
	idxrec = indexRelation->rd_index;
	indrelid = idxrec->indrelid;
	Assert(indexrelid == idxrec->indexrelid);

	/* RLS check: if RLS is enabled then we don't return anything. */
	if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
		return NULL;

	/* Table-level SELECT is enough, if the user has it */
	aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
	if (aclresult != ACLCHECK_OK)
	{
		/*
		 * No table-level access, so step through the columns in the index and
		 * make sure the user has SELECT rights on all of them.
		 */
		for (keyno = 0; keyno < indnkeyatts; keyno++)
		{
			AttrNumber	attnum = idxrec->indkey.values[keyno];

			/*
			 * Note that if attnum == InvalidAttrNumber, then this is an index
			 * based on an expression and we return no detail rather than try
			 * to figure out what column(s) the expression includes and if the
			 * user has SELECT rights on them.
			 */
			if (attnum == InvalidAttrNumber ||
				pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
									  ACL_SELECT) != ACLCHECK_OK)
			{
				/* No access, so clean up and return */
				return NULL;
			}
		}
	}

	initStringInfo(&buf);
	appendStringInfo(&buf, "(%s)=(",
					 pg_get_indexdef_columns(indexrelid, true));

	for (i = 0; i < indnkeyatts; i++)
	{
		char	   *val;

		if (isnull[i])
			val = "null";
		else
		{
			Oid			foutoid;
			bool		typisvarlena;

			/*
			 * The provided data is not necessarily of the type stored in the
			 * index; rather it is of the index opclass's input type. So look
			 * at rd_opcintype not the index tupdesc.
			 *
			 * Note: this is a bit shaky for opclasses that have pseudotype
			 * input types such as ANYARRAY or RECORD.  Currently, the
			 * typoutput functions associated with the pseudotypes will work
			 * okay, but we might have to try harder in future.
			 */
			getTypeOutputInfo(indexRelation->rd_opcintype[i],
							  &foutoid, &typisvarlena);
			val = OidOutputFunctionCall(foutoid, values[i]);
		}

		if (i > 0)
			appendStringInfoString(&buf, ", ");
		appendStringInfoString(&buf, val);
	}

	appendStringInfoChar(&buf, ')');

	return buf.data;
}

/*
 * Get the latestRemovedXid from the table entries pointed at by the index
 * tuples being deleted, using an AM-generic approach.
 *
 * This is a table_index_delete_tuples() shim used by index AMs that have
 * simple requirements.  These callers only need to consult the tableam to
 * get a latestRemovedXid value, and only expect to delete tuples that are
 * already known deletable.  When a latestRemovedXid value isn't needed in
 * the index AM's deletion WAL record, it is safe for the AM to skip calling
 * here entirely.
 *
 * We assume that the calling index AM uses the standard IndexTuple
 * representation, with table TIDs stored in the t_tid field.  We also
 * expect (and assert) that the line pointers on the page for the 'itemnos'
 * offsets are already marked LP_DEAD.
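 *
 * An illustrative call, of the sort an AM might make just before emitting
 * its deletion WAL record (a sketch only; the variable names are invented):
 *
 *		latestRemovedXid =
 *			index_compute_xid_horizon_for_tuples(rel, heapRel, buf,
 *												 deletable, ndeletable);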
 */
TransactionId
index_compute_xid_horizon_for_tuples(Relation irel,
									 Relation hrel,
									 Buffer ibuf,
									 OffsetNumber *itemnos,
									 int nitems)
{
	TM_IndexDeleteOp delstate;
	TransactionId latestRemovedXid = InvalidTransactionId;
	Page		ipage = BufferGetPage(ibuf);
	IndexTuple	itup;

	Assert(nitems > 0);

	delstate.bottomup = false;
	delstate.bottomupfreespace = 0;
	delstate.ndeltids = 0;
	delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete));
	delstate.status = palloc(nitems * sizeof(TM_IndexStatus));

	/* identify what the index tuples about to be deleted point to */
	for (int i = 0; i < nitems; i++)
	{
		ItemId		iitemid;

		iitemid = PageGetItemId(ipage, itemnos[i]);
		itup = (IndexTuple) PageGetItem(ipage, iitemid);

		Assert(ItemIdIsDead(iitemid));

		ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid);
		delstate.deltids[i].id = delstate.ndeltids;
		delstate.status[i].idxoffnum = InvalidOffsetNumber; /* unused */
		delstate.status[i].knowndeletable = true;	/* LP_DEAD-marked */
		delstate.status[i].promising = false;	/* unused */
		delstate.status[i].freespace = 0;	/* unused */

		delstate.ndeltids++;
	}

	/* determine the actual xid horizon */
	latestRemovedXid = table_index_delete_tuples(hrel, &delstate);

	/* assert tableam agrees that all items are deletable */
	Assert(delstate.ndeltids == nitems);

	pfree(delstate.deltids);
	pfree(delstate.status);

	return latestRemovedXid;
}


/* ----------------------------------------------------------------
 *		heap-or-index-scan access to system catalogs
 *
 *		These functions support system catalog accesses that normally use
 *		an index but need to be capable of being switched to heap scans
 *		if the system indexes are unavailable.
 *
 *		The specified scan keys must be compatible with the named index.
 *		Generally this means that they must constrain either all columns
 *		of the index, or the first K columns of an N-column index.
 *
 *		These routines could work with non-system tables, actually,
 *		but they're only useful when there is a known index to use with
 *		the given scan keys; so in practice they're only good for
 *		predetermined types of scans of system catalogs.
 * ----------------------------------------------------------------
 */
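
/*
 * For illustration, the typical calling pattern for these routines (a
 * sketch only, here looking up a pg_class row by OID; it assumes the
 * caller already holds an adequate lock on the object of interest):
 *
 *		Relation	rel = table_open(RelationRelationId, AccessShareLock);
 *		ScanKeyData key[1];
 *		SysScanDesc scan;
 *		HeapTuple	tup;
 *
 *		ScanKeyInit(&key[0],
 *					Anum_pg_class_oid,
 *					BTEqualStrategyNumber, F_OIDEQ,
 *					ObjectIdGetDatum(relid));
 *		scan = systable_beginscan(rel, ClassOidIndexId, true,
 *								  NULL, 1, key);
 *		while (HeapTupleIsValid(tup = systable_getnext(scan)))
 *		{
 *			... examine tup; it is only valid until the next call ...
 *		}
 *		systable_endscan(scan);
 *		table_close(rel, AccessShareLock);
 */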

/*
 * systable_beginscan --- set up for heap-or-index scan
 *
 *	rel: catalog to scan, already opened and suitably locked
 *	indexId: OID of index to conditionally use
 *	indexOK: if false, forces a heap scan (see notes below)
 *	snapshot: time qual to use (NULL for a recent catalog snapshot)
 *	nkeys, key: scan keys
 *
 * The attribute numbers in the scan key should be set for the heap case.
 * If we choose to index, we reset them to 1..n to reference the index
 * columns.  Note this means there must be one scankey qualification per
 * index column!  This is checked by the Asserts in the normal, index-using
 * case, but won't be checked if the heapscan path is taken.
 *
 * The routine checks the normal cases for whether an indexscan is safe,
 * but the caller can make additional checks and pass indexOK=false if
 * needed.  In the standard case indexOK can simply be constant true.
 */
SysScanDesc
systable_beginscan(Relation heapRelation,
				   Oid indexId,
				   bool indexOK,
				   Snapshot snapshot,
				   int nkeys, ScanKey key)
{
	SysScanDesc sysscan;
	Relation	irel;

	if (indexOK &&
		!IgnoreSystemIndexes &&
		!ReindexIsProcessingIndex(indexId))
		irel = index_open(indexId, AccessShareLock);
	else
		irel = NULL;

	sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));

	sysscan->heap_rel = heapRelation;
	sysscan->irel = irel;
	sysscan->slot = table_slot_create(heapRelation, NULL);

	if (snapshot == NULL)
	{
		Oid			relid = RelationGetRelid(heapRelation);

		snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
		sysscan->snapshot = snapshot;
	}
	else
	{
		/* Caller is responsible for any snapshot. */
		sysscan->snapshot = NULL;
	}

	if (irel)
	{
		int			i;

		/* Change attribute numbers to be index column numbers. */
		for (i = 0; i < nkeys; i++)
		{
			int			j;

			for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
			{
				if (key[i].sk_attno == irel->rd_index->indkey.values[j])
				{
					key[i].sk_attno = j + 1;
					break;
				}
			}
			if (j == IndexRelationGetNumberOfAttributes(irel))
				elog(ERROR, "column is not in index");
		}

		sysscan->iscan = index_beginscan(heapRelation, irel,
										 snapshot, nkeys, 0);
		index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
		sysscan->scan = NULL;
	}
	else
	{
		/*
		 * We disallow synchronized scans when forced to use a heapscan on a
		 * catalog.  In most cases the desired rows are near the front, so
		 * that the unpredictable start point of a syncscan is a serious
		 * disadvantage; and there are no compensating advantages, because
		 * it's unlikely that such scans will occur in parallel.
		 */
		sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
											  nkeys, key,
											  true, false);
		sysscan->iscan = NULL;
	}

	/*
	 * If CheckXidAlive is set then set a flag to indicate that system table
	 * scan is in-progress.  See detailed comments in xact.c where these
	 * variables are declared.
	 */
	if (TransactionIdIsValid(CheckXidAlive))
		bsysscan = true;

	return sysscan;
}

/*
 * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive
 * transaction.
 *
 * Error out if CheckXidAlive has aborted.  We can't directly use
 * TransactionIdDidAbort, because after a crash such a transaction might not
 * have been marked as aborted.  See detailed comments in xact.c where the
 * variable is declared.
 */
static inline void
HandleConcurrentAbort()
{
	if (TransactionIdIsValid(CheckXidAlive) &&
		!TransactionIdIsInProgress(CheckXidAlive) &&
		!TransactionIdDidCommit(CheckXidAlive))
		ereport(ERROR,
				(errcode(ERRCODE_TRANSACTION_ROLLBACK),
				 errmsg("transaction aborted during system catalog scan")));
}

/*
 * systable_getnext --- get next tuple in a heap-or-index scan
 *
 * Returns NULL if no more tuples available.
 *
 * Note that the returned tuple is a reference to data in a disk buffer;
 * it must not be modified, and should be presumed inaccessible after the
 * next getnext() or endscan() call.
 *
 * XXX: It'd probably make sense to offer a slot-based interface, at least
 * optionally.
 */
HeapTuple
systable_getnext(SysScanDesc sysscan)
{
	HeapTuple	htup = NULL;

	if (sysscan->irel)
	{
		if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
		{
			bool		shouldFree;

			htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
			Assert(!shouldFree);

			/*
			 * We currently don't need to support lossy index operators for
			 * any system catalog scan.  It could be done here, using the scan
			 * keys to drive the operator calls, if we arranged to save the
			 * heap attnums during systable_beginscan(); this is practical
			 * because we still wouldn't need to support indexes on
			 * expressions.
			 */
			if (sysscan->iscan->xs_recheck)
				elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
		}
	}
	else
	{
		if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
		{
			bool		shouldFree;

			htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
			Assert(!shouldFree);
		}
	}

	/*
	 * Handle the concurrent abort while fetching the catalog tuple during
	 * logical streaming of a transaction.
	 */
	HandleConcurrentAbort();

	return htup;
}

/*
 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
 *
 * In particular, determine if this tuple would be visible to a catalog scan
 * that started now.  We don't handle the case of a non-MVCC scan snapshot,
 * because no caller needs that yet.
 *
 * This is useful to test whether an object was deleted while we waited to
 * acquire lock on it.
 *
 * Note: we don't actually *need* the tuple to be passed in, but it's a
 * good crosscheck that the caller is interested in the right tuple.
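 *
 * Typical usage is a wait-then-recheck pattern along these lines (an
 * illustrative sketch only):
 *
 *		tuple = systable_getnext(scan);
 *		... acquire a lock on the object the tuple describes, possibly
 *		... waiting for concurrent transactions ...
 *		if (!systable_recheck_tuple(scan, tuple))
 *			... the object was deleted while we waited; skip or error ...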
 */
bool
systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
{
	Snapshot	freshsnap;
	bool		result;

	Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));

	/*
	 * Trust that table_tuple_satisfies_snapshot() and its subsidiaries
	 * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves
	 * acquire snapshots, so we need not register the snapshot.  Those
	 * facilities are too low-level to have any business scanning tables.
	 */
	freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));

	result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
											sysscan->slot,
											freshsnap);

	/*
	 * Handle the concurrent abort while fetching the catalog tuple during
	 * logical streaming of a transaction.
	 */
	HandleConcurrentAbort();

	return result;
}

/*
 * systable_endscan --- close scan, release resources
 *
 * Note that it's still up to the caller to close the heap relation.
 */
void
systable_endscan(SysScanDesc sysscan)
{
	if (sysscan->slot)
	{
		ExecDropSingleTupleTableSlot(sysscan->slot);
		sysscan->slot = NULL;
	}

	if (sysscan->irel)
	{
		index_endscan(sysscan->iscan);
		index_close(sysscan->irel, AccessShareLock);
	}
	else
		table_endscan(sysscan->scan);

	if (sysscan->snapshot)
		UnregisterSnapshot(sysscan->snapshot);

	/*
	 * Reset the bsysscan flag at the end of the systable scan.  See detailed
	 * comments in xact.c where these variables are declared.
	 */
	if (TransactionIdIsValid(CheckXidAlive))
		bsysscan = false;

	pfree(sysscan);
}


/*
 * systable_beginscan_ordered --- set up for ordered catalog scan
 *
 * These routines have essentially the same API as systable_beginscan etc,
 * except that they guarantee to return multiple matching tuples in
 * index order.  Also, for largely historical reasons, the index to use
 * is opened and locked by the caller, not here.
 *
 * Currently we do not support non-index-based scans here.  (In principle
 * we could do a heapscan and sort, but the uses are in places that
 * probably don't need to still work with corrupted catalog indexes.)
 * For the moment, therefore, these functions are merely the thinnest of
 * wrappers around index_beginscan/index_getnext_slot.  The main reason for
 * their existence is to centralize possible future support of lossy operators
 * in catalog scans.
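 *
 * An illustrative calling sequence (a sketch only; note that, unlike
 * systable_beginscan, the caller opens and closes the index itself):
 *
 *		irel = index_open(indexoid, AccessShareLock);
 *		scan = systable_beginscan_ordered(rel, irel, NULL, nkeys, key);
 *		while ((tup = systable_getnext_ordered(scan, ForwardScanDirection)) != NULL)
 *		{
 *			... process tuples in index order ...
 *		}
 *		systable_endscan_ordered(scan);
 *		index_close(irel, AccessShareLock);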
 */
SysScanDesc
systable_beginscan_ordered(Relation heapRelation,
						   Relation indexRelation,
						   Snapshot snapshot,
						   int nkeys, ScanKey key)
{
	SysScanDesc sysscan;
	int			i;

	/* REINDEX can probably be a hard error here ... */
	if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
		elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
			 RelationGetRelationName(indexRelation));
	/* ... but we only throw a warning about violating IgnoreSystemIndexes */
	if (IgnoreSystemIndexes)
		elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
			 RelationGetRelationName(indexRelation));

	sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));

	sysscan->heap_rel = heapRelation;
	sysscan->irel = indexRelation;
	sysscan->slot = table_slot_create(heapRelation, NULL);

	if (snapshot == NULL)
	{
		Oid			relid = RelationGetRelid(heapRelation);

		snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
		sysscan->snapshot = snapshot;
	}
	else
	{
		/* Caller is responsible for any snapshot. */
		sysscan->snapshot = NULL;
	}

	/* Change attribute numbers to be index column numbers. */
	for (i = 0; i < nkeys; i++)
	{
		int			j;

		for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
		{
			if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
			{
				key[i].sk_attno = j + 1;
				break;
			}
		}
		if (j == IndexRelationGetNumberOfAttributes(indexRelation))
			elog(ERROR, "column is not in index");
	}

	sysscan->iscan = index_beginscan(heapRelation, indexRelation,
									 snapshot, nkeys, 0);
	index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
	sysscan->scan = NULL;

	return sysscan;
}

/*
 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
 */
HeapTuple
systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
{
	HeapTuple	htup = NULL;

	Assert(sysscan->irel);
	if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
		htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);

	/* See notes in systable_getnext */
	if (htup && sysscan->iscan->xs_recheck)
		elog(ERROR, "system catalog scans with lossy index conditions are not implemented");

	/*
	 * Handle the concurrent abort while fetching the catalog tuple during
	 * logical streaming of a transaction.
	 */
	HandleConcurrentAbort();

	return htup;
}

/*
 * systable_endscan_ordered --- close scan, release resources
 */
void
systable_endscan_ordered(SysScanDesc sysscan)
{
	if (sysscan->slot)
	{
		ExecDropSingleTupleTableSlot(sysscan->slot);
		sysscan->slot = NULL;
	}

	Assert(sysscan->irel);
	index_endscan(sysscan->iscan);
	if (sysscan->snapshot)
		UnregisterSnapshot(sysscan->snapshot);
	pfree(sysscan);
}