1 /*-------------------------------------------------------------------------
2 *
3 * genam.c
4 * general index access method routines
5 *
6 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/index/genam.c
12 *
13 * NOTES
14 * many of the old access method routines have been turned into
15 * macros and moved to genam.h -cim 4/30/91
16 *
17 *-------------------------------------------------------------------------
18 */
19
20 #include "postgres.h"
21
22 #include "access/genam.h"
23 #include "access/heapam.h"
24 #include "access/relscan.h"
25 #include "access/tableam.h"
26 #include "access/transam.h"
27 #include "catalog/index.h"
28 #include "lib/stringinfo.h"
29 #include "miscadmin.h"
30 #include "storage/bufmgr.h"
31 #include "utils/acl.h"
32 #include "utils/builtins.h"
33 #include "utils/lsyscache.h"
34 #include "utils/rel.h"
35 #include "utils/rls.h"
36 #include "utils/ruleutils.h"
37 #include "utils/snapmgr.h"
38 #include "utils/syscache.h"
39
40
41 /* ----------------------------------------------------------------
42 * general access method routines
43 *
44 * All indexed access methods use an identical scan structure.
45 * We don't know how the various AMs do locking, however, so we don't
46 * do anything about that here.
47 *
48 * The intent is that an AM implementor will define a beginscan routine
49 * that calls RelationGetIndexScan, to fill in the scan, and then does
50 * whatever kind of locking he wants.
51 *
52 * At the end of a scan, the AM's endscan routine undoes the locking,
53 * but does *not* call IndexScanEnd --- the higher-level index_endscan
54 * routine does that. (We can't do it in the AM because index_endscan
55 * still needs to touch the IndexScanDesc after calling the AM.)
56 *
57 * Because of this, the AM does not have a choice whether to call
58 * RelationGetIndexScan or not; its beginscan routine must return an
59 * object made by RelationGetIndexScan. This is kinda ugly but not
60 * worth cleaning up now.
61 * ----------------------------------------------------------------
62 */
63
64 /* ----------------
65 * RelationGetIndexScan -- Create and fill an IndexScanDesc.
66 *
67 * This routine creates an index scan structure and sets up initial
68 * contents for it.
69 *
70 * Parameters:
71 * indexRelation -- index relation for scan.
72 * nkeys -- count of scan keys (index qual conditions).
73 * norderbys -- count of index order-by operators.
74 *
75 * Returns:
76 * An initialized IndexScanDesc.
77 * ----------------
78 */
79 IndexScanDesc
RelationGetIndexScan(Relation indexRelation,int nkeys,int norderbys)80 RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
81 {
82 IndexScanDesc scan;
83
84 scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
85
86 scan->heapRelation = NULL; /* may be set later */
87 scan->xs_heapfetch = NULL;
88 scan->indexRelation = indexRelation;
89 scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
90 scan->numberOfKeys = nkeys;
91 scan->numberOfOrderBys = norderbys;
92
93 /*
94 * We allocate key workspace here, but it won't get filled until amrescan.
95 */
96 if (nkeys > 0)
97 scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
98 else
99 scan->keyData = NULL;
100 if (norderbys > 0)
101 scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
102 else
103 scan->orderByData = NULL;
104
105 scan->xs_want_itup = false; /* may be set later */
106
107 /*
108 * During recovery we ignore killed tuples and don't bother to kill them
109 * either. We do this because the xmin on the primary node could easily be
110 * later than the xmin on the standby node, so that what the primary
111 * thinks is killed is supposed to be visible on standby. So for correct
112 * MVCC for queries during recovery we must ignore these hints and check
113 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
114 * transaction that was started during recovery. xactStartedInRecovery
115 * should not be altered by index AMs.
116 */
117 scan->kill_prior_tuple = false;
118 scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
119 scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
120
121 scan->opaque = NULL;
122
123 scan->xs_itup = NULL;
124 scan->xs_itupdesc = NULL;
125 scan->xs_hitup = NULL;
126 scan->xs_hitupdesc = NULL;
127
128 return scan;
129 }
130
131 /* ----------------
132 * IndexScanEnd -- End an index scan.
133 *
134 * This routine just releases the storage acquired by
135 * RelationGetIndexScan(). Any AM-level resources are
136 * assumed to already have been released by the AM's
137 * endscan routine.
138 *
139 * Returns:
140 * None.
141 * ----------------
142 */
143 void
IndexScanEnd(IndexScanDesc scan)144 IndexScanEnd(IndexScanDesc scan)
145 {
146 if (scan->keyData != NULL)
147 pfree(scan->keyData);
148 if (scan->orderByData != NULL)
149 pfree(scan->orderByData);
150
151 pfree(scan);
152 }
153
154 /*
155 * BuildIndexValueDescription
156 *
157 * Construct a string describing the contents of an index entry, in the
158 * form "(key_name, ...)=(key_value, ...)". This is currently used
159 * for building unique-constraint and exclusion-constraint error messages,
160 * so only key columns of the index are checked and printed.
161 *
162 * Note that if the user does not have permissions to view all of the
163 * columns involved then a NULL is returned. Returning a partial key seems
164 * unlikely to be useful and we have no way to know which of the columns the
165 * user provided (unlike in ExecBuildSlotValueDescription).
166 *
167 * The passed-in values/nulls arrays are the "raw" input to the index AM,
168 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
169 * in the index, but it's what the user perceives to be stored.
170 *
171 * Note: if you change anything here, check whether
172 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
173 * change.
174 */
175 char *
BuildIndexValueDescription(Relation indexRelation,Datum * values,bool * isnull)176 BuildIndexValueDescription(Relation indexRelation,
177 Datum *values, bool *isnull)
178 {
179 StringInfoData buf;
180 Form_pg_index idxrec;
181 int indnkeyatts;
182 int i;
183 int keyno;
184 Oid indexrelid = RelationGetRelid(indexRelation);
185 Oid indrelid;
186 AclResult aclresult;
187
188 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
189
190 /*
191 * Check permissions- if the user does not have access to view all of the
192 * key columns then return NULL to avoid leaking data.
193 *
194 * First check if RLS is enabled for the relation. If so, return NULL to
195 * avoid leaking data.
196 *
197 * Next we need to check table-level SELECT access and then, if there is
198 * no access there, check column-level permissions.
199 */
200 idxrec = indexRelation->rd_index;
201 indrelid = idxrec->indrelid;
202 Assert(indexrelid == idxrec->indexrelid);
203
204 /* RLS check- if RLS is enabled then we don't return anything. */
205 if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
206 return NULL;
207
208 /* Table-level SELECT is enough, if the user has it */
209 aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
210 if (aclresult != ACLCHECK_OK)
211 {
212 /*
213 * No table-level access, so step through the columns in the index and
214 * make sure the user has SELECT rights on all of them.
215 */
216 for (keyno = 0; keyno < indnkeyatts; keyno++)
217 {
218 AttrNumber attnum = idxrec->indkey.values[keyno];
219
220 /*
221 * Note that if attnum == InvalidAttrNumber, then this is an index
222 * based on an expression and we return no detail rather than try
223 * to figure out what column(s) the expression includes and if the
224 * user has SELECT rights on them.
225 */
226 if (attnum == InvalidAttrNumber ||
227 pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
228 ACL_SELECT) != ACLCHECK_OK)
229 {
230 /* No access, so clean up and return */
231 return NULL;
232 }
233 }
234 }
235
236 initStringInfo(&buf);
237 appendStringInfo(&buf, "(%s)=(",
238 pg_get_indexdef_columns(indexrelid, true));
239
240 for (i = 0; i < indnkeyatts; i++)
241 {
242 char *val;
243
244 if (isnull[i])
245 val = "null";
246 else
247 {
248 Oid foutoid;
249 bool typisvarlena;
250
251 /*
252 * The provided data is not necessarily of the type stored in the
253 * index; rather it is of the index opclass's input type. So look
254 * at rd_opcintype not the index tupdesc.
255 *
256 * Note: this is a bit shaky for opclasses that have pseudotype
257 * input types such as ANYARRAY or RECORD. Currently, the
258 * typoutput functions associated with the pseudotypes will work
259 * okay, but we might have to try harder in future.
260 */
261 getTypeOutputInfo(indexRelation->rd_opcintype[i],
262 &foutoid, &typisvarlena);
263 val = OidOutputFunctionCall(foutoid, values[i]);
264 }
265
266 if (i > 0)
267 appendStringInfoString(&buf, ", ");
268 appendStringInfoString(&buf, val);
269 }
270
271 appendStringInfoChar(&buf, ')');
272
273 return buf.data;
274 }
275
276 /*
277 * Get the latestRemovedXid from the table entries pointed at by the index
278 * tuples being deleted.
279 *
280 * Note: index access methods that don't consistently use the standard
281 * IndexTuple + heap TID item pointer representation will need to provide
282 * their own version of this function.
283 */
284 TransactionId
index_compute_xid_horizon_for_tuples(Relation irel,Relation hrel,Buffer ibuf,OffsetNumber * itemnos,int nitems)285 index_compute_xid_horizon_for_tuples(Relation irel,
286 Relation hrel,
287 Buffer ibuf,
288 OffsetNumber *itemnos,
289 int nitems)
290 {
291 ItemPointerData *ttids =
292 (ItemPointerData *) palloc(sizeof(ItemPointerData) * nitems);
293 TransactionId latestRemovedXid = InvalidTransactionId;
294 Page ipage = BufferGetPage(ibuf);
295 IndexTuple itup;
296
297 /* identify what the index tuples about to be deleted point to */
298 for (int i = 0; i < nitems; i++)
299 {
300 ItemId iitemid;
301
302 iitemid = PageGetItemId(ipage, itemnos[i]);
303 itup = (IndexTuple) PageGetItem(ipage, iitemid);
304
305 ItemPointerCopy(&itup->t_tid, &ttids[i]);
306 }
307
308 /* determine the actual xid horizon */
309 latestRemovedXid =
310 table_compute_xid_horizon_for_tuples(hrel, ttids, nitems);
311
312 pfree(ttids);
313
314 return latestRemovedXid;
315 }
316
317
318 /* ----------------------------------------------------------------
319 * heap-or-index-scan access to system catalogs
320 *
321 * These functions support system catalog accesses that normally use
322 * an index but need to be capable of being switched to heap scans
323 * if the system indexes are unavailable.
324 *
325 * The specified scan keys must be compatible with the named index.
326 * Generally this means that they must constrain either all columns
327 * of the index, or the first K columns of an N-column index.
328 *
329 * These routines could work with non-system tables, actually,
330 * but they're only useful when there is a known index to use with
331 * the given scan keys; so in practice they're only good for
332 * predetermined types of scans of system catalogs.
333 * ----------------------------------------------------------------
334 */
335
336 /*
337 * systable_beginscan --- set up for heap-or-index scan
338 *
339 * rel: catalog to scan, already opened and suitably locked
340 * indexId: OID of index to conditionally use
341 * indexOK: if false, forces a heap scan (see notes below)
342 * snapshot: time qual to use (NULL for a recent catalog snapshot)
343 * nkeys, key: scan keys
344 *
345 * The attribute numbers in the scan key should be set for the heap case.
346 * If we choose to index, we reset them to 1..n to reference the index
347 * columns. Note this means there must be one scankey qualification per
348 * index column! This is checked by the Asserts in the normal, index-using
349 * case, but won't be checked if the heapscan path is taken.
350 *
351 * The routine checks the normal cases for whether an indexscan is safe,
352 * but caller can make additional checks and pass indexOK=false if needed.
353 * In standard case indexOK can simply be constant TRUE.
354 */
355 SysScanDesc
systable_beginscan(Relation heapRelation,Oid indexId,bool indexOK,Snapshot snapshot,int nkeys,ScanKey key)356 systable_beginscan(Relation heapRelation,
357 Oid indexId,
358 bool indexOK,
359 Snapshot snapshot,
360 int nkeys, ScanKey key)
361 {
362 SysScanDesc sysscan;
363 Relation irel;
364
365 if (indexOK &&
366 !IgnoreSystemIndexes &&
367 !ReindexIsProcessingIndex(indexId))
368 irel = index_open(indexId, AccessShareLock);
369 else
370 irel = NULL;
371
372 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
373
374 sysscan->heap_rel = heapRelation;
375 sysscan->irel = irel;
376 sysscan->slot = table_slot_create(heapRelation, NULL);
377
378 if (snapshot == NULL)
379 {
380 Oid relid = RelationGetRelid(heapRelation);
381
382 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
383 sysscan->snapshot = snapshot;
384 }
385 else
386 {
387 /* Caller is responsible for any snapshot. */
388 sysscan->snapshot = NULL;
389 }
390
391 if (irel)
392 {
393 int i;
394
395 /* Change attribute numbers to be index column numbers. */
396 for (i = 0; i < nkeys; i++)
397 {
398 int j;
399
400 for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
401 {
402 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
403 {
404 key[i].sk_attno = j + 1;
405 break;
406 }
407 }
408 if (j == IndexRelationGetNumberOfAttributes(irel))
409 elog(ERROR, "column is not in index");
410 }
411
412 sysscan->iscan = index_beginscan(heapRelation, irel,
413 snapshot, nkeys, 0);
414 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
415 sysscan->scan = NULL;
416 }
417 else
418 {
419 /*
420 * We disallow synchronized scans when forced to use a heapscan on a
421 * catalog. In most cases the desired rows are near the front, so
422 * that the unpredictable start point of a syncscan is a serious
423 * disadvantage; and there are no compensating advantages, because
424 * it's unlikely that such scans will occur in parallel.
425 */
426 sysscan->scan = table_beginscan_strat(heapRelation, snapshot,
427 nkeys, key,
428 true, false);
429 sysscan->iscan = NULL;
430 }
431
432 return sysscan;
433 }
434
435 /*
436 * systable_getnext --- get next tuple in a heap-or-index scan
437 *
438 * Returns NULL if no more tuples available.
439 *
440 * Note that returned tuple is a reference to data in a disk buffer;
441 * it must not be modified, and should be presumed inaccessible after
442 * next getnext() or endscan() call.
443 *
444 * XXX: It'd probably make sense to offer a slot based interface, at least
445 * optionally.
446 */
447 HeapTuple
systable_getnext(SysScanDesc sysscan)448 systable_getnext(SysScanDesc sysscan)
449 {
450 HeapTuple htup = NULL;
451
452 if (sysscan->irel)
453 {
454 if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot))
455 {
456 bool shouldFree;
457
458 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
459 Assert(!shouldFree);
460
461 /*
462 * We currently don't need to support lossy index operators for
463 * any system catalog scan. It could be done here, using the scan
464 * keys to drive the operator calls, if we arranged to save the
465 * heap attnums during systable_beginscan(); this is practical
466 * because we still wouldn't need to support indexes on
467 * expressions.
468 */
469 if (sysscan->iscan->xs_recheck)
470 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
471 }
472 }
473 else
474 {
475 if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot))
476 {
477 bool shouldFree;
478
479 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree);
480 Assert(!shouldFree);
481 }
482 }
483
484 return htup;
485 }
486
487 /*
488 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
489 *
490 * In particular, determine if this tuple would be visible to a catalog scan
491 * that started now. We don't handle the case of a non-MVCC scan snapshot,
492 * because no caller needs that yet.
493 *
494 * This is useful to test whether an object was deleted while we waited to
495 * acquire lock on it.
496 *
497 * Note: we don't actually *need* the tuple to be passed in, but it's a
498 * good crosscheck that the caller is interested in the right tuple.
499 */
500 bool
systable_recheck_tuple(SysScanDesc sysscan,HeapTuple tup)501 systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
502 {
503 Snapshot freshsnap;
504 bool result;
505
506 Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL));
507
508 /*
509 * Trust that table_tuple_satisfies_snapshot() and its subsidiaries
510 * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves
511 * acquire snapshots, so we need not register the snapshot. Those
512 * facilities are too low-level to have any business scanning tables.
513 */
514 freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
515
516 result = table_tuple_satisfies_snapshot(sysscan->heap_rel,
517 sysscan->slot,
518 freshsnap);
519
520 return result;
521 }
522
523 /*
524 * systable_endscan --- close scan, release resources
525 *
526 * Note that it's still up to the caller to close the heap relation.
527 */
528 void
systable_endscan(SysScanDesc sysscan)529 systable_endscan(SysScanDesc sysscan)
530 {
531 if (sysscan->slot)
532 {
533 ExecDropSingleTupleTableSlot(sysscan->slot);
534 sysscan->slot = NULL;
535 }
536
537 if (sysscan->irel)
538 {
539 index_endscan(sysscan->iscan);
540 index_close(sysscan->irel, AccessShareLock);
541 }
542 else
543 table_endscan(sysscan->scan);
544
545 if (sysscan->snapshot)
546 UnregisterSnapshot(sysscan->snapshot);
547
548 pfree(sysscan);
549 }
550
551
552 /*
553 * systable_beginscan_ordered --- set up for ordered catalog scan
554 *
555 * These routines have essentially the same API as systable_beginscan etc,
556 * except that they guarantee to return multiple matching tuples in
557 * index order. Also, for largely historical reasons, the index to use
558 * is opened and locked by the caller, not here.
559 *
560 * Currently we do not support non-index-based scans here. (In principle
561 * we could do a heapscan and sort, but the uses are in places that
562 * probably don't need to still work with corrupted catalog indexes.)
563 * For the moment, therefore, these functions are merely the thinnest of
564 * wrappers around index_beginscan/index_getnext_slot. The main reason for
565 * their existence is to centralize possible future support of lossy operators
566 * in catalog scans.
567 */
568 SysScanDesc
systable_beginscan_ordered(Relation heapRelation,Relation indexRelation,Snapshot snapshot,int nkeys,ScanKey key)569 systable_beginscan_ordered(Relation heapRelation,
570 Relation indexRelation,
571 Snapshot snapshot,
572 int nkeys, ScanKey key)
573 {
574 SysScanDesc sysscan;
575 int i;
576
577 /* REINDEX can probably be a hard error here ... */
578 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
579 elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
580 RelationGetRelationName(indexRelation));
581 /* ... but we only throw a warning about violating IgnoreSystemIndexes */
582 if (IgnoreSystemIndexes)
583 elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
584 RelationGetRelationName(indexRelation));
585
586 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
587
588 sysscan->heap_rel = heapRelation;
589 sysscan->irel = indexRelation;
590 sysscan->slot = table_slot_create(heapRelation, NULL);
591
592 if (snapshot == NULL)
593 {
594 Oid relid = RelationGetRelid(heapRelation);
595
596 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
597 sysscan->snapshot = snapshot;
598 }
599 else
600 {
601 /* Caller is responsible for any snapshot. */
602 sysscan->snapshot = NULL;
603 }
604
605 /* Change attribute numbers to be index column numbers. */
606 for (i = 0; i < nkeys; i++)
607 {
608 int j;
609
610 for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
611 {
612 if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
613 {
614 key[i].sk_attno = j + 1;
615 break;
616 }
617 }
618 if (j == IndexRelationGetNumberOfAttributes(indexRelation))
619 elog(ERROR, "column is not in index");
620 }
621
622 sysscan->iscan = index_beginscan(heapRelation, indexRelation,
623 snapshot, nkeys, 0);
624 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
625 sysscan->scan = NULL;
626
627 return sysscan;
628 }
629
630 /*
631 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
632 */
633 HeapTuple
systable_getnext_ordered(SysScanDesc sysscan,ScanDirection direction)634 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
635 {
636 HeapTuple htup = NULL;
637
638 Assert(sysscan->irel);
639 if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot))
640 htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL);
641
642 /* See notes in systable_getnext */
643 if (htup && sysscan->iscan->xs_recheck)
644 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
645
646 return htup;
647 }
648
649 /*
650 * systable_endscan_ordered --- close scan, release resources
651 */
652 void
systable_endscan_ordered(SysScanDesc sysscan)653 systable_endscan_ordered(SysScanDesc sysscan)
654 {
655 if (sysscan->slot)
656 {
657 ExecDropSingleTupleTableSlot(sysscan->slot);
658 sysscan->slot = NULL;
659 }
660
661 Assert(sysscan->irel);
662 index_endscan(sysscan->iscan);
663 if (sysscan->snapshot)
664 UnregisterSnapshot(sysscan->snapshot);
665 pfree(sysscan);
666 }
667