1 /*-------------------------------------------------------------------------
2 *
3 * genam.c
4 * general index access method routines
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/access/index/genam.c
12 *
13 * NOTES
14 * many of the old access method routines have been turned into
15 * macros and moved to genam.h -cim 4/30/91
16 *
17 *-------------------------------------------------------------------------
18 */
19
20 #include "postgres.h"
21
22 #include "access/relscan.h"
23 #include "access/transam.h"
24 #include "catalog/index.h"
25 #include "lib/stringinfo.h"
26 #include "miscadmin.h"
27 #include "storage/bufmgr.h"
28 #include "utils/acl.h"
29 #include "utils/builtins.h"
30 #include "utils/lsyscache.h"
31 #include "utils/rel.h"
32 #include "utils/rls.h"
33 #include "utils/ruleutils.h"
34 #include "utils/snapmgr.h"
35 #include "utils/syscache.h"
36 #include "utils/tqual.h"
37
38
39 /* ----------------------------------------------------------------
40 * general access method routines
41 *
42 * All indexed access methods use an identical scan structure.
43 * We don't know how the various AMs do locking, however, so we don't
44 * do anything about that here.
45 *
46 * The intent is that an AM implementor will define a beginscan routine
47 * that calls RelationGetIndexScan, to fill in the scan, and then does
48 * whatever kind of locking he wants.
49 *
50 * At the end of a scan, the AM's endscan routine undoes the locking,
51 * but does *not* call IndexScanEnd --- the higher-level index_endscan
52 * routine does that. (We can't do it in the AM because index_endscan
53 * still needs to touch the IndexScanDesc after calling the AM.)
54 *
55 * Because of this, the AM does not have a choice whether to call
56 * RelationGetIndexScan or not; its beginscan routine must return an
57 * object made by RelationGetIndexScan. This is kinda ugly but not
58 * worth cleaning up now.
59 * ----------------------------------------------------------------
60 */
61
62 /* ----------------
63 * RelationGetIndexScan -- Create and fill an IndexScanDesc.
64 *
65 * This routine creates an index scan structure and sets up initial
66 * contents for it.
67 *
68 * Parameters:
69 * indexRelation -- index relation for scan.
70 * nkeys -- count of scan keys (index qual conditions).
71 * norderbys -- count of index order-by operators.
72 *
73 * Returns:
74 * An initialized IndexScanDesc.
75 * ----------------
76 */
77 IndexScanDesc
RelationGetIndexScan(Relation indexRelation,int nkeys,int norderbys)78 RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
79 {
80 IndexScanDesc scan;
81
82 scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData));
83
84 scan->heapRelation = NULL; /* may be set later */
85 scan->indexRelation = indexRelation;
86 scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */
87 scan->numberOfKeys = nkeys;
88 scan->numberOfOrderBys = norderbys;
89
90 /*
91 * We allocate key workspace here, but it won't get filled until amrescan.
92 */
93 if (nkeys > 0)
94 scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys);
95 else
96 scan->keyData = NULL;
97 if (norderbys > 0)
98 scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys);
99 else
100 scan->orderByData = NULL;
101
102 scan->xs_want_itup = false; /* may be set later */
103
104 /*
105 * During recovery we ignore killed tuples and don't bother to kill them
106 * either. We do this because the xmin on the primary node could easily be
107 * later than the xmin on the standby node, so that what the primary
108 * thinks is killed is supposed to be visible on standby. So for correct
109 * MVCC for queries during recovery we must ignore these hints and check
110 * all tuples. Do *not* set ignore_killed_tuples to true when running in a
111 * transaction that was started during recovery. xactStartedInRecovery
112 * should not be altered by index AMs.
113 */
114 scan->kill_prior_tuple = false;
115 scan->xactStartedInRecovery = TransactionStartedDuringRecovery();
116 scan->ignore_killed_tuples = !scan->xactStartedInRecovery;
117
118 scan->opaque = NULL;
119
120 scan->xs_itup = NULL;
121 scan->xs_itupdesc = NULL;
122 scan->xs_hitup = NULL;
123 scan->xs_hitupdesc = NULL;
124
125 ItemPointerSetInvalid(&scan->xs_ctup.t_self);
126 scan->xs_ctup.t_data = NULL;
127 scan->xs_cbuf = InvalidBuffer;
128 scan->xs_continue_hot = false;
129
130 return scan;
131 }
132
133 /* ----------------
134 * IndexScanEnd -- End an index scan.
135 *
136 * This routine just releases the storage acquired by
137 * RelationGetIndexScan(). Any AM-level resources are
138 * assumed to already have been released by the AM's
139 * endscan routine.
140 *
141 * Returns:
142 * None.
143 * ----------------
144 */
145 void
IndexScanEnd(IndexScanDesc scan)146 IndexScanEnd(IndexScanDesc scan)
147 {
148 if (scan->keyData != NULL)
149 pfree(scan->keyData);
150 if (scan->orderByData != NULL)
151 pfree(scan->orderByData);
152
153 pfree(scan);
154 }
155
156 /*
157 * BuildIndexValueDescription
158 *
159 * Construct a string describing the contents of an index entry, in the
160 * form "(key_name, ...)=(key_value, ...)". This is currently used
161 * for building unique-constraint and exclusion-constraint error messages,
162 * so only key columns of the index are checked and printed.
163 *
164 * Note that if the user does not have permissions to view all of the
165 * columns involved then a NULL is returned. Returning a partial key seems
166 * unlikely to be useful and we have no way to know which of the columns the
167 * user provided (unlike in ExecBuildSlotValueDescription).
168 *
169 * The passed-in values/nulls arrays are the "raw" input to the index AM,
170 * e.g. results of FormIndexDatum --- this is not necessarily what is stored
171 * in the index, but it's what the user perceives to be stored.
172 *
173 * Note: if you change anything here, check whether
174 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar
175 * change.
176 */
177 char *
BuildIndexValueDescription(Relation indexRelation,Datum * values,bool * isnull)178 BuildIndexValueDescription(Relation indexRelation,
179 Datum *values, bool *isnull)
180 {
181 StringInfoData buf;
182 Form_pg_index idxrec;
183 HeapTuple ht_idx;
184 int indnkeyatts;
185 int i;
186 int keyno;
187 Oid indexrelid = RelationGetRelid(indexRelation);
188 Oid indrelid;
189 AclResult aclresult;
190
191 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation);
192
193 /*
194 * Check permissions- if the user does not have access to view all of the
195 * key columns then return NULL to avoid leaking data.
196 *
197 * First check if RLS is enabled for the relation. If so, return NULL to
198 * avoid leaking data.
199 *
200 * Next we need to check table-level SELECT access and then, if there is
201 * no access there, check column-level permissions.
202 */
203
204 /*
205 * Fetch the pg_index tuple by the Oid of the index
206 */
207 ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid));
208 if (!HeapTupleIsValid(ht_idx))
209 elog(ERROR, "cache lookup failed for index %u", indexrelid);
210 idxrec = (Form_pg_index) GETSTRUCT(ht_idx);
211
212 indrelid = idxrec->indrelid;
213 Assert(indexrelid == idxrec->indexrelid);
214
215 /* RLS check- if RLS is enabled then we don't return anything. */
216 if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED)
217 {
218 ReleaseSysCache(ht_idx);
219 return NULL;
220 }
221
222 /* Table-level SELECT is enough, if the user has it */
223 aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT);
224 if (aclresult != ACLCHECK_OK)
225 {
226 /*
227 * No table-level access, so step through the columns in the index and
228 * make sure the user has SELECT rights on all of them.
229 */
230 for (keyno = 0; keyno < idxrec->indnkeyatts; keyno++)
231 {
232 AttrNumber attnum = idxrec->indkey.values[keyno];
233
234 /*
235 * Note that if attnum == InvalidAttrNumber, then this is an index
236 * based on an expression and we return no detail rather than try
237 * to figure out what column(s) the expression includes and if the
238 * user has SELECT rights on them.
239 */
240 if (attnum == InvalidAttrNumber ||
241 pg_attribute_aclcheck(indrelid, attnum, GetUserId(),
242 ACL_SELECT) != ACLCHECK_OK)
243 {
244 /* No access, so clean up and return */
245 ReleaseSysCache(ht_idx);
246 return NULL;
247 }
248 }
249 }
250 ReleaseSysCache(ht_idx);
251
252 initStringInfo(&buf);
253 appendStringInfo(&buf, "(%s)=(",
254 pg_get_indexdef_columns(indexrelid, true));
255
256 for (i = 0; i < indnkeyatts; i++)
257 {
258 char *val;
259
260 if (isnull[i])
261 val = "null";
262 else
263 {
264 Oid foutoid;
265 bool typisvarlena;
266
267 /*
268 * The provided data is not necessarily of the type stored in the
269 * index; rather it is of the index opclass's input type. So look
270 * at rd_opcintype not the index tupdesc.
271 *
272 * Note: this is a bit shaky for opclasses that have pseudotype
273 * input types such as ANYARRAY or RECORD. Currently, the
274 * typoutput functions associated with the pseudotypes will work
275 * okay, but we might have to try harder in future.
276 */
277 getTypeOutputInfo(indexRelation->rd_opcintype[i],
278 &foutoid, &typisvarlena);
279 val = OidOutputFunctionCall(foutoid, values[i]);
280 }
281
282 if (i > 0)
283 appendStringInfoString(&buf, ", ");
284 appendStringInfoString(&buf, val);
285 }
286
287 appendStringInfoChar(&buf, ')');
288
289 return buf.data;
290 }
291
292
293 /* ----------------------------------------------------------------
294 * heap-or-index-scan access to system catalogs
295 *
296 * These functions support system catalog accesses that normally use
297 * an index but need to be capable of being switched to heap scans
298 * if the system indexes are unavailable.
299 *
300 * The specified scan keys must be compatible with the named index.
301 * Generally this means that they must constrain either all columns
302 * of the index, or the first K columns of an N-column index.
303 *
304 * These routines could work with non-system tables, actually,
305 * but they're only useful when there is a known index to use with
306 * the given scan keys; so in practice they're only good for
307 * predetermined types of scans of system catalogs.
308 * ----------------------------------------------------------------
309 */
310
311 /*
312 * systable_beginscan --- set up for heap-or-index scan
313 *
314 * rel: catalog to scan, already opened and suitably locked
315 * indexId: OID of index to conditionally use
316 * indexOK: if false, forces a heap scan (see notes below)
317 * snapshot: time qual to use (NULL for a recent catalog snapshot)
318 * nkeys, key: scan keys
319 *
320 * The attribute numbers in the scan key should be set for the heap case.
321 * If we choose to index, we reset them to 1..n to reference the index
322 * columns. Note this means there must be one scankey qualification per
323 * index column! This is checked by the Asserts in the normal, index-using
324 * case, but won't be checked if the heapscan path is taken.
325 *
326 * The routine checks the normal cases for whether an indexscan is safe,
327 * but caller can make additional checks and pass indexOK=false if needed.
328 * In standard case indexOK can simply be constant TRUE.
329 */
330 SysScanDesc
systable_beginscan(Relation heapRelation,Oid indexId,bool indexOK,Snapshot snapshot,int nkeys,ScanKey key)331 systable_beginscan(Relation heapRelation,
332 Oid indexId,
333 bool indexOK,
334 Snapshot snapshot,
335 int nkeys, ScanKey key)
336 {
337 SysScanDesc sysscan;
338 Relation irel;
339
340 if (indexOK &&
341 !IgnoreSystemIndexes &&
342 !ReindexIsProcessingIndex(indexId))
343 irel = index_open(indexId, AccessShareLock);
344 else
345 irel = NULL;
346
347 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
348
349 sysscan->heap_rel = heapRelation;
350 sysscan->irel = irel;
351
352 if (snapshot == NULL)
353 {
354 Oid relid = RelationGetRelid(heapRelation);
355
356 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
357 sysscan->snapshot = snapshot;
358 }
359 else
360 {
361 /* Caller is responsible for any snapshot. */
362 sysscan->snapshot = NULL;
363 }
364
365 if (irel)
366 {
367 int i;
368
369 /* Change attribute numbers to be index column numbers. */
370 for (i = 0; i < nkeys; i++)
371 {
372 int j;
373
374 for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++)
375 {
376 if (key[i].sk_attno == irel->rd_index->indkey.values[j])
377 {
378 key[i].sk_attno = j + 1;
379 break;
380 }
381 }
382 if (j == IndexRelationGetNumberOfAttributes(irel))
383 elog(ERROR, "column is not in index");
384 }
385
386 sysscan->iscan = index_beginscan(heapRelation, irel,
387 snapshot, nkeys, 0);
388 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
389 sysscan->scan = NULL;
390 }
391 else
392 {
393 /*
394 * We disallow synchronized scans when forced to use a heapscan on a
395 * catalog. In most cases the desired rows are near the front, so
396 * that the unpredictable start point of a syncscan is a serious
397 * disadvantage; and there are no compensating advantages, because
398 * it's unlikely that such scans will occur in parallel.
399 */
400 sysscan->scan = heap_beginscan_strat(heapRelation, snapshot,
401 nkeys, key,
402 true, false);
403 sysscan->iscan = NULL;
404 }
405
406 return sysscan;
407 }
408
409 /*
410 * systable_getnext --- get next tuple in a heap-or-index scan
411 *
412 * Returns NULL if no more tuples available.
413 *
414 * Note that returned tuple is a reference to data in a disk buffer;
415 * it must not be modified, and should be presumed inaccessible after
416 * next getnext() or endscan() call.
417 */
418 HeapTuple
systable_getnext(SysScanDesc sysscan)419 systable_getnext(SysScanDesc sysscan)
420 {
421 HeapTuple htup;
422
423 if (sysscan->irel)
424 {
425 htup = index_getnext(sysscan->iscan, ForwardScanDirection);
426
427 /*
428 * We currently don't need to support lossy index operators for any
429 * system catalog scan. It could be done here, using the scan keys to
430 * drive the operator calls, if we arranged to save the heap attnums
431 * during systable_beginscan(); this is practical because we still
432 * wouldn't need to support indexes on expressions.
433 */
434 if (htup && sysscan->iscan->xs_recheck)
435 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
436 }
437 else
438 htup = heap_getnext(sysscan->scan, ForwardScanDirection);
439
440 return htup;
441 }
442
443 /*
444 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple
445 *
446 * In particular, determine if this tuple would be visible to a catalog scan
447 * that started now. We don't handle the case of a non-MVCC scan snapshot,
448 * because no caller needs that yet.
449 *
450 * This is useful to test whether an object was deleted while we waited to
451 * acquire lock on it.
452 *
453 * Note: we don't actually *need* the tuple to be passed in, but it's a
454 * good crosscheck that the caller is interested in the right tuple.
455 */
456 bool
systable_recheck_tuple(SysScanDesc sysscan,HeapTuple tup)457 systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup)
458 {
459 Snapshot freshsnap;
460 bool result;
461
462 /*
463 * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves
464 * acquire snapshots, so we need not register the snapshot. Those
465 * facilities are too low-level to have any business scanning tables.
466 */
467 freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel));
468
469 if (sysscan->irel)
470 {
471 IndexScanDesc scan = sysscan->iscan;
472
473 Assert(IsMVCCSnapshot(scan->xs_snapshot));
474 Assert(tup == &scan->xs_ctup);
475 Assert(BufferIsValid(scan->xs_cbuf));
476 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
477 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
478 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf);
479 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
480 }
481 else
482 {
483 HeapScanDesc scan = sysscan->scan;
484
485 Assert(IsMVCCSnapshot(scan->rs_snapshot));
486 Assert(tup == &scan->rs_ctup);
487 Assert(BufferIsValid(scan->rs_cbuf));
488 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
489 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
490 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf);
491 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
492 }
493 return result;
494 }
495
496 /*
497 * systable_endscan --- close scan, release resources
498 *
499 * Note that it's still up to the caller to close the heap relation.
500 */
501 void
systable_endscan(SysScanDesc sysscan)502 systable_endscan(SysScanDesc sysscan)
503 {
504 if (sysscan->irel)
505 {
506 index_endscan(sysscan->iscan);
507 index_close(sysscan->irel, AccessShareLock);
508 }
509 else
510 heap_endscan(sysscan->scan);
511
512 if (sysscan->snapshot)
513 UnregisterSnapshot(sysscan->snapshot);
514
515 pfree(sysscan);
516 }
517
518
519 /*
520 * systable_beginscan_ordered --- set up for ordered catalog scan
521 *
522 * These routines have essentially the same API as systable_beginscan etc,
523 * except that they guarantee to return multiple matching tuples in
524 * index order. Also, for largely historical reasons, the index to use
525 * is opened and locked by the caller, not here.
526 *
527 * Currently we do not support non-index-based scans here. (In principle
528 * we could do a heapscan and sort, but the uses are in places that
529 * probably don't need to still work with corrupted catalog indexes.)
530 * For the moment, therefore, these functions are merely the thinnest of
531 * wrappers around index_beginscan/index_getnext. The main reason for their
532 * existence is to centralize possible future support of lossy operators
533 * in catalog scans.
534 */
535 SysScanDesc
systable_beginscan_ordered(Relation heapRelation,Relation indexRelation,Snapshot snapshot,int nkeys,ScanKey key)536 systable_beginscan_ordered(Relation heapRelation,
537 Relation indexRelation,
538 Snapshot snapshot,
539 int nkeys, ScanKey key)
540 {
541 SysScanDesc sysscan;
542 int i;
543
544 /* REINDEX can probably be a hard error here ... */
545 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation)))
546 elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed",
547 RelationGetRelationName(indexRelation));
548 /* ... but we only throw a warning about violating IgnoreSystemIndexes */
549 if (IgnoreSystemIndexes)
550 elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes",
551 RelationGetRelationName(indexRelation));
552
553 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData));
554
555 sysscan->heap_rel = heapRelation;
556 sysscan->irel = indexRelation;
557
558 if (snapshot == NULL)
559 {
560 Oid relid = RelationGetRelid(heapRelation);
561
562 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid));
563 sysscan->snapshot = snapshot;
564 }
565 else
566 {
567 /* Caller is responsible for any snapshot. */
568 sysscan->snapshot = NULL;
569 }
570
571 /* Change attribute numbers to be index column numbers. */
572 for (i = 0; i < nkeys; i++)
573 {
574 int j;
575
576 for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++)
577 {
578 if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j])
579 {
580 key[i].sk_attno = j + 1;
581 break;
582 }
583 }
584 if (j == IndexRelationGetNumberOfAttributes(indexRelation))
585 elog(ERROR, "column is not in index");
586 }
587
588 sysscan->iscan = index_beginscan(heapRelation, indexRelation,
589 snapshot, nkeys, 0);
590 index_rescan(sysscan->iscan, key, nkeys, NULL, 0);
591 sysscan->scan = NULL;
592
593 return sysscan;
594 }
595
596 /*
597 * systable_getnext_ordered --- get next tuple in an ordered catalog scan
598 */
599 HeapTuple
systable_getnext_ordered(SysScanDesc sysscan,ScanDirection direction)600 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction)
601 {
602 HeapTuple htup;
603
604 Assert(sysscan->irel);
605 htup = index_getnext(sysscan->iscan, direction);
606 /* See notes in systable_getnext */
607 if (htup && sysscan->iscan->xs_recheck)
608 elog(ERROR, "system catalog scans with lossy index conditions are not implemented");
609
610 return htup;
611 }
612
613 /*
614 * systable_endscan_ordered --- close scan, release resources
615 */
616 void
systable_endscan_ordered(SysScanDesc sysscan)617 systable_endscan_ordered(SysScanDesc sysscan)
618 {
619 Assert(sysscan->irel);
620 index_endscan(sysscan->iscan);
621 if (sysscan->snapshot)
622 UnregisterSnapshot(sysscan->snapshot);
623 pfree(sysscan);
624 }
625