1 /*------------------------------------------------------------------------- 2 * 3 * genam.c 4 * general index access method routines 5 * 6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group 7 * Portions Copyright (c) 1994, Regents of the University of California 8 * 9 * 10 * IDENTIFICATION 11 * src/backend/access/index/genam.c 12 * 13 * NOTES 14 * many of the old access method routines have been turned into 15 * macros and moved to genam.h -cim 4/30/91 16 * 17 *------------------------------------------------------------------------- 18 */ 19 20 #include "postgres.h" 21 22 #include "access/relscan.h" 23 #include "access/transam.h" 24 #include "catalog/index.h" 25 #include "lib/stringinfo.h" 26 #include "miscadmin.h" 27 #include "storage/bufmgr.h" 28 #include "utils/acl.h" 29 #include "utils/builtins.h" 30 #include "utils/lsyscache.h" 31 #include "utils/rel.h" 32 #include "utils/rls.h" 33 #include "utils/ruleutils.h" 34 #include "utils/snapmgr.h" 35 #include "utils/syscache.h" 36 #include "utils/tqual.h" 37 38 39 /* ---------------------------------------------------------------- 40 * general access method routines 41 * 42 * All indexed access methods use an identical scan structure. 43 * We don't know how the various AMs do locking, however, so we don't 44 * do anything about that here. 45 * 46 * The intent is that an AM implementor will define a beginscan routine 47 * that calls RelationGetIndexScan, to fill in the scan, and then does 48 * whatever kind of locking he wants. 49 * 50 * At the end of a scan, the AM's endscan routine undoes the locking, 51 * but does *not* call IndexScanEnd --- the higher-level index_endscan 52 * routine does that. (We can't do it in the AM because index_endscan 53 * still needs to touch the IndexScanDesc after calling the AM.) 54 * 55 * Because of this, the AM does not have a choice whether to call 56 * RelationGetIndexScan or not; its beginscan routine must return an 57 * object made by RelationGetIndexScan. This is kinda ugly but not 58 * worth cleaning up now. 59 * ---------------------------------------------------------------- 60 */ 61 62 /* ---------------- 63 * RelationGetIndexScan -- Create and fill an IndexScanDesc. 64 * 65 * This routine creates an index scan structure and sets up initial 66 * contents for it. 67 * 68 * Parameters: 69 * indexRelation -- index relation for scan. 70 * nkeys -- count of scan keys (index qual conditions). 71 * norderbys -- count of index order-by operators. 72 * 73 * Returns: 74 * An initialized IndexScanDesc. 75 * ---------------- 76 */ 77 IndexScanDesc 78 RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) 79 { 80 IndexScanDesc scan; 81 82 scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); 83 84 scan->heapRelation = NULL; /* may be set later */ 85 scan->indexRelation = indexRelation; 86 scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */ 87 scan->numberOfKeys = nkeys; 88 scan->numberOfOrderBys = norderbys; 89 90 /* 91 * We allocate key workspace here, but it won't get filled until amrescan. 92 */ 93 if (nkeys > 0) 94 scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); 95 else 96 scan->keyData = NULL; 97 if (norderbys > 0) 98 scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys); 99 else 100 scan->orderByData = NULL; 101 102 scan->xs_want_itup = false; /* may be set later */ 103 104 /* 105 * During recovery we ignore killed tuples and don't bother to kill them 106 * either. We do this because the xmin on the primary node could easily be 107 * later than the xmin on the standby node, so that what the primary 108 * thinks is killed is supposed to be visible on standby. So for correct 109 * MVCC for queries during recovery we must ignore these hints and check 110 * all tuples. Do *not* set ignore_killed_tuples to true when running in a 111 * transaction that was started during recovery. xactStartedInRecovery 112 * should not be altered by index AMs. 113 */ 114 scan->kill_prior_tuple = false; 115 scan->xactStartedInRecovery = TransactionStartedDuringRecovery(); 116 scan->ignore_killed_tuples = !scan->xactStartedInRecovery; 117 118 scan->opaque = NULL; 119 120 scan->xs_itup = NULL; 121 scan->xs_itupdesc = NULL; 122 scan->xs_hitup = NULL; 123 scan->xs_hitupdesc = NULL; 124 125 ItemPointerSetInvalid(&scan->xs_ctup.t_self); 126 scan->xs_ctup.t_data = NULL; 127 scan->xs_cbuf = InvalidBuffer; 128 scan->xs_continue_hot = false; 129 130 return scan; 131 } 132 133 /* ---------------- 134 * IndexScanEnd -- End an index scan. 135 * 136 * This routine just releases the storage acquired by 137 * RelationGetIndexScan(). Any AM-level resources are 138 * assumed to already have been released by the AM's 139 * endscan routine. 140 * 141 * Returns: 142 * None. 143 * ---------------- 144 */ 145 void 146 IndexScanEnd(IndexScanDesc scan) 147 { 148 if (scan->keyData != NULL) 149 pfree(scan->keyData); 150 if (scan->orderByData != NULL) 151 pfree(scan->orderByData); 152 153 pfree(scan); 154 } 155 156 /* 157 * BuildIndexValueDescription 158 * 159 * Construct a string describing the contents of an index entry, in the 160 * form "(key_name, ...)=(key_value, ...)". This is currently used 161 * for building unique-constraint and exclusion-constraint error messages, 162 * so only key columns of the index are checked and printed. 163 * 164 * Note that if the user does not have permissions to view all of the 165 * columns involved then a NULL is returned. Returning a partial key seems 166 * unlikely to be useful and we have no way to know which of the columns the 167 * user provided (unlike in ExecBuildSlotValueDescription). 168 * 169 * The passed-in values/nulls arrays are the "raw" input to the index AM, 170 * e.g. results of FormIndexDatum --- this is not necessarily what is stored 171 * in the index, but it's what the user perceives to be stored. 172 * 173 * Note: if you change anything here, check whether 174 * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar 175 * change. 176 */ 177 char * 178 BuildIndexValueDescription(Relation indexRelation, 179 Datum *values, bool *isnull) 180 { 181 StringInfoData buf; 182 Form_pg_index idxrec; 183 HeapTuple ht_idx; 184 int indnkeyatts; 185 int i; 186 int keyno; 187 Oid indexrelid = RelationGetRelid(indexRelation); 188 Oid indrelid; 189 AclResult aclresult; 190 191 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); 192 193 /* 194 * Check permissions- if the user does not have access to view all of the 195 * key columns then return NULL to avoid leaking data. 196 * 197 * First check if RLS is enabled for the relation. If so, return NULL to 198 * avoid leaking data. 199 * 200 * Next we need to check table-level SELECT access and then, if there is 201 * no access there, check column-level permissions. 202 */ 203 204 /* 205 * Fetch the pg_index tuple by the Oid of the index 206 */ 207 ht_idx = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexrelid)); 208 if (!HeapTupleIsValid(ht_idx)) 209 elog(ERROR, "cache lookup failed for index %u", indexrelid); 210 idxrec = (Form_pg_index) GETSTRUCT(ht_idx); 211 212 indrelid = idxrec->indrelid; 213 Assert(indexrelid == idxrec->indexrelid); 214 215 /* RLS check- if RLS is enabled then we don't return anything. */ 216 if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED) 217 { 218 ReleaseSysCache(ht_idx); 219 return NULL; 220 } 221 222 /* Table-level SELECT is enough, if the user has it */ 223 aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT); 224 if (aclresult != ACLCHECK_OK) 225 { 226 /* 227 * No table-level access, so step through the columns in the index and 228 * make sure the user has SELECT rights on all of them. 229 */ 230 for (keyno = 0; keyno < idxrec->indnkeyatts; keyno++) 231 { 232 AttrNumber attnum = idxrec->indkey.values[keyno]; 233 234 /* 235 * Note that if attnum == InvalidAttrNumber, then this is an index 236 * based on an expression and we return no detail rather than try 237 * to figure out what column(s) the expression includes and if the 238 * user has SELECT rights on them. 239 */ 240 if (attnum == InvalidAttrNumber || 241 pg_attribute_aclcheck(indrelid, attnum, GetUserId(), 242 ACL_SELECT) != ACLCHECK_OK) 243 { 244 /* No access, so clean up and return */ 245 ReleaseSysCache(ht_idx); 246 return NULL; 247 } 248 } 249 } 250 ReleaseSysCache(ht_idx); 251 252 initStringInfo(&buf); 253 appendStringInfo(&buf, "(%s)=(", 254 pg_get_indexdef_columns(indexrelid, true)); 255 256 for (i = 0; i < indnkeyatts; i++) 257 { 258 char *val; 259 260 if (isnull[i]) 261 val = "null"; 262 else 263 { 264 Oid foutoid; 265 bool typisvarlena; 266 267 /* 268 * The provided data is not necessarily of the type stored in the 269 * index; rather it is of the index opclass's input type. So look 270 * at rd_opcintype not the index tupdesc. 271 * 272 * Note: this is a bit shaky for opclasses that have pseudotype 273 * input types such as ANYARRAY or RECORD. Currently, the 274 * typoutput functions associated with the pseudotypes will work 275 * okay, but we might have to try harder in future. 276 */ 277 getTypeOutputInfo(indexRelation->rd_opcintype[i], 278 &foutoid, &typisvarlena); 279 val = OidOutputFunctionCall(foutoid, values[i]); 280 } 281 282 if (i > 0) 283 appendStringInfoString(&buf, ", "); 284 appendStringInfoString(&buf, val); 285 } 286 287 appendStringInfoChar(&buf, ')'); 288 289 return buf.data; 290 } 291 292 293 /* ---------------------------------------------------------------- 294 * heap-or-index-scan access to system catalogs 295 * 296 * These functions support system catalog accesses that normally use 297 * an index but need to be capable of being switched to heap scans 298 * if the system indexes are unavailable. 299 * 300 * The specified scan keys must be compatible with the named index. 301 * Generally this means that they must constrain either all columns 302 * of the index, or the first K columns of an N-column index. 303 * 304 * These routines could work with non-system tables, actually, 305 * but they're only useful when there is a known index to use with 306 * the given scan keys; so in practice they're only good for 307 * predetermined types of scans of system catalogs. 308 * ---------------------------------------------------------------- 309 */ 310 311 /* 312 * systable_beginscan --- set up for heap-or-index scan 313 * 314 * rel: catalog to scan, already opened and suitably locked 315 * indexId: OID of index to conditionally use 316 * indexOK: if false, forces a heap scan (see notes below) 317 * snapshot: time qual to use (NULL for a recent catalog snapshot) 318 * nkeys, key: scan keys 319 * 320 * The attribute numbers in the scan key should be set for the heap case. 321 * If we choose to index, we reset them to 1..n to reference the index 322 * columns. Note this means there must be one scankey qualification per 323 * index column! This is checked by the Asserts in the normal, index-using 324 * case, but won't be checked if the heapscan path is taken. 325 * 326 * The routine checks the normal cases for whether an indexscan is safe, 327 * but caller can make additional checks and pass indexOK=false if needed. 328 * In standard case indexOK can simply be constant TRUE. 329 */ 330 SysScanDesc 331 systable_beginscan(Relation heapRelation, 332 Oid indexId, 333 bool indexOK, 334 Snapshot snapshot, 335 int nkeys, ScanKey key) 336 { 337 SysScanDesc sysscan; 338 Relation irel; 339 340 if (indexOK && 341 !IgnoreSystemIndexes && 342 !ReindexIsProcessingIndex(indexId)) 343 irel = index_open(indexId, AccessShareLock); 344 else 345 irel = NULL; 346 347 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); 348 349 sysscan->heap_rel = heapRelation; 350 sysscan->irel = irel; 351 352 if (snapshot == NULL) 353 { 354 Oid relid = RelationGetRelid(heapRelation); 355 356 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); 357 sysscan->snapshot = snapshot; 358 } 359 else 360 { 361 /* Caller is responsible for any snapshot. */ 362 sysscan->snapshot = NULL; 363 } 364 365 if (irel) 366 { 367 int i; 368 369 /* Change attribute numbers to be index column numbers. */ 370 for (i = 0; i < nkeys; i++) 371 { 372 int j; 373 374 for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++) 375 { 376 if (key[i].sk_attno == irel->rd_index->indkey.values[j]) 377 { 378 key[i].sk_attno = j + 1; 379 break; 380 } 381 } 382 if (j == IndexRelationGetNumberOfAttributes(irel)) 383 elog(ERROR, "column is not in index"); 384 } 385 386 sysscan->iscan = index_beginscan(heapRelation, irel, 387 snapshot, nkeys, 0); 388 index_rescan(sysscan->iscan, key, nkeys, NULL, 0); 389 sysscan->scan = NULL; 390 } 391 else 392 { 393 /* 394 * We disallow synchronized scans when forced to use a heapscan on a 395 * catalog. In most cases the desired rows are near the front, so 396 * that the unpredictable start point of a syncscan is a serious 397 * disadvantage; and there are no compensating advantages, because 398 * it's unlikely that such scans will occur in parallel. 399 */ 400 sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, 401 nkeys, key, 402 true, false); 403 sysscan->iscan = NULL; 404 } 405 406 return sysscan; 407 } 408 409 /* 410 * systable_getnext --- get next tuple in a heap-or-index scan 411 * 412 * Returns NULL if no more tuples available. 413 * 414 * Note that returned tuple is a reference to data in a disk buffer; 415 * it must not be modified, and should be presumed inaccessible after 416 * next getnext() or endscan() call. 417 */ 418 HeapTuple 419 systable_getnext(SysScanDesc sysscan) 420 { 421 HeapTuple htup; 422 423 if (sysscan->irel) 424 { 425 htup = index_getnext(sysscan->iscan, ForwardScanDirection); 426 427 /* 428 * We currently don't need to support lossy index operators for any 429 * system catalog scan. It could be done here, using the scan keys to 430 * drive the operator calls, if we arranged to save the heap attnums 431 * during systable_beginscan(); this is practical because we still 432 * wouldn't need to support indexes on expressions. 433 */ 434 if (htup && sysscan->iscan->xs_recheck) 435 elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); 436 } 437 else 438 htup = heap_getnext(sysscan->scan, ForwardScanDirection); 439 440 return htup; 441 } 442 443 /* 444 * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple 445 * 446 * In particular, determine if this tuple would be visible to a catalog scan 447 * that started now. We don't handle the case of a non-MVCC scan snapshot, 448 * because no caller needs that yet. 449 * 450 * This is useful to test whether an object was deleted while we waited to 451 * acquire lock on it. 452 * 453 * Note: we don't actually *need* the tuple to be passed in, but it's a 454 * good crosscheck that the caller is interested in the right tuple. 455 */ 456 bool 457 systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) 458 { 459 Snapshot freshsnap; 460 bool result; 461 462 /* 463 * Trust that LockBuffer() and HeapTupleSatisfiesMVCC() do not themselves 464 * acquire snapshots, so we need not register the snapshot. Those 465 * facilities are too low-level to have any business scanning tables. 466 */ 467 freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel)); 468 469 if (sysscan->irel) 470 { 471 IndexScanDesc scan = sysscan->iscan; 472 473 Assert(IsMVCCSnapshot(scan->xs_snapshot)); 474 Assert(tup == &scan->xs_ctup); 475 Assert(BufferIsValid(scan->xs_cbuf)); 476 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ 477 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE); 478 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->xs_cbuf); 479 LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK); 480 } 481 else 482 { 483 HeapScanDesc scan = sysscan->scan; 484 485 Assert(IsMVCCSnapshot(scan->rs_snapshot)); 486 Assert(tup == &scan->rs_ctup); 487 Assert(BufferIsValid(scan->rs_cbuf)); 488 /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ 489 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE); 490 result = HeapTupleSatisfiesVisibility(tup, freshsnap, scan->rs_cbuf); 491 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK); 492 } 493 return result; 494 } 495 496 /* 497 * systable_endscan --- close scan, release resources 498 * 499 * Note that it's still up to the caller to close the heap relation. 500 */ 501 void 502 systable_endscan(SysScanDesc sysscan) 503 { 504 if (sysscan->irel) 505 { 506 index_endscan(sysscan->iscan); 507 index_close(sysscan->irel, AccessShareLock); 508 } 509 else 510 heap_endscan(sysscan->scan); 511 512 if (sysscan->snapshot) 513 UnregisterSnapshot(sysscan->snapshot); 514 515 pfree(sysscan); 516 } 517 518 519 /* 520 * systable_beginscan_ordered --- set up for ordered catalog scan 521 * 522 * These routines have essentially the same API as systable_beginscan etc, 523 * except that they guarantee to return multiple matching tuples in 524 * index order. Also, for largely historical reasons, the index to use 525 * is opened and locked by the caller, not here. 526 * 527 * Currently we do not support non-index-based scans here. (In principle 528 * we could do a heapscan and sort, but the uses are in places that 529 * probably don't need to still work with corrupted catalog indexes.) 530 * For the moment, therefore, these functions are merely the thinnest of 531 * wrappers around index_beginscan/index_getnext. The main reason for their 532 * existence is to centralize possible future support of lossy operators 533 * in catalog scans. 534 */ 535 SysScanDesc 536 systable_beginscan_ordered(Relation heapRelation, 537 Relation indexRelation, 538 Snapshot snapshot, 539 int nkeys, ScanKey key) 540 { 541 SysScanDesc sysscan; 542 int i; 543 544 /* REINDEX can probably be a hard error here ... */ 545 if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) 546 elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", 547 RelationGetRelationName(indexRelation)); 548 /* ... but we only throw a warning about violating IgnoreSystemIndexes */ 549 if (IgnoreSystemIndexes) 550 elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes", 551 RelationGetRelationName(indexRelation)); 552 553 sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); 554 555 sysscan->heap_rel = heapRelation; 556 sysscan->irel = indexRelation; 557 558 if (snapshot == NULL) 559 { 560 Oid relid = RelationGetRelid(heapRelation); 561 562 snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); 563 sysscan->snapshot = snapshot; 564 } 565 else 566 { 567 /* Caller is responsible for any snapshot. */ 568 sysscan->snapshot = NULL; 569 } 570 571 /* Change attribute numbers to be index column numbers. */ 572 for (i = 0; i < nkeys; i++) 573 { 574 int j; 575 576 for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++) 577 { 578 if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) 579 { 580 key[i].sk_attno = j + 1; 581 break; 582 } 583 } 584 if (j == IndexRelationGetNumberOfAttributes(indexRelation)) 585 elog(ERROR, "column is not in index"); 586 } 587 588 sysscan->iscan = index_beginscan(heapRelation, indexRelation, 589 snapshot, nkeys, 0); 590 index_rescan(sysscan->iscan, key, nkeys, NULL, 0); 591 sysscan->scan = NULL; 592 593 return sysscan; 594 } 595 596 /* 597 * systable_getnext_ordered --- get next tuple in an ordered catalog scan 598 */ 599 HeapTuple 600 systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) 601 { 602 HeapTuple htup; 603 604 Assert(sysscan->irel); 605 htup = index_getnext(sysscan->iscan, direction); 606 /* See notes in systable_getnext */ 607 if (htup && sysscan->iscan->xs_recheck) 608 elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); 609 610 return htup; 611 } 612 613 /* 614 * systable_endscan_ordered --- close scan, release resources 615 */ 616 void 617 systable_endscan_ordered(SysScanDesc sysscan) 618 { 619 Assert(sysscan->irel); 620 index_endscan(sysscan->iscan); 621 if (sysscan->snapshot) 622 UnregisterSnapshot(sysscan->snapshot); 623 pfree(sysscan); 624 } 625