1 /*-------------------------------------------------------------------------
2 *
3 * index.c
4 * code to create and destroy POSTGRES index relations
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
19 *
20 *-------------------------------------------------------------------------
21 */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/heapam.h"
28 #include "access/multixact.h"
29 #include "access/relscan.h"
30 #include "access/sysattr.h"
31 #include "access/tableam.h"
32 #include "access/transam.h"
33 #include "access/visibilitymap.h"
34 #include "access/xact.h"
35 #include "bootstrap/bootstrap.h"
36 #include "catalog/binary_upgrade.h"
37 #include "catalog/catalog.h"
38 #include "catalog/dependency.h"
39 #include "catalog/heap.h"
40 #include "catalog/index.h"
41 #include "catalog/objectaccess.h"
42 #include "catalog/partition.h"
43 #include "catalog/pg_am.h"
44 #include "catalog/pg_collation.h"
45 #include "catalog/pg_constraint.h"
46 #include "catalog/pg_description.h"
47 #include "catalog/pg_depend.h"
48 #include "catalog/pg_inherits.h"
49 #include "catalog/pg_operator.h"
50 #include "catalog/pg_opclass.h"
51 #include "catalog/pg_tablespace.h"
52 #include "catalog/pg_trigger.h"
53 #include "catalog/pg_type.h"
54 #include "catalog/storage.h"
55 #include "commands/event_trigger.h"
56 #include "commands/progress.h"
57 #include "commands/tablecmds.h"
58 #include "commands/trigger.h"
59 #include "executor/executor.h"
60 #include "miscadmin.h"
61 #include "nodes/makefuncs.h"
62 #include "nodes/nodeFuncs.h"
63 #include "optimizer/optimizer.h"
64 #include "parser/parser.h"
65 #include "pgstat.h"
66 #include "rewrite/rewriteManip.h"
67 #include "storage/bufmgr.h"
68 #include "storage/lmgr.h"
69 #include "storage/predicate.h"
70 #include "storage/procarray.h"
71 #include "storage/smgr.h"
72 #include "utils/builtins.h"
73 #include "utils/fmgroids.h"
74 #include "utils/guc.h"
75 #include "utils/inval.h"
76 #include "utils/lsyscache.h"
77 #include "utils/memutils.h"
78 #include "utils/pg_rusage.h"
79 #include "utils/syscache.h"
80 #include "utils/tuplesort.h"
81 #include "utils/snapmgr.h"
82
83
84 /* Potentially set by pg_upgrade_support functions */
85 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
86
87 /*
88 * Pointer-free representation of variables used when reindexing system
89 * catalogs; we use this to propagate those values to parallel workers.
90 */
91 typedef struct
92 {
93 Oid currentlyReindexedHeap;
94 Oid currentlyReindexedIndex;
95 int numPendingReindexedIndexes;
96 Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
97 } SerializedReindexState;
98
99 /* non-export function prototypes */
100 static bool relationHasPrimaryKey(Relation rel);
101 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
102 IndexInfo *indexInfo,
103 List *indexColNames,
104 Oid accessMethodObjectId,
105 Oid *collationObjectId,
106 Oid *classObjectId);
107 static void InitializeAttributeOids(Relation indexRelation,
108 int numatts, Oid indexoid);
109 static void AppendAttributeTuples(Relation indexRelation, int numatts);
110 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
111 Oid parentIndexId,
112 IndexInfo *indexInfo,
113 Oid *collationOids,
114 Oid *classOids,
115 int16 *coloptions,
116 bool primary,
117 bool isexclusion,
118 bool immediate,
119 bool isvalid,
120 bool isready);
121 static void index_update_stats(Relation rel,
122 bool hasindex,
123 double reltuples);
124 static void IndexCheckExclusion(Relation heapRelation,
125 Relation indexRelation,
126 IndexInfo *indexInfo);
127 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
128 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
129 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
130 static void ResetReindexProcessing(void);
131 static void SetReindexPending(List *indexes);
132 static void RemoveReindexPending(Oid indexOid);
133
134
135 /*
136 * relationHasPrimaryKey
137 * See whether an existing relation has a primary key.
138 *
139 * Caller must have suitable lock on the relation.
140 *
141 * Note: we intentionally do not check indisvalid here; that's because this
142 * is used to enforce the rule that there can be only one indisprimary index,
143 * and we want that to be true even if said index is invalid.
144 */
145 static bool
relationHasPrimaryKey(Relation rel)146 relationHasPrimaryKey(Relation rel)
147 {
148 bool result = false;
149 List *indexoidlist;
150 ListCell *indexoidscan;
151
152 /*
153 * Get the list of index OIDs for the table from the relcache, and look up
154 * each one in the pg_index syscache until we find one marked primary key
155 * (hopefully there isn't more than one such).
156 */
157 indexoidlist = RelationGetIndexList(rel);
158
159 foreach(indexoidscan, indexoidlist)
160 {
161 Oid indexoid = lfirst_oid(indexoidscan);
162 HeapTuple indexTuple;
163
164 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
165 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
166 elog(ERROR, "cache lookup failed for index %u", indexoid);
167 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
168 ReleaseSysCache(indexTuple);
169 if (result)
170 break;
171 }
172
173 list_free(indexoidlist);
174
175 return result;
176 }
177
178 /*
179 * index_check_primary_key
180 * Apply special checks needed before creating a PRIMARY KEY index
181 *
182 * This processing used to be in DefineIndex(), but has been split out
183 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
184 *
185 * We check for a pre-existing primary key, and that all columns of the index
186 * are simple column references (not expressions), and that all those
187 * columns are marked NOT NULL. If not, fail.
188 *
189 * We used to automatically change unmarked columns to NOT NULL here by doing
190 * our own local ALTER TABLE command. But that doesn't work well if we're
191 * executing one subcommand of an ALTER TABLE: the operations may not get
192 * performed in the right order overall. Now we expect that the parser
193 * inserted any required ALTER TABLE SET NOT NULL operations before trying
194 * to create a primary-key index.
195 *
196 * Caller had better have at least ShareLock on the table, else the not-null
197 * checking isn't trustworthy.
198 */
199 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)200 index_check_primary_key(Relation heapRel,
201 IndexInfo *indexInfo,
202 bool is_alter_table,
203 IndexStmt *stmt)
204 {
205 int i;
206
207 /*
208 * If ALTER TABLE or CREATE TABLE .. PARTITION OF, check that there isn't
209 * already a PRIMARY KEY. In CREATE TABLE for an ordinary relation, we
210 * have faith that the parser rejected multiple pkey clauses; and CREATE
211 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
212 */
213 if ((is_alter_table || heapRel->rd_rel->relispartition) &&
214 relationHasPrimaryKey(heapRel))
215 {
216 ereport(ERROR,
217 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
218 errmsg("multiple primary keys for table \"%s\" are not allowed",
219 RelationGetRelationName(heapRel))));
220 }
221
222 /*
223 * Check that all of the attributes in a primary key are marked as not
224 * null. (We don't really expect to see that; it'd mean the parser messed
225 * up. But it seems wise to check anyway.)
226 */
227 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
228 {
229 AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i];
230 HeapTuple atttuple;
231 Form_pg_attribute attform;
232
233 if (attnum == 0)
234 ereport(ERROR,
235 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
236 errmsg("primary keys cannot be expressions")));
237
238 /* System attributes are never null, so no need to check */
239 if (attnum < 0)
240 continue;
241
242 atttuple = SearchSysCache2(ATTNUM,
243 ObjectIdGetDatum(RelationGetRelid(heapRel)),
244 Int16GetDatum(attnum));
245 if (!HeapTupleIsValid(atttuple))
246 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
247 attnum, RelationGetRelid(heapRel));
248 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
249
250 if (!attform->attnotnull)
251 ereport(ERROR,
252 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
253 errmsg("primary key column \"%s\" is not marked NOT NULL",
254 NameStr(attform->attname))));
255
256 ReleaseSysCache(atttuple);
257 }
258 }
259
260 /*
261 * ConstructTupleDescriptor
262 *
263 * Build an index tuple descriptor for a new index
264 */
265 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid * collationObjectId,Oid * classObjectId)266 ConstructTupleDescriptor(Relation heapRelation,
267 IndexInfo *indexInfo,
268 List *indexColNames,
269 Oid accessMethodObjectId,
270 Oid *collationObjectId,
271 Oid *classObjectId)
272 {
273 int numatts = indexInfo->ii_NumIndexAttrs;
274 int numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
275 ListCell *colnames_item = list_head(indexColNames);
276 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
277 IndexAmRoutine *amroutine;
278 TupleDesc heapTupDesc;
279 TupleDesc indexTupDesc;
280 int natts; /* #atts in heap rel --- for error checks */
281 int i;
282
283 /* We need access to the index AM's API struct */
284 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
285
286 /* ... and to the table's tuple descriptor */
287 heapTupDesc = RelationGetDescr(heapRelation);
288 natts = RelationGetForm(heapRelation)->relnatts;
289
290 /*
291 * allocate the new tuple descriptor
292 */
293 indexTupDesc = CreateTemplateTupleDesc(numatts);
294
295 /*
296 * Fill in the pg_attribute row.
297 */
298 for (i = 0; i < numatts; i++)
299 {
300 AttrNumber atnum = indexInfo->ii_IndexAttrNumbers[i];
301 Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
302 HeapTuple tuple;
303 Form_pg_type typeTup;
304 Form_pg_opclass opclassTup;
305 Oid keyType;
306
307 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
308 to->attnum = i + 1;
309 to->attstattarget = -1;
310 to->attcacheoff = -1;
311 to->attislocal = true;
312 to->attcollation = (i < numkeyatts) ?
313 collationObjectId[i] : InvalidOid;
314
315 /*
316 * Set the attribute name as specified by caller.
317 */
318 if (colnames_item == NULL) /* shouldn't happen */
319 elog(ERROR, "too few entries in colnames list");
320 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
321 colnames_item = lnext(colnames_item);
322
323 /*
324 * For simple index columns, we copy some pg_attribute fields from the
325 * parent relation. For expressions we have to look at the expression
326 * result.
327 */
328 if (atnum != 0)
329 {
330 /* Simple index column */
331 const FormData_pg_attribute *from;
332
333 Assert(atnum > 0); /* should've been caught above */
334
335 if (atnum > natts) /* safety check */
336 elog(ERROR, "invalid column number %d", atnum);
337 from = TupleDescAttr(heapTupDesc,
338 AttrNumberGetAttrOffset(atnum));
339
340 to->atttypid = from->atttypid;
341 to->attlen = from->attlen;
342 to->attndims = from->attndims;
343 to->atttypmod = from->atttypmod;
344 to->attbyval = from->attbyval;
345 to->attstorage = from->attstorage;
346 to->attalign = from->attalign;
347 }
348 else
349 {
350 /* Expressional index */
351 Node *indexkey;
352
353 if (indexpr_item == NULL) /* shouldn't happen */
354 elog(ERROR, "too few entries in indexprs list");
355 indexkey = (Node *) lfirst(indexpr_item);
356 indexpr_item = lnext(indexpr_item);
357
358 /*
359 * Lookup the expression type in pg_type for the type length etc.
360 */
361 keyType = exprType(indexkey);
362 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
363 if (!HeapTupleIsValid(tuple))
364 elog(ERROR, "cache lookup failed for type %u", keyType);
365 typeTup = (Form_pg_type) GETSTRUCT(tuple);
366
367 /*
368 * Assign some of the attributes values. Leave the rest.
369 */
370 to->atttypid = keyType;
371 to->attlen = typeTup->typlen;
372 to->attbyval = typeTup->typbyval;
373 to->attstorage = typeTup->typstorage;
374 to->attalign = typeTup->typalign;
375 to->atttypmod = exprTypmod(indexkey);
376
377 ReleaseSysCache(tuple);
378
379 /*
380 * Make sure the expression yields a type that's safe to store in
381 * an index. We need this defense because we have index opclasses
382 * for pseudo-types such as "record", and the actually stored type
383 * had better be safe; eg, a named composite type is okay, an
384 * anonymous record type is not. The test is the same as for
385 * whether a table column is of a safe type (which is why we
386 * needn't check for the non-expression case).
387 */
388 CheckAttributeType(NameStr(to->attname),
389 to->atttypid, to->attcollation,
390 NIL, 0);
391 }
392
393 /*
394 * We do not yet have the correct relation OID for the index, so just
395 * set it invalid for now. InitializeAttributeOids() will fix it
396 * later.
397 */
398 to->attrelid = InvalidOid;
399
400 /*
401 * Check the opclass and index AM to see if either provides a keytype
402 * (overriding the attribute type). Opclass (if exists) takes
403 * precedence.
404 */
405 keyType = amroutine->amkeytype;
406
407 /*
408 * Code below is concerned to the opclasses which are not used with
409 * the included columns.
410 */
411 if (i < indexInfo->ii_NumIndexKeyAttrs)
412 {
413 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
414 if (!HeapTupleIsValid(tuple))
415 elog(ERROR, "cache lookup failed for opclass %u",
416 classObjectId[i]);
417 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
418 if (OidIsValid(opclassTup->opckeytype))
419 keyType = opclassTup->opckeytype;
420
421 /*
422 * If keytype is specified as ANYELEMENT, and opcintype is
423 * ANYARRAY, then the attribute type must be an array (else it'd
424 * not have matched this opclass); use its element type.
425 */
426 if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
427 {
428 keyType = get_base_element_type(to->atttypid);
429 if (!OidIsValid(keyType))
430 elog(ERROR, "could not get element type of array type %u",
431 to->atttypid);
432 }
433
434 ReleaseSysCache(tuple);
435 }
436
437 /*
438 * If a key type different from the heap value is specified, update
439 * the type-related fields in the index tupdesc.
440 */
441 if (OidIsValid(keyType) && keyType != to->atttypid)
442 {
443 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
444 if (!HeapTupleIsValid(tuple))
445 elog(ERROR, "cache lookup failed for type %u", keyType);
446 typeTup = (Form_pg_type) GETSTRUCT(tuple);
447
448 to->atttypid = keyType;
449 to->atttypmod = -1;
450 to->attlen = typeTup->typlen;
451 to->attbyval = typeTup->typbyval;
452 to->attalign = typeTup->typalign;
453 to->attstorage = typeTup->typstorage;
454
455 ReleaseSysCache(tuple);
456 }
457 }
458
459 pfree(amroutine);
460
461 return indexTupDesc;
462 }
463
464 /* ----------------------------------------------------------------
465 * InitializeAttributeOids
466 * ----------------------------------------------------------------
467 */
468 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)469 InitializeAttributeOids(Relation indexRelation,
470 int numatts,
471 Oid indexoid)
472 {
473 TupleDesc tupleDescriptor;
474 int i;
475
476 tupleDescriptor = RelationGetDescr(indexRelation);
477
478 for (i = 0; i < numatts; i += 1)
479 TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
480 }
481
482 /* ----------------------------------------------------------------
483 * AppendAttributeTuples
484 * ----------------------------------------------------------------
485 */
486 static void
AppendAttributeTuples(Relation indexRelation,int numatts)487 AppendAttributeTuples(Relation indexRelation, int numatts)
488 {
489 Relation pg_attribute;
490 CatalogIndexState indstate;
491 TupleDesc indexTupDesc;
492 int i;
493
494 /*
495 * open the attribute relation and its indexes
496 */
497 pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
498
499 indstate = CatalogOpenIndexes(pg_attribute);
500
501 /*
502 * insert data from new index's tupdesc into pg_attribute
503 */
504 indexTupDesc = RelationGetDescr(indexRelation);
505
506 for (i = 0; i < numatts; i++)
507 {
508 Form_pg_attribute attr = TupleDescAttr(indexTupDesc, i);
509
510 Assert(attr->attnum == i + 1);
511
512 InsertPgAttributeTuple(pg_attribute, attr, indstate);
513 }
514
515 CatalogCloseIndexes(indstate);
516
517 table_close(pg_attribute, RowExclusiveLock);
518 }
519
520 /* ----------------------------------------------------------------
521 * UpdateIndexRelation
522 *
523 * Construct and insert a new entry in the pg_index catalog
524 * ----------------------------------------------------------------
525 */
526 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,Oid parentIndexId,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid,bool isready)527 UpdateIndexRelation(Oid indexoid,
528 Oid heapoid,
529 Oid parentIndexId,
530 IndexInfo *indexInfo,
531 Oid *collationOids,
532 Oid *classOids,
533 int16 *coloptions,
534 bool primary,
535 bool isexclusion,
536 bool immediate,
537 bool isvalid,
538 bool isready)
539 {
540 int2vector *indkey;
541 oidvector *indcollation;
542 oidvector *indclass;
543 int2vector *indoption;
544 Datum exprsDatum;
545 Datum predDatum;
546 Datum values[Natts_pg_index];
547 bool nulls[Natts_pg_index];
548 Relation pg_index;
549 HeapTuple tuple;
550 int i;
551
552 /*
553 * Copy the index key, opclass, and indoption info into arrays (should we
554 * make the caller pass them like this to start with?)
555 */
556 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
557 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
558 indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
559 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
560 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
561 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
562
563 /*
564 * Convert the index expressions (if any) to a text datum
565 */
566 if (indexInfo->ii_Expressions != NIL)
567 {
568 char *exprsString;
569
570 exprsString = nodeToString(indexInfo->ii_Expressions);
571 exprsDatum = CStringGetTextDatum(exprsString);
572 pfree(exprsString);
573 }
574 else
575 exprsDatum = (Datum) 0;
576
577 /*
578 * Convert the index predicate (if any) to a text datum. Note we convert
579 * implicit-AND format to normal explicit-AND for storage.
580 */
581 if (indexInfo->ii_Predicate != NIL)
582 {
583 char *predString;
584
585 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
586 predDatum = CStringGetTextDatum(predString);
587 pfree(predString);
588 }
589 else
590 predDatum = (Datum) 0;
591
592 /*
593 * open the system catalog index relation
594 */
595 pg_index = table_open(IndexRelationId, RowExclusiveLock);
596
597 /*
598 * Build a pg_index tuple
599 */
600 MemSet(nulls, false, sizeof(nulls));
601
602 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
603 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
604 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
605 values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
606 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
607 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
608 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
609 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
610 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
611 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
612 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
613 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
614 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
615 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
616 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
617 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
618 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
619 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
620 values[Anum_pg_index_indexprs - 1] = exprsDatum;
621 if (exprsDatum == (Datum) 0)
622 nulls[Anum_pg_index_indexprs - 1] = true;
623 values[Anum_pg_index_indpred - 1] = predDatum;
624 if (predDatum == (Datum) 0)
625 nulls[Anum_pg_index_indpred - 1] = true;
626
627 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
628
629 /*
630 * insert the tuple into the pg_index catalog
631 */
632 CatalogTupleInsert(pg_index, tuple);
633
634 /*
635 * close the relation and free the tuple
636 */
637 table_close(pg_index, RowExclusiveLock);
638 heap_freetuple(tuple);
639 }
640
641
642 /*
643 * index_create
644 *
645 * heapRelation: table to build index on (suitably locked by caller)
 * indexRelationName: name to give the new index relation
647 * indexRelationId: normally, pass InvalidOid to let this routine
648 * generate an OID for the index. During bootstrap this may be
649 * nonzero to specify a preselected OID.
650 * parentIndexRelid: if creating an index partition, the OID of the
651 * parent index; otherwise InvalidOid.
652 * parentConstraintId: if creating a constraint on a partition, the OID
653 * of the constraint in the parent; otherwise InvalidOid.
654 * relFileNode: normally, pass InvalidOid to get new storage. May be
655 * nonzero to attach an existing valid build.
656 * indexInfo: same info executor uses to insert into the index
657 * indexColNames: column names to use for index (List of char *)
658 * accessMethodObjectId: OID of index AM to use
659 * tableSpaceId: OID of tablespace to use
660 * collationObjectId: array of collation OIDs, one per index column
661 * classObjectId: array of index opclass OIDs, one per index column
662 * coloptions: array of per-index-column indoption settings
663 * reloptions: AM-specific options
664 * flags: bitmask that can include any combination of these bits:
665 * INDEX_CREATE_IS_PRIMARY
666 * the index is a primary key
667 * INDEX_CREATE_ADD_CONSTRAINT:
668 * invoke index_constraint_create also
669 * INDEX_CREATE_SKIP_BUILD:
670 * skip the index_build() step for the moment; caller must do it
671 * later (typically via reindex_index())
672 * INDEX_CREATE_CONCURRENT:
673 * do not lock the table against writers. The index will be
674 * marked "invalid" and the caller must take additional steps
675 * to fix it up.
676 * INDEX_CREATE_IF_NOT_EXISTS:
677 * do not throw an error if a relation with the same name
678 * already exists.
679 * INDEX_CREATE_PARTITIONED:
680 * create a partitioned index (table must be partitioned)
681 * constr_flags: flags passed to index_constraint_create
682 * (only if INDEX_CREATE_ADD_CONSTRAINT is set)
683 * allow_system_table_mods: allow table to be a system catalog
684 * is_internal: if true, post creation hook for new index
685 * constraintId: if not NULL, receives OID of created constraint
686 *
687 * Returns the OID of the created index.
688 */
689 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid parentIndexRelid,Oid parentConstraintId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bits16 flags,bits16 constr_flags,bool allow_system_table_mods,bool is_internal,Oid * constraintId)690 index_create(Relation heapRelation,
691 const char *indexRelationName,
692 Oid indexRelationId,
693 Oid parentIndexRelid,
694 Oid parentConstraintId,
695 Oid relFileNode,
696 IndexInfo *indexInfo,
697 List *indexColNames,
698 Oid accessMethodObjectId,
699 Oid tableSpaceId,
700 Oid *collationObjectId,
701 Oid *classObjectId,
702 int16 *coloptions,
703 Datum reloptions,
704 bits16 flags,
705 bits16 constr_flags,
706 bool allow_system_table_mods,
707 bool is_internal,
708 Oid *constraintId)
709 {
710 Oid heapRelationId = RelationGetRelid(heapRelation);
711 Relation pg_class;
712 Relation indexRelation;
713 TupleDesc indexTupDesc;
714 bool shared_relation;
715 bool mapped_relation;
716 bool is_exclusion;
717 Oid namespaceId;
718 int i;
719 char relpersistence;
720 bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
721 bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
722 bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
723 bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
724 char relkind;
725 TransactionId relfrozenxid;
726 MultiXactId relminmxid;
727
728 /* constraint flags can only be set when a constraint is requested */
729 Assert((constr_flags == 0) ||
730 ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
731 /* partitioned indexes must never be "built" by themselves */
732 Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
733
734 relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
735 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
736
737 pg_class = table_open(RelationRelationId, RowExclusiveLock);
738
739 /*
740 * The index will be in the same namespace as its parent table, and is
741 * shared across databases if and only if the parent is. Likewise, it
742 * will use the relfilenode map if and only if the parent does; and it
743 * inherits the parent's relpersistence.
744 */
745 namespaceId = RelationGetNamespace(heapRelation);
746 shared_relation = heapRelation->rd_rel->relisshared;
747 mapped_relation = RelationIsMapped(heapRelation);
748 relpersistence = heapRelation->rd_rel->relpersistence;
749
750 /*
751 * check parameters
752 */
753 if (indexInfo->ii_NumIndexAttrs < 1)
754 elog(ERROR, "must index at least one column");
755
756 if (!allow_system_table_mods &&
757 IsSystemRelation(heapRelation) &&
758 IsNormalProcessingMode())
759 ereport(ERROR,
760 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
761 errmsg("user-defined indexes on system catalog tables are not supported")));
762
763 /*
764 * Btree text_pattern_ops uses text_eq as the equality operator, which is
765 * fine as long as the collation is deterministic; text_eq then reduces to
766 * bitwise equality and so it is semantically compatible with the other
767 * operators and functions in that opclass. But with a nondeterministic
768 * collation, text_eq could yield results that are incompatible with the
769 * actual behavior of the index (which is determined by the opclass's
770 * comparison function). We prevent such problems by refusing creation of
771 * an index with that opclass and a nondeterministic collation.
772 *
773 * The same applies to varchar_pattern_ops and bpchar_pattern_ops. If we
774 * find more cases, we might decide to create a real mechanism for marking
775 * opclasses as incompatible with nondeterminism; but for now, this small
776 * hack suffices.
777 *
778 * Another solution is to use a special operator, not text_eq, as the
779 * equality opclass member; but that is undesirable because it would
780 * prevent index usage in many queries that work fine today.
781 */
782 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
783 {
784 Oid collation = collationObjectId[i];
785 Oid opclass = classObjectId[i];
786
787 if (collation)
788 {
789 if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
790 opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
791 opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
792 !get_collation_isdeterministic(collation))
793 {
794 HeapTuple classtup;
795
796 classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
797 if (!HeapTupleIsValid(classtup))
798 elog(ERROR, "cache lookup failed for operator class %u", opclass);
799 ereport(ERROR,
800 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
801 errmsg("nondeterministic collations are not supported for operator class \"%s\"",
802 NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
803 ReleaseSysCache(classtup);
804 }
805 }
806 }
807
808 /*
809 * Concurrent index build on a system catalog is unsafe because we tend to
810 * release locks before committing in catalogs.
811 */
812 if (concurrent &&
813 IsCatalogRelation(heapRelation))
814 ereport(ERROR,
815 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
816 errmsg("concurrent index creation on system catalog tables is not supported")));
817
818 /*
819 * This case is currently not supported. There's no way to ask for it in
820 * the grammar with CREATE INDEX, but it can happen with REINDEX.
821 */
822 if (concurrent && is_exclusion)
823 ereport(ERROR,
824 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
825 errmsg("concurrent index creation for exclusion constraints is not supported")));
826
827 /*
828 * We cannot allow indexing a shared relation after initdb (because
829 * there's no way to make the entry in other databases' pg_class).
830 */
831 if (shared_relation && !IsBootstrapProcessingMode())
832 ereport(ERROR,
833 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
834 errmsg("shared indexes cannot be created after initdb")));
835
836 /*
837 * Shared relations must be in pg_global, too (last-ditch check)
838 */
839 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
840 elog(ERROR, "shared relations must be placed in pg_global tablespace");
841
842 /*
843 * Check for duplicate name (both as to the index, and as to the
844 * associated constraint if any). Such cases would fail on the relevant
845 * catalogs' unique indexes anyway, but we prefer to give a friendlier
846 * error message.
847 */
848 if (get_relname_relid(indexRelationName, namespaceId))
849 {
850 if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
851 {
852 ereport(NOTICE,
853 (errcode(ERRCODE_DUPLICATE_TABLE),
854 errmsg("relation \"%s\" already exists, skipping",
855 indexRelationName)));
856 table_close(pg_class, RowExclusiveLock);
857 return InvalidOid;
858 }
859
860 ereport(ERROR,
861 (errcode(ERRCODE_DUPLICATE_TABLE),
862 errmsg("relation \"%s\" already exists",
863 indexRelationName)));
864 }
865
866 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
867 ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
868 indexRelationName))
869 {
870 /*
871 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
872 * conflicting constraint is not an index.
873 */
874 ereport(ERROR,
875 (errcode(ERRCODE_DUPLICATE_OBJECT),
876 errmsg("constraint \"%s\" for relation \"%s\" already exists",
877 indexRelationName, RelationGetRelationName(heapRelation))));
878 }
879
880 /*
881 * construct tuple descriptor for index tuples
882 */
883 indexTupDesc = ConstructTupleDescriptor(heapRelation,
884 indexInfo,
885 indexColNames,
886 accessMethodObjectId,
887 collationObjectId,
888 classObjectId);
889
890 /*
891 * Allocate an OID for the index, unless we were told what to use.
892 *
893 * The OID will be the relfilenode as well, so make sure it doesn't
894 * collide with either pg_class OIDs or existing physical files.
895 */
896 if (!OidIsValid(indexRelationId))
897 {
898 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
899 if (IsBinaryUpgrade)
900 {
901 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
902 ereport(ERROR,
903 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
904 errmsg("pg_class index OID value not set when in binary upgrade mode")));
905
906 indexRelationId = binary_upgrade_next_index_pg_class_oid;
907 binary_upgrade_next_index_pg_class_oid = InvalidOid;
908 }
909 else
910 {
911 indexRelationId =
912 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
913 }
914 }
915
916 /*
917 * create the index relation's relcache entry and, if necessary, the
918 * physical disk file. (If we fail further down, it's the smgr's
919 * responsibility to remove the disk file again, if any.)
920 */
921 indexRelation = heap_create(indexRelationName,
922 namespaceId,
923 tableSpaceId,
924 indexRelationId,
925 relFileNode,
926 accessMethodObjectId,
927 indexTupDesc,
928 relkind,
929 relpersistence,
930 shared_relation,
931 mapped_relation,
932 allow_system_table_mods,
933 &relfrozenxid,
934 &relminmxid);
935
936 Assert(relfrozenxid == InvalidTransactionId);
937 Assert(relminmxid == InvalidMultiXactId);
938 Assert(indexRelationId == RelationGetRelid(indexRelation));
939
940 /*
941 * Obtain exclusive lock on it. Although no other transactions can see it
942 * until we commit, this prevents deadlock-risk complaints from lock
943 * manager in cases such as CLUSTER.
944 */
945 LockRelation(indexRelation, AccessExclusiveLock);
946
947 /*
948 * Fill in fields of the index's pg_class entry that are not set correctly
949 * by heap_create.
950 *
951 * XXX should have a cleaner way to create cataloged indexes
952 */
953 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
954 indexRelation->rd_rel->relam = accessMethodObjectId;
955 indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
956
957 /*
958 * store index's pg_class entry
959 */
960 InsertPgClassTuple(pg_class, indexRelation,
961 RelationGetRelid(indexRelation),
962 (Datum) 0,
963 reloptions);
964
965 /* done with pg_class */
966 table_close(pg_class, RowExclusiveLock);
967
968 /*
969 * now update the object id's of all the attribute tuple forms in the
970 * index relation's tuple descriptor
971 */
972 InitializeAttributeOids(indexRelation,
973 indexInfo->ii_NumIndexAttrs,
974 indexRelationId);
975
976 /*
977 * append ATTRIBUTE tuples for the index
978 */
979 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
980
981 /* ----------------
982 * update pg_index
983 * (append INDEX tuple)
984 *
985 * Note that this stows away a representation of "predicate".
986 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
987 * ----------------
988 */
989 UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
990 indexInfo,
991 collationObjectId, classObjectId, coloptions,
992 isprimary, is_exclusion,
993 (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
994 !concurrent && !invalid,
995 !concurrent);
996
997 /*
998 * Register relcache invalidation on the indexes' heap relation, to
999 * maintain consistency of its index list
1000 */
1001 CacheInvalidateRelcache(heapRelation);
1002
1003 /* update pg_inherits and the parent's relhassubclass, if needed */
1004 if (OidIsValid(parentIndexRelid))
1005 {
1006 StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1007 SetRelationHasSubclass(parentIndexRelid, true);
1008 }
1009
1010 /*
1011 * Register constraint and dependencies for the index.
1012 *
1013 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1014 * entry. The index will be linked to the constraint, which in turn is
1015 * linked to the table. If it's not a CONSTRAINT, we need to make a
1016 * dependency directly on the table.
1017 *
1018 * We don't need a dependency on the namespace, because there'll be an
1019 * indirect dependency via our parent table.
1020 *
1021 * During bootstrap we can't register any dependencies, and we don't try
1022 * to make a constraint either.
1023 */
1024 if (!IsBootstrapProcessingMode())
1025 {
1026 ObjectAddress myself,
1027 referenced;
1028
1029 myself.classId = RelationRelationId;
1030 myself.objectId = indexRelationId;
1031 myself.objectSubId = 0;
1032
1033 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1034 {
1035 char constraintType;
1036 ObjectAddress localaddr;
1037
1038 if (isprimary)
1039 constraintType = CONSTRAINT_PRIMARY;
1040 else if (indexInfo->ii_Unique)
1041 constraintType = CONSTRAINT_UNIQUE;
1042 else if (is_exclusion)
1043 constraintType = CONSTRAINT_EXCLUSION;
1044 else
1045 {
1046 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1047 constraintType = 0; /* keep compiler quiet */
1048 }
1049
1050 localaddr = index_constraint_create(heapRelation,
1051 indexRelationId,
1052 parentConstraintId,
1053 indexInfo,
1054 indexRelationName,
1055 constraintType,
1056 constr_flags,
1057 allow_system_table_mods,
1058 is_internal);
1059 if (constraintId)
1060 *constraintId = localaddr.objectId;
1061 }
1062 else
1063 {
1064 bool have_simple_col = false;
1065
1066 /* Create auto dependencies on simply-referenced columns */
1067 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1068 {
1069 if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1070 {
1071 referenced.classId = RelationRelationId;
1072 referenced.objectId = heapRelationId;
1073 referenced.objectSubId = indexInfo->ii_IndexAttrNumbers[i];
1074
1075 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1076
1077 have_simple_col = true;
1078 }
1079 }
1080
1081 /*
1082 * If there are no simply-referenced columns, give the index an
1083 * auto dependency on the whole table. In most cases, this will
1084 * be redundant, but it might not be if the index expressions and
1085 * predicate contain no Vars or only whole-row Vars.
1086 */
1087 if (!have_simple_col)
1088 {
1089 referenced.classId = RelationRelationId;
1090 referenced.objectId = heapRelationId;
1091 referenced.objectSubId = 0;
1092
1093 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1094 }
1095 }
1096
1097 /*
1098 * If this is an index partition, create partition dependencies on
1099 * both the parent index and the table. (Note: these must be *in
1100 * addition to*, not instead of, all other dependencies. Otherwise
1101 * we'll be short some dependencies after DETACH PARTITION.)
1102 */
1103 if (OidIsValid(parentIndexRelid))
1104 {
1105 referenced.classId = RelationRelationId;
1106 referenced.objectId = parentIndexRelid;
1107 referenced.objectSubId = 0;
1108
1109 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1110
1111 referenced.classId = RelationRelationId;
1112 referenced.objectId = heapRelationId;
1113 referenced.objectSubId = 0;
1114
1115 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1116 }
1117
1118 /* Store dependency on collations */
1119 /* The default collation is pinned, so don't bother recording it */
1120 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1121 {
1122 if (OidIsValid(collationObjectId[i]) &&
1123 collationObjectId[i] != DEFAULT_COLLATION_OID)
1124 {
1125 referenced.classId = CollationRelationId;
1126 referenced.objectId = collationObjectId[i];
1127 referenced.objectSubId = 0;
1128
1129 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1130 }
1131 }
1132
1133 /* Store dependency on operator classes */
1134 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1135 {
1136 referenced.classId = OperatorClassRelationId;
1137 referenced.objectId = classObjectId[i];
1138 referenced.objectSubId = 0;
1139
1140 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1141 }
1142
1143 /* Store dependencies on anything mentioned in index expressions */
1144 if (indexInfo->ii_Expressions)
1145 {
1146 recordDependencyOnSingleRelExpr(&myself,
1147 (Node *) indexInfo->ii_Expressions,
1148 heapRelationId,
1149 DEPENDENCY_NORMAL,
1150 DEPENDENCY_AUTO, false);
1151 }
1152
1153 /* Store dependencies on anything mentioned in predicate */
1154 if (indexInfo->ii_Predicate)
1155 {
1156 recordDependencyOnSingleRelExpr(&myself,
1157 (Node *) indexInfo->ii_Predicate,
1158 heapRelationId,
1159 DEPENDENCY_NORMAL,
1160 DEPENDENCY_AUTO, false);
1161 }
1162 }
1163 else
1164 {
1165 /* Bootstrap mode - assert we weren't asked for constraint support */
1166 Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1167 }
1168
1169 /* Post creation hook for new index */
1170 InvokeObjectPostCreateHookArg(RelationRelationId,
1171 indexRelationId, 0, is_internal);
1172
1173 /*
1174 * Advance the command counter so that we can see the newly-entered
1175 * catalog tuples for the index.
1176 */
1177 CommandCounterIncrement();
1178
1179 /*
1180 * In bootstrap mode, we have to fill in the index strategy structure with
1181 * information from the catalogs. If we aren't bootstrapping, then the
1182 * relcache entry has already been rebuilt thanks to sinval update during
1183 * CommandCounterIncrement.
1184 */
1185 if (IsBootstrapProcessingMode())
1186 RelationInitIndexAccessInfo(indexRelation);
1187 else
1188 Assert(indexRelation->rd_indexcxt != NULL);
1189
1190 indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1191
1192 /*
1193 * If this is bootstrap (initdb) time, then we don't actually fill in the
1194 * index yet. We'll be creating more indexes and classes later, so we
1195 * delay filling them in until just before we're done with bootstrapping.
1196 * Similarly, if the caller specified to skip the build then filling the
1197 * index is delayed till later (ALTER TABLE can save work in some cases
1198 * with this). Otherwise, we call the AM routine that constructs the
1199 * index.
1200 */
1201 if (IsBootstrapProcessingMode())
1202 {
1203 index_register(heapRelationId, indexRelationId, indexInfo);
1204 }
1205 else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1206 {
1207 /*
1208 * Caller is responsible for filling the index later on. However,
1209 * we'd better make sure that the heap relation is correctly marked as
1210 * having an index.
1211 */
1212 index_update_stats(heapRelation,
1213 true,
1214 -1.0);
1215 /* Make the above update visible */
1216 CommandCounterIncrement();
1217 }
1218 else
1219 {
1220 index_build(heapRelation, indexRelation, indexInfo, false, true);
1221 }
1222
1223 /*
1224 * Close the index; but we keep the lock that we acquired above until end
1225 * of transaction. Closing the heap is caller's responsibility.
1226 */
1227 index_close(indexRelation, NoLock);
1228
1229 return indexRelationId;
1230 }
1231
1232 /*
1233 * index_concurrently_create_copy
1234 *
1235 * Create concurrently an index based on the definition of the one provided by
1236 * caller. The index is inserted into catalogs and needs to be built later
1237 * on. This is called during concurrent reindex processing.
1238 */
1239 Oid
index_concurrently_create_copy(Relation heapRelation,Oid oldIndexId,const char * newName)1240 index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId, const char *newName)
1241 {
1242 Relation indexRelation;
1243 IndexInfo *oldInfo,
1244 *newInfo;
1245 Oid newIndexId = InvalidOid;
1246 HeapTuple indexTuple,
1247 classTuple;
1248 Datum indclassDatum,
1249 colOptionDatum,
1250 optionDatum;
1251 oidvector *indclass;
1252 int2vector *indcoloptions;
1253 bool isnull;
1254 List *indexColNames = NIL;
1255 List *indexExprs = NIL;
1256 List *indexPreds = NIL;
1257
1258 indexRelation = index_open(oldIndexId, RowExclusiveLock);
1259
1260 /* The new index needs some information from the old index */
1261 oldInfo = BuildIndexInfo(indexRelation);
1262
1263 /*
1264 * Concurrent build of an index with exclusion constraints is not
1265 * supported.
1266 */
1267 if (oldInfo->ii_ExclusionOps != NULL)
1268 ereport(ERROR,
1269 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1270 errmsg("concurrent index creation for exclusion constraints is not supported")));
1271
1272 /* Get the array of class and column options IDs from index info */
1273 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
1274 if (!HeapTupleIsValid(indexTuple))
1275 elog(ERROR, "cache lookup failed for index %u", oldIndexId);
1276 indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1277 Anum_pg_index_indclass, &isnull);
1278 Assert(!isnull);
1279 indclass = (oidvector *) DatumGetPointer(indclassDatum);
1280
1281 colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1282 Anum_pg_index_indoption, &isnull);
1283 Assert(!isnull);
1284 indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
1285
1286 /* Fetch options of index if any */
1287 classTuple = SearchSysCache1(RELOID, oldIndexId);
1288 if (!HeapTupleIsValid(classTuple))
1289 elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
1290 optionDatum = SysCacheGetAttr(RELOID, classTuple,
1291 Anum_pg_class_reloptions, &isnull);
1292
1293 /*
1294 * Fetch the list of expressions and predicates directly from the
1295 * catalogs. This cannot rely on the information from IndexInfo of the
1296 * old index as these have been flattened for the planner.
1297 */
1298 if (oldInfo->ii_Expressions != NIL)
1299 {
1300 Datum exprDatum;
1301 char *exprString;
1302
1303 exprDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1304 Anum_pg_index_indexprs, &isnull);
1305 Assert(!isnull);
1306 exprString = TextDatumGetCString(exprDatum);
1307 indexExprs = (List *) stringToNode(exprString);
1308 pfree(exprString);
1309 }
1310 if (oldInfo->ii_Predicate != NIL)
1311 {
1312 Datum predDatum;
1313 char *predString;
1314
1315 predDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1316 Anum_pg_index_indpred, &isnull);
1317 Assert(!isnull);
1318 predString = TextDatumGetCString(predDatum);
1319 indexPreds = (List *) stringToNode(predString);
1320
1321 /* Also convert to implicit-AND format */
1322 indexPreds = make_ands_implicit((Expr *) indexPreds);
1323 pfree(predString);
1324 }
1325
1326 /*
1327 * Build the index information for the new index. Note that rebuild of
1328 * indexes with exclusion constraints is not supported, hence there is no
1329 * need to fill all the ii_Exclusion* fields.
1330 */
1331 newInfo = makeIndexInfo(oldInfo->ii_NumIndexAttrs,
1332 oldInfo->ii_NumIndexKeyAttrs,
1333 oldInfo->ii_Am,
1334 indexExprs,
1335 indexPreds,
1336 oldInfo->ii_Unique,
1337 false, /* not ready for inserts */
1338 true);
1339
1340 /*
1341 * Extract the list of column names and the column numbers for the new
1342 * index information. All this information will be used for the index
1343 * creation.
1344 */
1345 for (int i = 0; i < oldInfo->ii_NumIndexAttrs; i++)
1346 {
1347 TupleDesc indexTupDesc = RelationGetDescr(indexRelation);
1348 Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
1349
1350 indexColNames = lappend(indexColNames, NameStr(att->attname));
1351 newInfo->ii_IndexAttrNumbers[i] = oldInfo->ii_IndexAttrNumbers[i];
1352 }
1353
1354 /*
1355 * Now create the new index.
1356 *
1357 * For a partition index, we adjust the partition dependency later, to
1358 * ensure a consistent state at all times. That is why parentIndexRelid
1359 * is not set here.
1360 */
1361 newIndexId = index_create(heapRelation,
1362 newName,
1363 InvalidOid, /* indexRelationId */
1364 InvalidOid, /* parentIndexRelid */
1365 InvalidOid, /* parentConstraintId */
1366 InvalidOid, /* relFileNode */
1367 newInfo,
1368 indexColNames,
1369 indexRelation->rd_rel->relam,
1370 indexRelation->rd_rel->reltablespace,
1371 indexRelation->rd_indcollation,
1372 indclass->values,
1373 indcoloptions->values,
1374 optionDatum,
1375 INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
1376 0,
1377 true, /* allow table to be a system catalog? */
1378 false, /* is_internal? */
1379 NULL);
1380
1381 /* Close the relations used and clean up */
1382 index_close(indexRelation, NoLock);
1383 ReleaseSysCache(indexTuple);
1384 ReleaseSysCache(classTuple);
1385
1386 return newIndexId;
1387 }
1388
1389 /*
1390 * index_concurrently_build
1391 *
1392 * Build index for a concurrent operation. Low-level locks are taken when
1393 * this operation is performed to prevent only schema changes, but they need
1394 * to be kept until the end of the transaction performing this operation.
1395 * 'indexOid' refers to an index relation OID already created as part of
1396 * previous processing, and 'heapOid' refers to its parent heap relation.
1397 */
1398 void
index_concurrently_build(Oid heapRelationId,Oid indexRelationId)1399 index_concurrently_build(Oid heapRelationId,
1400 Oid indexRelationId)
1401 {
1402 Relation heapRel;
1403 Relation indexRelation;
1404 IndexInfo *indexInfo;
1405
1406 /* This had better make sure that a snapshot is active */
1407 Assert(ActiveSnapshotSet());
1408
1409 /* Open and lock the parent heap relation */
1410 heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
1411
1412 /* And the target index relation */
1413 indexRelation = index_open(indexRelationId, RowExclusiveLock);
1414
1415 /*
1416 * We have to re-build the IndexInfo struct, since it was lost in the
1417 * commit of the transaction where this concurrent index was created at
1418 * the catalog level.
1419 */
1420 indexInfo = BuildIndexInfo(indexRelation);
1421 Assert(!indexInfo->ii_ReadyForInserts);
1422 indexInfo->ii_Concurrent = true;
1423 indexInfo->ii_BrokenHotChain = false;
1424
1425 /* Now build the index */
1426 index_build(heapRel, indexRelation, indexInfo, false, true);
1427
1428 /* Close both the relations, but keep the locks */
1429 table_close(heapRel, NoLock);
1430 index_close(indexRelation, NoLock);
1431
1432 /*
1433 * Update the pg_index row to mark the index as ready for inserts. Once we
1434 * commit this transaction, any new transactions that open the table must
1435 * insert new entries into the index for insertions and non-HOT updates.
1436 */
1437 index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
1438 }
1439
1440 /*
1441 * index_concurrently_swap
1442 *
1443 * Swap name, dependencies, and constraints of the old index over to the new
1444 * index, while marking the old index as invalid and the new as valid.
1445 */
1446 void
index_concurrently_swap(Oid newIndexId,Oid oldIndexId,const char * oldName)1447 index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
1448 {
1449 Relation pg_class,
1450 pg_index,
1451 pg_constraint,
1452 pg_trigger;
1453 Relation oldClassRel,
1454 newClassRel;
1455 HeapTuple oldClassTuple,
1456 newClassTuple;
1457 Form_pg_class oldClassForm,
1458 newClassForm;
1459 HeapTuple oldIndexTuple,
1460 newIndexTuple;
1461 Form_pg_index oldIndexForm,
1462 newIndexForm;
1463 bool isPartition;
1464 Oid indexConstraintOid;
1465 List *constraintOids = NIL;
1466 ListCell *lc;
1467
1468 /*
1469 * Take a necessary lock on the old and new index before swapping them.
1470 */
1471 oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
1472 newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
1473
1474 /* Now swap names and dependencies of those indexes */
1475 pg_class = table_open(RelationRelationId, RowExclusiveLock);
1476
1477 oldClassTuple = SearchSysCacheCopy1(RELOID,
1478 ObjectIdGetDatum(oldIndexId));
1479 if (!HeapTupleIsValid(oldClassTuple))
1480 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1481 newClassTuple = SearchSysCacheCopy1(RELOID,
1482 ObjectIdGetDatum(newIndexId));
1483 if (!HeapTupleIsValid(newClassTuple))
1484 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1485
1486 oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
1487 newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
1488
1489 /* Swap the names */
1490 namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
1491 namestrcpy(&oldClassForm->relname, oldName);
1492
1493 /* Swap the partition flags to track inheritance properly */
1494 isPartition = newClassForm->relispartition;
1495 newClassForm->relispartition = oldClassForm->relispartition;
1496 oldClassForm->relispartition = isPartition;
1497
1498 CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
1499 CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
1500
1501 heap_freetuple(oldClassTuple);
1502 heap_freetuple(newClassTuple);
1503
1504 /* Now swap index info */
1505 pg_index = table_open(IndexRelationId, RowExclusiveLock);
1506
1507 oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1508 ObjectIdGetDatum(oldIndexId));
1509 if (!HeapTupleIsValid(oldIndexTuple))
1510 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1511 newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1512 ObjectIdGetDatum(newIndexId));
1513 if (!HeapTupleIsValid(newIndexTuple))
1514 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1515
1516 oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
1517 newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
1518
1519 /*
1520 * Copy constraint flags from the old index. This is safe because the old
1521 * index guaranteed uniqueness.
1522 */
1523 newIndexForm->indisprimary = oldIndexForm->indisprimary;
1524 oldIndexForm->indisprimary = false;
1525 newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
1526 oldIndexForm->indisexclusion = false;
1527 newIndexForm->indimmediate = oldIndexForm->indimmediate;
1528 oldIndexForm->indimmediate = true;
1529
1530 /* Preserve indisreplident in the new index */
1531 newIndexForm->indisreplident = oldIndexForm->indisreplident;
1532 oldIndexForm->indisreplident = false;
1533
1534 /* Preserve indisclustered in the new index */
1535 newIndexForm->indisclustered = oldIndexForm->indisclustered;
1536
1537 /*
1538 * Mark the new index as valid, and the old index as invalid similarly to
1539 * what index_set_state_flags() does.
1540 */
1541 newIndexForm->indisvalid = true;
1542 oldIndexForm->indisvalid = false;
1543 oldIndexForm->indisclustered = false;
1544
1545 CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
1546 CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
1547
1548 heap_freetuple(oldIndexTuple);
1549 heap_freetuple(newIndexTuple);
1550
1551 /*
1552 * Move constraints and triggers over to the new index
1553 */
1554
1555 constraintOids = get_index_ref_constraints(oldIndexId);
1556
1557 indexConstraintOid = get_index_constraint(oldIndexId);
1558
1559 if (OidIsValid(indexConstraintOid))
1560 constraintOids = lappend_oid(constraintOids, indexConstraintOid);
1561
1562 pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
1563 pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
1564
1565 foreach(lc, constraintOids)
1566 {
1567 HeapTuple constraintTuple,
1568 triggerTuple;
1569 Form_pg_constraint conForm;
1570 ScanKeyData key[1];
1571 SysScanDesc scan;
1572 Oid constraintOid = lfirst_oid(lc);
1573
1574 /* Move the constraint from the old to the new index */
1575 constraintTuple = SearchSysCacheCopy1(CONSTROID,
1576 ObjectIdGetDatum(constraintOid));
1577 if (!HeapTupleIsValid(constraintTuple))
1578 elog(ERROR, "could not find tuple for constraint %u", constraintOid);
1579
1580 conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
1581
1582 if (conForm->conindid == oldIndexId)
1583 {
1584 conForm->conindid = newIndexId;
1585
1586 CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
1587 }
1588
1589 heap_freetuple(constraintTuple);
1590
1591 /* Search for trigger records */
1592 ScanKeyInit(&key[0],
1593 Anum_pg_trigger_tgconstraint,
1594 BTEqualStrategyNumber, F_OIDEQ,
1595 ObjectIdGetDatum(constraintOid));
1596
1597 scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
1598 NULL, 1, key);
1599
1600 while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
1601 {
1602 Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1603
1604 if (tgForm->tgconstrindid != oldIndexId)
1605 continue;
1606
1607 /* Make a modifiable copy */
1608 triggerTuple = heap_copytuple(triggerTuple);
1609 tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1610
1611 tgForm->tgconstrindid = newIndexId;
1612
1613 CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
1614
1615 heap_freetuple(triggerTuple);
1616 }
1617
1618 systable_endscan(scan);
1619 }
1620
1621 /*
1622 * Move comment if any
1623 */
1624 {
1625 Relation description;
1626 ScanKeyData skey[3];
1627 SysScanDesc sd;
1628 HeapTuple tuple;
1629 Datum values[Natts_pg_description] = {0};
1630 bool nulls[Natts_pg_description] = {0};
1631 bool replaces[Natts_pg_description] = {0};
1632
1633 values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
1634 replaces[Anum_pg_description_objoid - 1] = true;
1635
1636 ScanKeyInit(&skey[0],
1637 Anum_pg_description_objoid,
1638 BTEqualStrategyNumber, F_OIDEQ,
1639 ObjectIdGetDatum(oldIndexId));
1640 ScanKeyInit(&skey[1],
1641 Anum_pg_description_classoid,
1642 BTEqualStrategyNumber, F_OIDEQ,
1643 ObjectIdGetDatum(RelationRelationId));
1644 ScanKeyInit(&skey[2],
1645 Anum_pg_description_objsubid,
1646 BTEqualStrategyNumber, F_INT4EQ,
1647 Int32GetDatum(0));
1648
1649 description = table_open(DescriptionRelationId, RowExclusiveLock);
1650
1651 sd = systable_beginscan(description, DescriptionObjIndexId, true,
1652 NULL, 3, skey);
1653
1654 while ((tuple = systable_getnext(sd)) != NULL)
1655 {
1656 tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
1657 values, nulls, replaces);
1658 CatalogTupleUpdate(description, &tuple->t_self, tuple);
1659
1660 break; /* Assume there can be only one match */
1661 }
1662
1663 systable_endscan(sd);
1664 table_close(description, NoLock);
1665 }
1666
1667 /*
1668 * Swap inheritance relationship with parent index
1669 */
1670 if (get_rel_relispartition(oldIndexId))
1671 {
1672 List *ancestors = get_partition_ancestors(oldIndexId);
1673 Oid parentIndexRelid = linitial_oid(ancestors);
1674
1675 DeleteInheritsTuple(oldIndexId, parentIndexRelid);
1676 StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
1677
1678 list_free(ancestors);
1679 }
1680
1681 /*
1682 * Swap all dependencies of and on the old index to the new one, and
1683 * vice-versa. Note that a call to CommandCounterIncrement() would cause
1684 * duplicate entries in pg_depend, so this should not be done.
1685 */
1686 changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
1687 changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
1688
1689 changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
1690 changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
1691
1692 /*
1693 * Copy over statistics from old to new index
1694 */
1695 {
1696 PgStat_StatTabEntry *tabentry;
1697
1698 tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
1699 if (tabentry)
1700 {
1701 if (newClassRel->pgstat_info)
1702 {
1703 newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
1704 newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
1705 newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
1706 newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
1707 newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
1708
1709 /*
1710 * The data will be sent by the next pgstat_report_stat()
1711 * call.
1712 */
1713 }
1714 }
1715 }
1716
1717 /* Copy data of pg_statistic from the old index to the new one */
1718 CopyStatistics(oldIndexId, newIndexId);
1719
1720 /* Copy pg_attribute.attstattarget for each index attribute */
1721 {
1722 HeapTuple attrTuple;
1723 Relation pg_attribute;
1724 SysScanDesc scan;
1725 ScanKeyData key[1];
1726
1727 pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
1728 ScanKeyInit(&key[0],
1729 Anum_pg_attribute_attrelid,
1730 BTEqualStrategyNumber, F_OIDEQ,
1731 ObjectIdGetDatum(newIndexId));
1732 scan = systable_beginscan(pg_attribute, AttributeRelidNumIndexId,
1733 true, NULL, 1, key);
1734
1735 while (HeapTupleIsValid((attrTuple = systable_getnext(scan))))
1736 {
1737 Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attrTuple);
1738 Datum repl_val[Natts_pg_attribute];
1739 bool repl_null[Natts_pg_attribute];
1740 bool repl_repl[Natts_pg_attribute];
1741 int attstattarget;
1742 HeapTuple newTuple;
1743
1744 /* Ignore dropped columns */
1745 if (att->attisdropped)
1746 continue;
1747
1748 /*
1749 * Get attstattarget from the old index and refresh the new value.
1750 */
1751 attstattarget = get_attstattarget(oldIndexId, att->attnum);
1752
1753 /* no need for a refresh if both match */
1754 if (attstattarget == att->attstattarget)
1755 continue;
1756
1757 memset(repl_val, 0, sizeof(repl_val));
1758 memset(repl_null, false, sizeof(repl_null));
1759 memset(repl_repl, false, sizeof(repl_repl));
1760
1761 repl_repl[Anum_pg_attribute_attstattarget - 1] = true;
1762 repl_val[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(attstattarget);
1763
1764 newTuple = heap_modify_tuple(attrTuple,
1765 RelationGetDescr(pg_attribute),
1766 repl_val, repl_null, repl_repl);
1767 CatalogTupleUpdate(pg_attribute, &newTuple->t_self, newTuple);
1768
1769 heap_freetuple(newTuple);
1770 }
1771
1772 systable_endscan(scan);
1773 table_close(pg_attribute, RowExclusiveLock);
1774 }
1775
1776 /* Close relations */
1777 table_close(pg_class, RowExclusiveLock);
1778 table_close(pg_index, RowExclusiveLock);
1779 table_close(pg_constraint, RowExclusiveLock);
1780 table_close(pg_trigger, RowExclusiveLock);
1781
1782 /* The lock taken previously is not released until the end of transaction */
1783 relation_close(oldClassRel, NoLock);
1784 relation_close(newClassRel, NoLock);
1785 }
1786
1787 /*
1788 * index_concurrently_set_dead
1789 *
1790 * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
1791 * CONCURRENTLY before actually dropping the index. After calling this
1792 * function, the index is seen by all the backends as dead. Low-level locks
1793 * taken here are kept until the end of the transaction calling this function.
1794 */
1795 void
index_concurrently_set_dead(Oid heapId,Oid indexId)1796 index_concurrently_set_dead(Oid heapId, Oid indexId)
1797 {
1798 Relation userHeapRelation;
1799 Relation userIndexRelation;
1800
1801 /*
1802 * No more predicate locks will be acquired on this index, and we're about
1803 * to stop doing inserts into the index which could show conflicts with
1804 * existing predicate locks, so now is the time to move them to the heap
1805 * relation.
1806 */
1807 userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
1808 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1809 TransferPredicateLocksToHeapRelation(userIndexRelation);
1810
1811 /*
1812 * Now we are sure that nobody uses the index for queries; they just might
1813 * have it open for updating it. So now we can unset indisready and
1814 * indislive, then wait till nobody could be using it at all anymore.
1815 */
1816 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1817
1818 /*
1819 * Invalidate the relcache for the table, so that after this commit all
1820 * sessions will refresh the table's index list. Forgetting just the
1821 * index's relcache entry is not enough.
1822 */
1823 CacheInvalidateRelcache(userHeapRelation);
1824
1825 /*
1826 * Close the relations again, though still holding session lock.
1827 */
1828 table_close(userHeapRelation, NoLock);
1829 index_close(userIndexRelation, NoLock);
1830 }
1831
/*
 * index_constraint_create
 *
 * Set up a constraint associated with an index.  Return the new constraint's
 * address.
 *
 * heapRelation: table owning the index (must be suitably locked by caller)
 * indexRelationId: OID of the index
 * parentConstraintId: if constraint is on a partition, the OID of the
 *		constraint in the parent.
 * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
 *		CONSTRAINT_EXCLUSION
 * constr_flags: bitmask that can include any combination of these bits:
 *		INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
 *		INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
 *		INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
 *		INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
 *		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
 *			of index on table's columns
 * allow_system_table_mods: allow table to be a system catalog
 * is_internal: index is constructed due to internal process
 */
1856 ObjectAddress
index_constraint_create(Relation heapRelation,Oid indexRelationId,Oid parentConstraintId,IndexInfo * indexInfo,const char * constraintName,char constraintType,bits16 constr_flags,bool allow_system_table_mods,bool is_internal)1857 index_constraint_create(Relation heapRelation,
1858 Oid indexRelationId,
1859 Oid parentConstraintId,
1860 IndexInfo *indexInfo,
1861 const char *constraintName,
1862 char constraintType,
1863 bits16 constr_flags,
1864 bool allow_system_table_mods,
1865 bool is_internal)
1866 {
1867 Oid namespaceId = RelationGetNamespace(heapRelation);
1868 ObjectAddress myself,
1869 idxaddr;
1870 Oid conOid;
1871 bool deferrable;
1872 bool initdeferred;
1873 bool mark_as_primary;
1874 bool islocal;
1875 bool noinherit;
1876 int inhcount;
1877
1878 deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
1879 initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
1880 mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;
1881
1882 /* constraint creation support doesn't work while bootstrapping */
1883 Assert(!IsBootstrapProcessingMode());
1884
1885 /* enforce system-table restriction */
1886 if (!allow_system_table_mods &&
1887 IsSystemRelation(heapRelation) &&
1888 IsNormalProcessingMode())
1889 ereport(ERROR,
1890 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1891 errmsg("user-defined indexes on system catalog tables are not supported")));
1892
1893 /* primary/unique constraints shouldn't have any expressions */
1894 if (indexInfo->ii_Expressions &&
1895 constraintType != CONSTRAINT_EXCLUSION)
1896 elog(ERROR, "constraints cannot have index expressions");
1897
1898 /*
1899 * If we're manufacturing a constraint for a pre-existing index, we need
1900 * to get rid of the existing auto dependencies for the index (the ones
1901 * that index_create() would have made instead of calling this function).
1902 *
1903 * Note: this code would not necessarily do the right thing if the index
1904 * has any expressions or predicate, but we'd never be turning such an
1905 * index into a UNIQUE or PRIMARY KEY constraint.
1906 */
1907 if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
1908 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1909 RelationRelationId, DEPENDENCY_AUTO);
1910
1911 if (OidIsValid(parentConstraintId))
1912 {
1913 islocal = false;
1914 inhcount = 1;
1915 noinherit = false;
1916 }
1917 else
1918 {
1919 islocal = true;
1920 inhcount = 0;
1921 noinherit = true;
1922 }
1923
1924 /*
1925 * Construct a pg_constraint entry.
1926 */
1927 conOid = CreateConstraintEntry(constraintName,
1928 namespaceId,
1929 constraintType,
1930 deferrable,
1931 initdeferred,
1932 true,
1933 parentConstraintId,
1934 RelationGetRelid(heapRelation),
1935 indexInfo->ii_IndexAttrNumbers,
1936 indexInfo->ii_NumIndexKeyAttrs,
1937 indexInfo->ii_NumIndexAttrs,
1938 InvalidOid, /* no domain */
1939 indexRelationId, /* index OID */
1940 InvalidOid, /* no foreign key */
1941 NULL,
1942 NULL,
1943 NULL,
1944 NULL,
1945 0,
1946 ' ',
1947 ' ',
1948 ' ',
1949 indexInfo->ii_ExclusionOps,
1950 NULL, /* no check constraint */
1951 NULL,
1952 islocal,
1953 inhcount,
1954 noinherit,
1955 is_internal);
1956
1957 /*
1958 * Register the index as internally dependent on the constraint.
1959 *
1960 * Note that the constraint has a dependency on the table, so we don't
1961 * need (or want) any direct dependency from the index to the table.
1962 */
1963 ObjectAddressSet(myself, ConstraintRelationId, conOid);
1964 ObjectAddressSet(idxaddr, RelationRelationId, indexRelationId);
1965 recordDependencyOn(&idxaddr, &myself, DEPENDENCY_INTERNAL);
1966
1967 /*
1968 * Also, if this is a constraint on a partition, give it partition-type
1969 * dependencies on the parent constraint as well as the table.
1970 */
1971 if (OidIsValid(parentConstraintId))
1972 {
1973 ObjectAddress referenced;
1974
1975 ObjectAddressSet(referenced, ConstraintRelationId, parentConstraintId);
1976 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1977 ObjectAddressSet(referenced, RelationRelationId,
1978 RelationGetRelid(heapRelation));
1979 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1980 }
1981
1982 /*
1983 * If the constraint is deferrable, create the deferred uniqueness
1984 * checking trigger. (The trigger will be given an internal dependency on
1985 * the constraint by CreateTrigger.)
1986 */
1987 if (deferrable)
1988 {
1989 CreateTrigStmt *trigger;
1990
1991 trigger = makeNode(CreateTrigStmt);
1992 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1993 "PK_ConstraintTrigger" :
1994 "Unique_ConstraintTrigger";
1995 trigger->relation = NULL;
1996 trigger->funcname = SystemFuncName("unique_key_recheck");
1997 trigger->args = NIL;
1998 trigger->row = true;
1999 trigger->timing = TRIGGER_TYPE_AFTER;
2000 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
2001 trigger->columns = NIL;
2002 trigger->whenClause = NULL;
2003 trigger->isconstraint = true;
2004 trigger->deferrable = true;
2005 trigger->initdeferred = initdeferred;
2006 trigger->constrrel = NULL;
2007
2008 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
2009 InvalidOid, conOid, indexRelationId, InvalidOid,
2010 InvalidOid, NULL, true, false);
2011 }
2012
2013 /*
2014 * If needed, mark the index as primary and/or deferred in pg_index.
2015 *
2016 * Note: When making an existing index into a constraint, caller must have
2017 * a table lock that prevents concurrent table updates; otherwise, there
2018 * is a risk that concurrent readers of the table will miss seeing this
2019 * index at all.
2020 */
2021 if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
2022 (mark_as_primary || deferrable))
2023 {
2024 Relation pg_index;
2025 HeapTuple indexTuple;
2026 Form_pg_index indexForm;
2027 bool dirty = false;
2028
2029 pg_index = table_open(IndexRelationId, RowExclusiveLock);
2030
2031 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2032 ObjectIdGetDatum(indexRelationId));
2033 if (!HeapTupleIsValid(indexTuple))
2034 elog(ERROR, "cache lookup failed for index %u", indexRelationId);
2035 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2036
2037 if (mark_as_primary && !indexForm->indisprimary)
2038 {
2039 indexForm->indisprimary = true;
2040 dirty = true;
2041 }
2042
2043 if (deferrable && indexForm->indimmediate)
2044 {
2045 indexForm->indimmediate = false;
2046 dirty = true;
2047 }
2048
2049 if (dirty)
2050 {
2051 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2052
2053 InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
2054 InvalidOid, is_internal);
2055 }
2056
2057 heap_freetuple(indexTuple);
2058 table_close(pg_index, RowExclusiveLock);
2059 }
2060
2061 return myself;
2062 }
2063
/*
 * index_drop
 *
 * Remove an index: schedule removal of its storage and delete its catalog
 * entries (pg_index, pg_attribute, pg_class, pg_inherits, plus any
 * statistics stored for expression columns).
 *
 * NOTE: this routine should now only be called through performDeletion(),
 * else associated dependencies won't be cleaned up.
 *
 * indexId: OID of the index to drop
 * concurrent: if true, do a DROP INDEX CONCURRENTLY.  This path commits the
 *		current transaction and starts a new one (twice), holding
 *		session-level locks across the commits.
 * concurrent_lock_mode: if true while concurrent is false, do a normal DROP
 *		INDEX but take the lock used for CONCURRENTLY processing
 *		(ShareUpdateExclusiveLock instead of AccessExclusiveLock).  That is
 *		used as part of REINDEX CONCURRENTLY.
 */
void
index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
{
	Oid			heapId;
	Relation	userHeapRelation;
	Relation	userIndexRelation;
	Relation	indexRelation;
	HeapTuple	tuple;
	bool		hasexprs;
	LockRelId	heaprelid,
				indexrelid;
	LOCKTAG		heaplocktag;
	LOCKMODE	lockmode;

	/*
	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
	 * access a temporary relation, so there's no harm in grabbing a stronger
	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
	 * more efficient.
	 */
	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
		   (!concurrent && !concurrent_lock_mode));

	/*
	 * To drop an index safely, we must grab exclusive lock on its parent
	 * table.  Exclusive lock on the index alone is insufficient because
	 * another backend might be about to execute a query on the parent table.
	 * If it relies on a previously cached list of index OIDs, then it could
	 * attempt to access the just-dropped index.  We must therefore take a
	 * table lock strong enough to prevent all queries on the table from
	 * proceeding until we commit and send out a shared-cache-inval notice
	 * that will make them update their index lists.
	 *
	 * In the concurrent case we avoid this requirement by disabling index use
	 * in multiple steps and waiting out any transactions that might be using
	 * the index, so we don't need exclusive lock on the parent table. Instead
	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
	 * AccessExclusiveLock on the index below, once we're sure nobody else is
	 * using it.)
	 */
	heapId = IndexGetRelation(indexId, false);
	lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
	userHeapRelation = table_open(heapId, lockmode);
	userIndexRelation = index_open(indexId, lockmode);

	/*
	 * We might still have open queries using it in our own session, which the
	 * above locking won't prevent, so test explicitly.
	 */
	CheckTableNotInUse(userIndexRelation, "DROP INDEX");

	/*
	 * Drop Index Concurrently is more or less the reverse process of Create
	 * Index Concurrently.
	 *
	 * First we unset indisvalid so queries starting afterwards don't use the
	 * index to answer queries anymore.  We have to keep indisready = true so
	 * transactions that are still scanning the index can continue to see
	 * valid index contents.  For instance, if they are using READ COMMITTED
	 * mode, and another transaction makes changes and commits, they need to
	 * see those new tuples in the index.
	 *
	 * After all transactions that could possibly have used the index for
	 * queries end, we can unset indisready and indislive, then wait till
	 * nobody could be touching it anymore.  (Note: we need indislive because
	 * this state must be distinct from the initial state during CREATE INDEX
	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
	 * are false.  That's because in that state, transactions must examine the
	 * index for HOT-safety decisions, while in this state we don't want them
	 * to open it at all.)
	 *
	 * Since all predicate locks on the index are about to be made invalid, we
	 * must promote them to predicate locks on the heap.  In the
	 * non-concurrent case we can just do that now.  In the concurrent case
	 * it's a bit trickier.  The predicate locks must be moved when there are
	 * no index scans in progress on the index and no more can subsequently
	 * start, so that no new predicate locks can be made on the index.  Also,
	 * they must be moved before heap inserts stop maintaining the index, else
	 * the conflict with the predicate lock on the index gap could be missed
	 * before the lock on the heap relation is in place to detect a conflict
	 * based on the heap tuple insert.
	 */
	if (concurrent)
	{
		/*
		 * We must commit our transaction in order to make the first pg_index
		 * state update visible to other sessions.  If the DROP machinery has
		 * already performed any other actions (removal of other objects,
		 * pg_depend entries, etc), the commit would make those actions
		 * permanent, which would leave us with inconsistent catalog state if
		 * we fail partway through the following sequence.  Since DROP INDEX
		 * CONCURRENTLY is restricted to dropping just one index that has no
		 * dependencies, we should get here before anything's been done ---
		 * but let's check that to be sure.  We can verify that the current
		 * transaction has not executed any transactional updates by checking
		 * that no XID has been assigned.
		 */
		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));

		/*
		 * Mark index invalid by updating its pg_index entry
		 */
		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh any cached plans that might reference the
		 * index.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/* save lockrelid and locktag for below, then close but keep locks */
		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;

		table_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * We must commit our current transaction so that the indisvalid
		 * update becomes visible to other transactions; then start another.
		 * Note that any previously-built data structures are lost in the
		 * commit.  The only data we keep past here are the relation IDs.
		 *
		 * Before committing, get a session-level lock on the table, to ensure
		 * that neither it nor the index can be dropped before we finish. This
		 * cannot block, even if someone else is waiting for access, because
		 * we already have the same lock within our transaction.
		 */
		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);

		PopActiveSnapshot();
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Now we must wait until no running transaction could be using the
		 * index for a query.  Use AccessExclusiveLock here to check for
		 * running transactions that hold locks of any kind on the table. Note
		 * we do not need to worry about xacts that open the table for reading
		 * after this point; they will see the index as invalid when they open
		 * the relation.
		 *
		 * Note: the reason we use actual lock acquisition here, rather than
		 * just checking the ProcArray and sleeping, is that deadlock is
		 * possible if one of the transactions in question is blocked trying
		 * to acquire an exclusive lock on our table.  The lock code will
		 * detect deadlock and error out properly.
		 *
		 * Note: we report progress through WaitForLockers() unconditionally
		 * here, even though it will only be used when we're called by REINDEX
		 * CONCURRENTLY and not when called by DROP INDEX CONCURRENTLY.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock, true);

		/* Finish invalidation of index and mark it as dead */
		index_concurrently_set_dead(heapId, indexId);

		/*
		 * Again, commit the transaction to make the pg_index update visible
		 * to other sessions.
		 */
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Wait till every transaction that saw the old index state has
		 * finished.  See above about progress reporting.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock, true);

		/*
		 * Re-open relations to allow us to complete our actions.
		 *
		 * At this point, nothing should be accessing the index, but let's
		 * leave nothing to chance and grab AccessExclusiveLock on the index
		 * before the physical deletion.
		 */
		userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, AccessExclusiveLock);
	}
	else
	{
		/* Not concurrent, so just transfer predicate locks and we're good */
		TransferPredicateLocksToHeapRelation(userIndexRelation);
	}

	/*
	 * Schedule physical removal of the files (if any).  A partitioned index
	 * is skipped here, so no storage drop is scheduled for it.
	 */
	if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		RelationDropStorage(userIndexRelation);

	/*
	 * Close and flush the index's relcache entry, to ensure relcache doesn't
	 * try to rebuild it while we're deleting catalog entries.  We keep the
	 * lock though.
	 */
	index_close(userIndexRelation, NoLock);

	RelationForgetRelation(indexId);

	/*
	 * Delete the index's pg_index row, and check whether it was an
	 * expressional index.  The indexprs check is made before the row is
	 * deleted.
	 */
	indexRelation = table_open(IndexRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);

	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
							   RelationGetDescr(indexRelation));

	CatalogTupleDelete(indexRelation, &tuple->t_self);

	ReleaseSysCache(tuple);
	table_close(indexRelation, RowExclusiveLock);

	/*
	 * if it has any expression columns, we might have stored statistics about
	 * them.
	 */
	if (hasexprs)
		RemoveStatistics(indexId, 0);

	/*
	 * fix ATTRIBUTE relation
	 */
	DeleteAttributeTuples(indexId);

	/*
	 * fix RELATION relation
	 */
	DeleteRelationTuple(indexId);

	/*
	 * fix INHERITS relation
	 */
	DeleteInheritsTuple(indexId, InvalidOid);

	/*
	 * We are presently too lazy to attempt to compute the new correct value
	 * of relhasindex (the next VACUUM will fix it if necessary). So there is
	 * no need to update the pg_class tuple for the owning relation. But we
	 * must send out a shared-cache-inval notice on the owning relation to
	 * ensure other backends update their relcache lists of indexes.  (In the
	 * concurrent case, this is redundant but harmless.)
	 */
	CacheInvalidateRelcache(userHeapRelation);

	/*
	 * Close owning rel, but keep lock
	 */
	table_close(userHeapRelation, NoLock);

	/*
	 * Release the session locks before we go.  They were only taken in the
	 * concurrent path, so heaprelid/indexrelid are only valid there.
	 */
	if (concurrent)
	{
		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
	}
}
2346
2347 /* ----------------------------------------------------------------
2348 * index_build support
2349 * ----------------------------------------------------------------
2350 */
2351
2352 /* ----------------
2353 * BuildIndexInfo
2354 * Construct an IndexInfo record for an open index
2355 *
2356 * IndexInfo stores the information about the index that's needed by
2357 * FormIndexDatum, which is used for both index_build() and later insertion
2358 * of individual index tuples. Normally we build an IndexInfo for an index
2359 * just once per command, and then use it for (potentially) many tuples.
2360 * ----------------
2361 */
2362 IndexInfo *
BuildIndexInfo(Relation index)2363 BuildIndexInfo(Relation index)
2364 {
2365 IndexInfo *ii = makeNode(IndexInfo);
2366 Form_pg_index indexStruct = index->rd_index;
2367 int i;
2368 int numAtts;
2369
2370 /* check the number of keys, and copy attr numbers into the IndexInfo */
2371 numAtts = indexStruct->indnatts;
2372 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2373 elog(ERROR, "invalid indnatts %d for index %u",
2374 numAtts, RelationGetRelid(index));
2375 ii->ii_NumIndexAttrs = numAtts;
2376 ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts;
2377 Assert(ii->ii_NumIndexKeyAttrs != 0);
2378 Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs);
2379
2380 for (i = 0; i < numAtts; i++)
2381 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2382
2383 /* fetch any expressions needed for expressional indexes */
2384 ii->ii_Expressions = RelationGetIndexExpressions(index);
2385 ii->ii_ExpressionsState = NIL;
2386
2387 /* fetch index predicate if any */
2388 ii->ii_Predicate = RelationGetIndexPredicate(index);
2389 ii->ii_PredicateState = NULL;
2390
2391 /* fetch exclusion constraint info if any */
2392 if (indexStruct->indisexclusion)
2393 {
2394 RelationGetExclusionInfo(index,
2395 &ii->ii_ExclusionOps,
2396 &ii->ii_ExclusionProcs,
2397 &ii->ii_ExclusionStrats);
2398 }
2399 else
2400 {
2401 ii->ii_ExclusionOps = NULL;
2402 ii->ii_ExclusionProcs = NULL;
2403 ii->ii_ExclusionStrats = NULL;
2404 }
2405
2406 /* other info */
2407 ii->ii_Unique = indexStruct->indisunique;
2408 ii->ii_ReadyForInserts = indexStruct->indisready;
2409 /* assume not doing speculative insertion for now */
2410 ii->ii_UniqueOps = NULL;
2411 ii->ii_UniqueProcs = NULL;
2412 ii->ii_UniqueStrats = NULL;
2413
2414 /* initialize index-build state to default */
2415 ii->ii_Concurrent = false;
2416 ii->ii_BrokenHotChain = false;
2417 ii->ii_ParallelWorkers = 0;
2418
2419 /* set up for possible use by index AM */
2420 ii->ii_Am = index->rd_rel->relam;
2421 ii->ii_AmCache = NULL;
2422 ii->ii_Context = CurrentMemoryContext;
2423
2424 return ii;
2425 }
2426
2427 /* ----------------
2428 * BuildDummyIndexInfo
2429 * Construct a dummy IndexInfo record for an open index
2430 *
2431 * This differs from the real BuildIndexInfo in that it will never run any
2432 * user-defined code that might exist in index expressions or predicates.
2433 * Instead of the real index expressions, we return null constants that have
2434 * the right types/typmods/collations. Predicates and exclusion clauses are
2435 * just ignored. This is sufficient for the purpose of truncating an index,
2436 * since we will not need to actually evaluate the expressions or predicates;
2437 * the only thing that's likely to be done with the data is construction of
2438 * a tupdesc describing the index's rowtype.
2439 * ----------------
2440 */
2441 IndexInfo *
BuildDummyIndexInfo(Relation index)2442 BuildDummyIndexInfo(Relation index)
2443 {
2444 IndexInfo *ii;
2445 Form_pg_index indexStruct = index->rd_index;
2446 int i;
2447 int numAtts;
2448
2449 /* check the number of keys, and copy attr numbers into the IndexInfo */
2450 numAtts = indexStruct->indnatts;
2451 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2452 elog(ERROR, "invalid indnatts %d for index %u",
2453 numAtts, RelationGetRelid(index));
2454
2455 /*
2456 * Create the node, using dummy index expressions, and pretending there is
2457 * no predicate.
2458 */
2459 ii = makeIndexInfo(indexStruct->indnatts,
2460 indexStruct->indnkeyatts,
2461 index->rd_rel->relam,
2462 RelationGetDummyIndexExpressions(index),
2463 NIL,
2464 indexStruct->indisunique,
2465 indexStruct->indisready,
2466 false);
2467
2468 /* fill in attribute numbers */
2469 for (i = 0; i < numAtts; i++)
2470 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2471
2472 /* We ignore the exclusion constraint if any */
2473
2474 return ii;
2475 }
2476
2477 /*
2478 * CompareIndexInfo
2479 * Return whether the properties of two indexes (in different tables)
2480 * indicate that they have the "same" definitions.
2481 *
2482 * Note: passing collations and opfamilies separately is a kludge. Adding
2483 * them to IndexInfo may result in better coding here and elsewhere.
2484 *
2485 * Use convert_tuples_by_name_map(index2, index1) to build the attmap.
2486 */
2487 bool
CompareIndexInfo(IndexInfo * info1,IndexInfo * info2,Oid * collations1,Oid * collations2,Oid * opfamilies1,Oid * opfamilies2,AttrNumber * attmap,int maplen)2488 CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
2489 Oid *collations1, Oid *collations2,
2490 Oid *opfamilies1, Oid *opfamilies2,
2491 AttrNumber *attmap, int maplen)
2492 {
2493 int i;
2494
2495 if (info1->ii_Unique != info2->ii_Unique)
2496 return false;
2497
2498 /* indexes are only equivalent if they have the same access method */
2499 if (info1->ii_Am != info2->ii_Am)
2500 return false;
2501
2502 /* and same number of attributes */
2503 if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
2504 return false;
2505
2506 /* and same number of key attributes */
2507 if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
2508 return false;
2509
2510 /*
2511 * and columns match through the attribute map (actual attribute numbers
2512 * might differ!) Note that this implies that index columns that are
2513 * expressions appear in the same positions. We will next compare the
2514 * expressions themselves.
2515 */
2516 for (i = 0; i < info1->ii_NumIndexAttrs; i++)
2517 {
2518 if (maplen < info2->ii_IndexAttrNumbers[i])
2519 elog(ERROR, "incorrect attribute map");
2520
2521 /* ignore expressions at this stage */
2522 if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
2523 (attmap[info2->ii_IndexAttrNumbers[i] - 1] !=
2524 info1->ii_IndexAttrNumbers[i]))
2525 return false;
2526
2527 /* collation and opfamily is not valid for including columns */
2528 if (i >= info1->ii_NumIndexKeyAttrs)
2529 continue;
2530
2531 if (collations1[i] != collations2[i])
2532 return false;
2533 if (opfamilies1[i] != opfamilies2[i])
2534 return false;
2535 }
2536
2537 /*
2538 * For expression indexes: either both are expression indexes, or neither
2539 * is; if they are, make sure the expressions match.
2540 */
2541 if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
2542 return false;
2543 if (info1->ii_Expressions != NIL)
2544 {
2545 bool found_whole_row;
2546 Node *mapped;
2547
2548 mapped = map_variable_attnos((Node *) info2->ii_Expressions,
2549 1, 0, attmap, maplen,
2550 InvalidOid, &found_whole_row);
2551 if (found_whole_row)
2552 {
2553 /*
2554 * we could throw an error here, but seems out of scope for this
2555 * routine.
2556 */
2557 return false;
2558 }
2559
2560 if (!equal(info1->ii_Expressions, mapped))
2561 return false;
2562 }
2563
2564 /* Partial index predicates must be identical, if they exist */
2565 if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
2566 return false;
2567 if (info1->ii_Predicate != NULL)
2568 {
2569 bool found_whole_row;
2570 Node *mapped;
2571
2572 mapped = map_variable_attnos((Node *) info2->ii_Predicate,
2573 1, 0, attmap, maplen,
2574 InvalidOid, &found_whole_row);
2575 if (found_whole_row)
2576 {
2577 /*
2578 * we could throw an error here, but seems out of scope for this
2579 * routine.
2580 */
2581 return false;
2582 }
2583 if (!equal(info1->ii_Predicate, mapped))
2584 return false;
2585 }
2586
2587 /* No support currently for comparing exclusion indexes. */
2588 if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
2589 return false;
2590
2591 return true;
2592 }
2593
2594 /* ----------------
2595 * BuildSpeculativeIndexInfo
2596 * Add extra state to IndexInfo record
2597 *
2598 * For unique indexes, we usually don't want to add info to the IndexInfo for
2599 * checking uniqueness, since the B-Tree AM handles that directly. However,
2600 * in the case of speculative insertion, additional support is required.
2601 *
2602 * Do this processing here rather than in BuildIndexInfo() to not incur the
2603 * overhead in the common non-speculative cases.
2604 * ----------------
2605 */
2606 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)2607 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2608 {
2609 int indnkeyatts;
2610 int i;
2611
2612 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2613
2614 /*
2615 * fetch info for checking unique indexes
2616 */
2617 Assert(ii->ii_Unique);
2618
2619 if (index->rd_rel->relam != BTREE_AM_OID)
2620 elog(ERROR, "unexpected non-btree speculative unique index");
2621
2622 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2623 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2624 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2625
2626 /*
2627 * We have to look up the operator's strategy number. This provides a
2628 * cross-check that the operator does match the index.
2629 */
2630 /* We need the func OIDs and strategy numbers too */
2631 for (i = 0; i < indnkeyatts; i++)
2632 {
2633 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2634 ii->ii_UniqueOps[i] =
2635 get_opfamily_member(index->rd_opfamily[i],
2636 index->rd_opcintype[i],
2637 index->rd_opcintype[i],
2638 ii->ii_UniqueStrats[i]);
2639 if (!OidIsValid(ii->ii_UniqueOps[i]))
2640 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2641 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2642 index->rd_opcintype[i], index->rd_opfamily[i]);
2643 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2644 }
2645 }
2646
2647 /* ----------------
2648 * FormIndexDatum
2649 * Construct values[] and isnull[] arrays for a new index tuple.
2650 *
2651 * indexInfo Info about the index
2652 * slot Heap tuple for which we must prepare an index entry
2653 * estate executor state for evaluating any index expressions
2654 * values Array of index Datums (output area)
2655 * isnull Array of is-null indicators (output area)
2656 *
2657 * When there are no index expressions, estate may be NULL. Otherwise it
2658 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
2659 * context must point to the heap tuple passed in.
2660 *
2661 * Notice we don't actually call index_form_tuple() here; we just prepare
2662 * its input arrays values[] and isnull[]. This is because the index AM
2663 * may wish to alter the data before storage.
2664 * ----------------
2665 */
2666 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)2667 FormIndexDatum(IndexInfo *indexInfo,
2668 TupleTableSlot *slot,
2669 EState *estate,
2670 Datum *values,
2671 bool *isnull)
2672 {
2673 ListCell *indexpr_item;
2674 int i;
2675
2676 if (indexInfo->ii_Expressions != NIL &&
2677 indexInfo->ii_ExpressionsState == NIL)
2678 {
2679 /* First time through, set up expression evaluation state */
2680 indexInfo->ii_ExpressionsState =
2681 ExecPrepareExprList(indexInfo->ii_Expressions, estate);
2682 /* Check caller has set up context correctly */
2683 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
2684 }
2685 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
2686
2687 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
2688 {
2689 int keycol = indexInfo->ii_IndexAttrNumbers[i];
2690 Datum iDatum;
2691 bool isNull;
2692
2693 if (keycol < 0)
2694 iDatum = slot_getsysattr(slot, keycol, &isNull);
2695 else if (keycol != 0)
2696 {
2697 /*
2698 * Plain index column; get the value we need directly from the
2699 * heap tuple.
2700 */
2701 iDatum = slot_getattr(slot, keycol, &isNull);
2702 }
2703 else
2704 {
2705 /*
2706 * Index expression --- need to evaluate it.
2707 */
2708 if (indexpr_item == NULL)
2709 elog(ERROR, "wrong number of index expressions");
2710 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
2711 GetPerTupleExprContext(estate),
2712 &isNull);
2713 indexpr_item = lnext(indexpr_item);
2714 }
2715 values[i] = iDatum;
2716 isnull[i] = isNull;
2717 }
2718
2719 if (indexpr_item != NULL)
2720 elog(ERROR, "wrong number of index expressions");
2721 }
2722
2723
2724 /*
2725 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
2726 *
2727 * This routine updates the pg_class row of either an index or its parent
2728 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
2729 * to ensure we can do all the necessary work in just one update.
2730 *
2731 * hasindex: set relhasindex to this value
2732 * reltuples: if >= 0, set reltuples to this value; else no change
2733 *
2734 * If reltuples >= 0, relpages and relallvisible are also updated (using
2735 * RelationGetNumberOfBlocks() and visibilitymap_count()).
2736 *
2737 * NOTE: an important side-effect of this operation is that an SI invalidation
2738 * message is sent out to all backends --- including me --- causing relcache
2739 * entries to be flushed or updated with the new data. This must happen even
2740 * if we find that no change is needed in the pg_class row. When updating
2741 * a heap entry, this ensures that other backends find out about the new
2742 * index. When updating an index, it's important because some index AMs
2743 * expect a relcache flush to occur after REINDEX.
2744 */
2745 static void
index_update_stats(Relation rel,bool hasindex,double reltuples)2746 index_update_stats(Relation rel,
2747 bool hasindex,
2748 double reltuples)
2749 {
2750 Oid relid = RelationGetRelid(rel);
2751 Relation pg_class;
2752 HeapTuple tuple;
2753 Form_pg_class rd_rel;
2754 bool dirty;
2755
2756 /*
2757 * We always update the pg_class row using a non-transactional,
2758 * overwrite-in-place update. There are several reasons for this:
2759 *
2760 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
2761 *
2762 * 2. We could be reindexing pg_class itself, in which case we can't move
2763 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
2764 * not know about all the indexes yet (see reindex_relation).
2765 *
2766 * 3. Because we execute CREATE INDEX with just share lock on the parent
2767 * rel (to allow concurrent index creations), an ordinary update could
2768 * suffer a tuple-concurrently-updated failure against another CREATE
2769 * INDEX committing at about the same time. We can avoid that by having
2770 * them both do nontransactional updates (we assume they will both be
2771 * trying to change the pg_class row to the same thing, so it doesn't
2772 * matter which goes first).
2773 *
2774 * It is safe to use a non-transactional update even though our
2775 * transaction could still fail before committing. Setting relhasindex
2776 * true is safe even if there are no indexes (VACUUM will eventually fix
2777 * it). And of course the new relpages and reltuples counts are correct
2778 * regardless. However, we don't want to change relpages (or
2779 * relallvisible) if the caller isn't providing an updated reltuples
2780 * count, because that would bollix the reltuples/relpages ratio which is
2781 * what's really important.
2782 */
2783
2784 pg_class = table_open(RelationRelationId, RowExclusiveLock);
2785
2786 /*
2787 * Make a copy of the tuple to update. Normally we use the syscache, but
2788 * we can't rely on that during bootstrap or while reindexing pg_class
2789 * itself.
2790 */
2791 if (IsBootstrapProcessingMode() ||
2792 ReindexIsProcessingHeap(RelationRelationId))
2793 {
2794 /* don't assume syscache will work */
2795 TableScanDesc pg_class_scan;
2796 ScanKeyData key[1];
2797
2798 ScanKeyInit(&key[0],
2799 Anum_pg_class_oid,
2800 BTEqualStrategyNumber, F_OIDEQ,
2801 ObjectIdGetDatum(relid));
2802
2803 pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
2804 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
2805 tuple = heap_copytuple(tuple);
2806 table_endscan(pg_class_scan);
2807 }
2808 else
2809 {
2810 /* normal case, use syscache */
2811 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2812 }
2813
2814 if (!HeapTupleIsValid(tuple))
2815 elog(ERROR, "could not find tuple for relation %u", relid);
2816 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
2817
2818 /* Should this be a more comprehensive test? */
2819 Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
2820
2821 /* Apply required updates, if any, to copied tuple */
2822
2823 dirty = false;
2824 if (rd_rel->relhasindex != hasindex)
2825 {
2826 rd_rel->relhasindex = hasindex;
2827 dirty = true;
2828 }
2829
2830 if (reltuples >= 0)
2831 {
2832 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2833 BlockNumber relallvisible;
2834
2835 if (rd_rel->relkind != RELKIND_INDEX)
2836 visibilitymap_count(rel, &relallvisible, NULL);
2837 else /* don't bother for indexes */
2838 relallvisible = 0;
2839
2840 if (rd_rel->relpages != (int32) relpages)
2841 {
2842 rd_rel->relpages = (int32) relpages;
2843 dirty = true;
2844 }
2845 if (rd_rel->reltuples != (float4) reltuples)
2846 {
2847 rd_rel->reltuples = (float4) reltuples;
2848 dirty = true;
2849 }
2850 if (rd_rel->relallvisible != (int32) relallvisible)
2851 {
2852 rd_rel->relallvisible = (int32) relallvisible;
2853 dirty = true;
2854 }
2855 }
2856
2857 /*
2858 * If anything changed, write out the tuple
2859 */
2860 if (dirty)
2861 {
2862 heap_inplace_update(pg_class, tuple);
2863 /* the above sends a cache inval message */
2864 }
2865 else
2866 {
2867 /* no need to change tuple, but force relcache inval anyway */
2868 CacheInvalidateRelcacheByTuple(tuple);
2869 }
2870
2871 heap_freetuple(tuple);
2872
2873 table_close(pg_class, RowExclusiveLock);
2874 }
2875
2876
2877 /*
2878 * index_build - invoke access-method-specific index build procedure
2879 *
2880 * On entry, the index's catalog entries are valid, and its physical disk
2881 * file has been created but is empty. We call the AM-specific build
2882 * procedure to fill in the index contents. We then update the pg_class
2883 * entries of the index and heap relation as needed, using statistics
2884 * returned by ambuild as well as data passed by the caller.
2885 *
2886 * isreindex indicates we are recreating a previously-existing index.
2887 * parallel indicates if parallelism may be useful.
2888 *
2889 * Note: before Postgres 8.2, the passed-in heap and index Relations
2890 * were automatically closed by this routine. This is no longer the case.
2891 * The caller opened 'em, and the caller should close 'em.
2892 */
2893 void
index_build(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool isreindex,bool parallel)2894 index_build(Relation heapRelation,
2895 Relation indexRelation,
2896 IndexInfo *indexInfo,
2897 bool isreindex,
2898 bool parallel)
2899 {
2900 IndexBuildResult *stats;
2901 Oid save_userid;
2902 int save_sec_context;
2903 int save_nestlevel;
2904
2905 /*
2906 * sanity checks
2907 */
2908 Assert(RelationIsValid(indexRelation));
2909 Assert(PointerIsValid(indexRelation->rd_indam));
2910 Assert(PointerIsValid(indexRelation->rd_indam->ambuild));
2911 Assert(PointerIsValid(indexRelation->rd_indam->ambuildempty));
2912
2913 /*
2914 * Determine worker process details for parallel CREATE INDEX. Currently,
2915 * only btree has support for parallel builds.
2916 *
2917 * Note that planner considers parallel safety for us.
2918 */
2919 if (parallel && IsNormalProcessingMode() &&
2920 indexRelation->rd_rel->relam == BTREE_AM_OID)
2921 indexInfo->ii_ParallelWorkers =
2922 plan_create_index_workers(RelationGetRelid(heapRelation),
2923 RelationGetRelid(indexRelation));
2924
2925 if (indexInfo->ii_ParallelWorkers == 0)
2926 ereport(DEBUG1,
2927 (errmsg("building index \"%s\" on table \"%s\" serially",
2928 RelationGetRelationName(indexRelation),
2929 RelationGetRelationName(heapRelation))));
2930 else
2931 ereport(DEBUG1,
2932 (errmsg_plural("building index \"%s\" on table \"%s\" with request for %d parallel worker",
2933 "building index \"%s\" on table \"%s\" with request for %d parallel workers",
2934 indexInfo->ii_ParallelWorkers,
2935 RelationGetRelationName(indexRelation),
2936 RelationGetRelationName(heapRelation),
2937 indexInfo->ii_ParallelWorkers)));
2938
2939 /*
2940 * Switch to the table owner's userid, so that any index functions are run
2941 * as that user. Also lock down security-restricted operations and
2942 * arrange to make GUC variable changes local to this command.
2943 */
2944 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2945 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2946 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2947 save_nestlevel = NewGUCNestLevel();
2948
2949 /* Set up initial progress report status */
2950 {
2951 const int index[] = {
2952 PROGRESS_CREATEIDX_PHASE,
2953 PROGRESS_CREATEIDX_SUBPHASE,
2954 PROGRESS_CREATEIDX_TUPLES_DONE,
2955 PROGRESS_CREATEIDX_TUPLES_TOTAL,
2956 PROGRESS_SCAN_BLOCKS_DONE,
2957 PROGRESS_SCAN_BLOCKS_TOTAL
2958 };
2959 const int64 val[] = {
2960 PROGRESS_CREATEIDX_PHASE_BUILD,
2961 PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE,
2962 0, 0, 0, 0
2963 };
2964
2965 pgstat_progress_update_multi_param(6, index, val);
2966 }
2967
2968 /*
2969 * Call the access method's build procedure
2970 */
2971 stats = indexRelation->rd_indam->ambuild(heapRelation, indexRelation,
2972 indexInfo);
2973 Assert(PointerIsValid(stats));
2974
2975 /*
2976 * If this is an unlogged index, we may need to write out an init fork for
2977 * it -- but we must first check whether one already exists. If, for
2978 * example, an unlogged relation is truncated in the transaction that
2979 * created it, or truncated twice in a subsequent transaction, the
2980 * relfilenode won't change, and nothing needs to be done here.
2981 */
2982 if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2983 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2984 {
2985 RelationOpenSmgr(indexRelation);
2986 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2987 indexRelation->rd_indam->ambuildempty(indexRelation);
2988 }
2989
2990 /*
2991 * If we found any potentially broken HOT chains, mark the index as not
2992 * being usable until the current transaction is below the event horizon.
2993 * See src/backend/access/heap/README.HOT for discussion. Also set this
2994 * if early pruning/vacuuming is enabled for the heap relation. While it
2995 * might become safe to use the index earlier based on actual cleanup
2996 * activity and other active transactions, the test for that would be much
2997 * more complex and would require some form of blocking, so keep it simple
2998 * and fast by just using the current transaction.
2999 *
3000 * However, when reindexing an existing index, we should do nothing here.
3001 * Any HOT chains that are broken with respect to the index must predate
3002 * the index's original creation, so there is no need to change the
3003 * index's usability horizon. Moreover, we *must not* try to change the
3004 * index's pg_index entry while reindexing pg_index itself, and this
3005 * optimization nicely prevents that. The more complex rules needed for a
3006 * reindex are handled separately after this function returns.
3007 *
3008 * We also need not set indcheckxmin during a concurrent index build,
3009 * because we won't set indisvalid true until all transactions that care
3010 * about the broken HOT chains or early pruning/vacuuming are gone.
3011 *
3012 * Therefore, this code path can only be taken during non-concurrent
3013 * CREATE INDEX. Thus the fact that heap_update will set the pg_index
3014 * tuple's xmin doesn't matter, because that tuple was created in the
3015 * current transaction anyway. That also means we don't need to worry
3016 * about any concurrent readers of the tuple; no other transaction can see
3017 * it yet.
3018 */
3019 if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
3020 !isreindex &&
3021 !indexInfo->ii_Concurrent)
3022 {
3023 Oid indexId = RelationGetRelid(indexRelation);
3024 Relation pg_index;
3025 HeapTuple indexTuple;
3026 Form_pg_index indexForm;
3027
3028 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3029
3030 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3031 ObjectIdGetDatum(indexId));
3032 if (!HeapTupleIsValid(indexTuple))
3033 elog(ERROR, "cache lookup failed for index %u", indexId);
3034 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3035
3036 /* If it's a new index, indcheckxmin shouldn't be set ... */
3037 Assert(!indexForm->indcheckxmin);
3038
3039 indexForm->indcheckxmin = true;
3040 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3041
3042 heap_freetuple(indexTuple);
3043 table_close(pg_index, RowExclusiveLock);
3044 }
3045
3046 /*
3047 * Update heap and index pg_class rows
3048 */
3049 index_update_stats(heapRelation,
3050 true,
3051 stats->heap_tuples);
3052
3053 index_update_stats(indexRelation,
3054 false,
3055 stats->index_tuples);
3056
3057 /* Make the updated catalog row versions visible */
3058 CommandCounterIncrement();
3059
3060 /*
3061 * If it's for an exclusion constraint, make a second pass over the heap
3062 * to verify that the constraint is satisfied. We must not do this until
3063 * the index is fully valid. (Broken HOT chains shouldn't matter, though;
3064 * see comments for IndexCheckExclusion.)
3065 */
3066 if (indexInfo->ii_ExclusionOps != NULL)
3067 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
3068
3069 /* Roll back any GUC changes executed by index functions */
3070 AtEOXact_GUC(false, save_nestlevel);
3071
3072 /* Restore userid and security context */
3073 SetUserIdAndSecContext(save_userid, save_sec_context);
3074 }
3075
3076 /*
3077 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
3078 *
3079 * When creating an exclusion constraint, we first build the index normally
3080 * and then rescan the heap to check for conflicts. We assume that we only
3081 * need to validate tuples that are live according to an up-to-date snapshot,
3082 * and that these were correctly indexed even in the presence of broken HOT
3083 * chains. This should be OK since we are holding at least ShareLock on the
3084 * table, meaning there can be no uncommitted updates from other transactions.
3085 * (Note: that wouldn't necessarily work for system catalogs, since many
3086 * operations release write lock early on the system catalogs.)
3087 */
3088 static void
IndexCheckExclusion(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo)3089 IndexCheckExclusion(Relation heapRelation,
3090 Relation indexRelation,
3091 IndexInfo *indexInfo)
3092 {
3093 TableScanDesc scan;
3094 Datum values[INDEX_MAX_KEYS];
3095 bool isnull[INDEX_MAX_KEYS];
3096 ExprState *predicate;
3097 TupleTableSlot *slot;
3098 EState *estate;
3099 ExprContext *econtext;
3100 Snapshot snapshot;
3101
3102 /*
3103 * If we are reindexing the target index, mark it as no longer being
3104 * reindexed, to forestall an Assert in index_beginscan when we try to use
3105 * the index for probes. This is OK because the index is now fully valid.
3106 */
3107 if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
3108 ResetReindexProcessing();
3109
3110 /*
3111 * Need an EState for evaluation of index expressions and partial-index
3112 * predicates. Also a slot to hold the current tuple.
3113 */
3114 estate = CreateExecutorState();
3115 econtext = GetPerTupleExprContext(estate);
3116 slot = table_slot_create(heapRelation, NULL);
3117
3118 /* Arrange for econtext's scan tuple to be the tuple under test */
3119 econtext->ecxt_scantuple = slot;
3120
3121 /* Set up execution state for predicate, if any. */
3122 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3123
3124 /*
3125 * Scan all live tuples in the base relation.
3126 */
3127 snapshot = RegisterSnapshot(GetLatestSnapshot());
3128 scan = table_beginscan_strat(heapRelation, /* relation */
3129 snapshot, /* snapshot */
3130 0, /* number of keys */
3131 NULL, /* scan key */
3132 true, /* buffer access strategy OK */
3133 true); /* syncscan OK */
3134
3135 while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
3136 {
3137 CHECK_FOR_INTERRUPTS();
3138
3139 /*
3140 * In a partial index, ignore tuples that don't satisfy the predicate.
3141 */
3142 if (predicate != NULL)
3143 {
3144 if (!ExecQual(predicate, econtext))
3145 continue;
3146 }
3147
3148 /*
3149 * Extract index column values, including computing expressions.
3150 */
3151 FormIndexDatum(indexInfo,
3152 slot,
3153 estate,
3154 values,
3155 isnull);
3156
3157 /*
3158 * Check that this tuple has no conflicts.
3159 */
3160 check_exclusion_constraint(heapRelation,
3161 indexRelation, indexInfo,
3162 &(slot->tts_tid), values, isnull,
3163 estate, true);
3164
3165 MemoryContextReset(econtext->ecxt_per_tuple_memory);
3166 }
3167
3168 table_endscan(scan);
3169 UnregisterSnapshot(snapshot);
3170
3171 ExecDropSingleTupleTableSlot(slot);
3172
3173 FreeExecutorState(estate);
3174
3175 /* These may have been pointing to the now-gone estate */
3176 indexInfo->ii_ExpressionsState = NIL;
3177 indexInfo->ii_PredicateState = NULL;
3178 }
3179
3180
3181 /*
3182 * validate_index - support code for concurrent index builds
3183 *
3184 * We do a concurrent index build by first inserting the catalog entry for the
3185 * index via index_create(), marking it not indisready and not indisvalid.
3186 * Then we commit our transaction and start a new one, then we wait for all
3187 * transactions that could have been modifying the table to terminate. Now
3188 * we know that any subsequently-started transactions will see the index and
3189 * honor its constraints on HOT updates; so while existing HOT-chains might
3190 * be broken with respect to the index, no currently live tuple will have an
3191 * incompatible HOT update done to it. We now build the index normally via
3192 * index_build(), while holding a weak lock that allows concurrent
3193 * insert/update/delete. Also, we index only tuples that are valid
3194 * as of the start of the scan (see table_index_build_scan), whereas a normal
3195 * build takes care to include recently-dead tuples. This is OK because
3196 * we won't mark the index valid until all transactions that might be able
3197 * to see those tuples are gone. The reason for doing that is to avoid
3198 * bogus unique-index failures due to concurrent UPDATEs (we might see
3199 * different versions of the same row as being valid when we pass over them,
3200 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
3201 * does not contain any tuples added to the table while we built the index.
3202 *
3203 * Next, we mark the index "indisready" (but still not "indisvalid") and
3204 * commit the second transaction and start a third. Again we wait for all
3205 * transactions that could have been modifying the table to terminate. Now
3206 * we know that any subsequently-started transactions will see the index and
3207 * insert their new tuples into it. We then take a new reference snapshot
3208 * which is passed to validate_index(). Any tuples that are valid according
3209 * to this snap, but are not in the index, must be added to the index.
3210 * (Any tuples committed live after the snap will be inserted into the
3211 * index by their originating transaction. Any tuples committed dead before
3212 * the snap need not be indexed, because we will wait out all transactions
3213 * that might care about them before we mark the index valid.)
3214 *
3215 * validate_index() works by first gathering all the TIDs currently in the
3216 * index, using a bulkdelete callback that just stores the TIDs and doesn't
3217 * ever say "delete it". (This should be faster than a plain indexscan;
3218 * also, not all index AMs support full-index indexscan.) Then we sort the
3219 * TIDs, and finally scan the table doing a "merge join" against the TID list
3220 * to see which tuples are missing from the index. Thus we will ensure that
3221 * all tuples valid according to the reference snapshot are in the index.
3222 *
3223 * Building a unique index this way is tricky: we might try to insert a
3224 * tuple that is already dead or is in process of being deleted, and we
3225 * mustn't have a uniqueness failure against an updated version of the same
3226 * row. We could try to check the tuple to see if it's already dead and tell
3227 * index_insert() not to do the uniqueness check, but that still leaves us
3228 * with a race condition against an in-progress update. To handle that,
3229 * we expect the index AM to recheck liveness of the to-be-inserted tuple
3230 * before it declares a uniqueness error.
3231 *
3232 * After completing validate_index(), we wait until all transactions that
3233 * were alive at the time of the reference snapshot are gone; this is
3234 * necessary to be sure there are none left with a transaction snapshot
3235 * older than the reference (and hence possibly able to see tuples we did
3236 * not index). Then we mark the index "indisvalid" and commit. Subsequent
3237 * transactions will be able to use it for queries.
3238 *
3239 * Doing two full table scans is a brute-force strategy. We could try to be
3240 * cleverer, eg storing new tuples in a special area of the table (perhaps
3241 * making the table append-only by setting use_fsm). However that would
3242 * add yet more locking issues.
3243 */
3244 void
validate_index(Oid heapId,Oid indexId,Snapshot snapshot)3245 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
3246 {
3247 Relation heapRelation,
3248 indexRelation;
3249 IndexInfo *indexInfo;
3250 IndexVacuumInfo ivinfo;
3251 ValidateIndexState state;
3252 Oid save_userid;
3253 int save_sec_context;
3254 int save_nestlevel;
3255
3256 {
3257 const int index[] = {
3258 PROGRESS_CREATEIDX_PHASE,
3259 PROGRESS_CREATEIDX_TUPLES_DONE,
3260 PROGRESS_CREATEIDX_TUPLES_TOTAL,
3261 PROGRESS_SCAN_BLOCKS_DONE,
3262 PROGRESS_SCAN_BLOCKS_TOTAL
3263 };
3264 const int64 val[] = {
3265 PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
3266 0, 0, 0, 0
3267 };
3268
3269 pgstat_progress_update_multi_param(5, index, val);
3270 }
3271
3272 /* Open and lock the parent heap relation */
3273 heapRelation = table_open(heapId, ShareUpdateExclusiveLock);
3274 /* And the target index relation */
3275 indexRelation = index_open(indexId, RowExclusiveLock);
3276
3277 /*
3278 * Fetch info needed for index_insert. (You might think this should be
3279 * passed in from DefineIndex, but its copy is long gone due to having
3280 * been built in a previous transaction.)
3281 */
3282 indexInfo = BuildIndexInfo(indexRelation);
3283
3284 /* mark build is concurrent just for consistency */
3285 indexInfo->ii_Concurrent = true;
3286
3287 /*
3288 * Switch to the table owner's userid, so that any index functions are run
3289 * as that user. Also lock down security-restricted operations and
3290 * arrange to make GUC variable changes local to this command.
3291 */
3292 GetUserIdAndSecContext(&save_userid, &save_sec_context);
3293 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
3294 save_sec_context | SECURITY_RESTRICTED_OPERATION);
3295 save_nestlevel = NewGUCNestLevel();
3296
3297 /*
3298 * Scan the index and gather up all the TIDs into a tuplesort object.
3299 */
3300 ivinfo.index = indexRelation;
3301 ivinfo.analyze_only = false;
3302 ivinfo.report_progress = true;
3303 ivinfo.estimated_count = true;
3304 ivinfo.message_level = DEBUG2;
3305 ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
3306 ivinfo.strategy = NULL;
3307
3308 /*
3309 * Encode TIDs as int8 values for the sort, rather than directly sorting
3310 * item pointers. This can be significantly faster, primarily because TID
3311 * is a pass-by-reference type on all platforms, whereas int8 is
3312 * pass-by-value on most platforms.
3313 */
3314 state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
3315 InvalidOid, false,
3316 maintenance_work_mem,
3317 NULL, false);
3318 state.htups = state.itups = state.tups_inserted = 0;
3319
3320 /* ambulkdelete updates progress metrics */
3321 (void) index_bulk_delete(&ivinfo, NULL,
3322 validate_index_callback, (void *) &state);
3323
3324 /* Execute the sort */
3325 {
3326 const int index[] = {
3327 PROGRESS_CREATEIDX_PHASE,
3328 PROGRESS_SCAN_BLOCKS_DONE,
3329 PROGRESS_SCAN_BLOCKS_TOTAL
3330 };
3331 const int64 val[] = {
3332 PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT,
3333 0, 0
3334 };
3335
3336 pgstat_progress_update_multi_param(3, index, val);
3337 }
3338 tuplesort_performsort(state.tuplesort);
3339
3340 /*
3341 * Now scan the heap and "merge" it with the index
3342 */
3343 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3344 PROGRESS_CREATEIDX_PHASE_VALIDATE_TABLESCAN);
3345 table_index_validate_scan(heapRelation,
3346 indexRelation,
3347 indexInfo,
3348 snapshot,
3349 &state);
3350
3351 /* Done with tuplesort object */
3352 tuplesort_end(state.tuplesort);
3353
3354 elog(DEBUG2,
3355 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
3356 state.htups, state.itups, state.tups_inserted);
3357
3358 /* Roll back any GUC changes executed by index functions */
3359 AtEOXact_GUC(false, save_nestlevel);
3360
3361 /* Restore userid and security context */
3362 SetUserIdAndSecContext(save_userid, save_sec_context);
3363
3364 /* Close rels, but keep locks */
3365 index_close(indexRelation, NoLock);
3366 table_close(heapRelation, NoLock);
3367 }
3368
3369 /*
3370 * validate_index_callback - bulkdelete callback to collect the index TIDs
3371 */
3372 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3373 validate_index_callback(ItemPointer itemptr, void *opaque)
3374 {
3375 ValidateIndexState *state = (ValidateIndexState *) opaque;
3376 int64 encoded = itemptr_encode(itemptr);
3377
3378 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3379 state->itups += 1;
3380 return false; /* never actually delete anything */
3381 }
3382
3383 /*
3384 * index_set_state_flags - adjust pg_index state flags
3385 *
3386 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3387 * flags that denote the index's state.
3388 *
3389 * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3390 * tuple, so other sessions will hear about the update as soon as we commit.
3391 */
3392 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3393 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3394 {
3395 Relation pg_index;
3396 HeapTuple indexTuple;
3397 Form_pg_index indexForm;
3398
3399 /* Open pg_index and fetch a writable copy of the index's tuple */
3400 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3401
3402 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3403 ObjectIdGetDatum(indexId));
3404 if (!HeapTupleIsValid(indexTuple))
3405 elog(ERROR, "cache lookup failed for index %u", indexId);
3406 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3407
3408 /* Perform the requested state change on the copy */
3409 switch (action)
3410 {
3411 case INDEX_CREATE_SET_READY:
3412 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3413 Assert(indexForm->indislive);
3414 Assert(!indexForm->indisready);
3415 Assert(!indexForm->indisvalid);
3416 indexForm->indisready = true;
3417 break;
3418 case INDEX_CREATE_SET_VALID:
3419 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3420 Assert(indexForm->indislive);
3421 Assert(indexForm->indisready);
3422 Assert(!indexForm->indisvalid);
3423 indexForm->indisvalid = true;
3424 break;
3425 case INDEX_DROP_CLEAR_VALID:
3426
3427 /*
3428 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3429 *
3430 * If indisready == true we leave it set so the index still gets
3431 * maintained by active transactions. We only need to ensure that
3432 * indisvalid is false. (We don't assert that either is initially
3433 * true, though, since we want to be able to retry a DROP INDEX
3434 * CONCURRENTLY that failed partway through.)
3435 *
3436 * Note: the CLUSTER logic assumes that indisclustered cannot be
3437 * set on any invalid index, so clear that flag too.
3438 */
3439 indexForm->indisvalid = false;
3440 indexForm->indisclustered = false;
3441 break;
3442 case INDEX_DROP_SET_DEAD:
3443
3444 /*
3445 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3446 *
3447 * We clear both indisready and indislive, because we not only
3448 * want to stop updates, we want to prevent sessions from touching
3449 * the index at all.
3450 */
3451 Assert(!indexForm->indisvalid);
3452 indexForm->indisready = false;
3453 indexForm->indislive = false;
3454 break;
3455 }
3456
3457 /* ... and update it */
3458 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3459
3460 table_close(pg_index, RowExclusiveLock);
3461 }
3462
3463
3464 /*
3465 * IndexGetRelation: given an index's relation OID, get the OID of the
3466 * relation it is an index on. Uses the system cache.
3467 */
3468 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3469 IndexGetRelation(Oid indexId, bool missing_ok)
3470 {
3471 HeapTuple tuple;
3472 Form_pg_index index;
3473 Oid result;
3474
3475 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3476 if (!HeapTupleIsValid(tuple))
3477 {
3478 if (missing_ok)
3479 return InvalidOid;
3480 elog(ERROR, "cache lookup failed for index %u", indexId);
3481 }
3482 index = (Form_pg_index) GETSTRUCT(tuple);
3483 Assert(index->indexrelid == indexId);
3484
3485 result = index->indrelid;
3486 ReleaseSysCache(tuple);
3487 return result;
3488 }
3489
3490 /*
3491 * reindex_index - This routine is used to recreate a single index
3492 */
3493 void
reindex_index(Oid indexId,bool skip_constraint_checks,char persistence,int options)3494 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3495 int options)
3496 {
3497 Relation iRel,
3498 heapRelation;
3499 Oid heapId;
3500 IndexInfo *indexInfo;
3501 volatile bool skipped_constraint = false;
3502 PGRUsage ru0;
3503 bool progress = (options & REINDEXOPT_REPORT_PROGRESS) != 0;
3504
3505 pg_rusage_init(&ru0);
3506
3507 /*
3508 * Open and lock the parent heap relation. ShareLock is sufficient since
3509 * we only need to be sure no schema or data changes are going on.
3510 */
3511 heapId = IndexGetRelation(indexId, false);
3512 heapRelation = table_open(heapId, ShareLock);
3513
3514 if (progress)
3515 {
3516 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3517 heapId);
3518 pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
3519 PROGRESS_CREATEIDX_COMMAND_REINDEX);
3520 pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
3521 indexId);
3522 }
3523
3524 /*
3525 * Open the target index relation and get an exclusive lock on it, to
3526 * ensure that no one else is touching this particular index.
3527 */
3528 iRel = index_open(indexId, AccessExclusiveLock);
3529
3530 if (progress)
3531 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
3532 iRel->rd_rel->relam);
3533
3534 /*
3535 * The case of reindexing partitioned tables and indexes is handled
3536 * differently by upper layers, so this case shouldn't arise.
3537 */
3538 if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
3539 elog(ERROR, "unsupported relation kind for index \"%s\"",
3540 RelationGetRelationName(iRel));
3541
3542 /*
3543 * Don't allow reindex on temp tables of other backends ... their local
3544 * buffer manager is not going to cope.
3545 */
3546 if (RELATION_IS_OTHER_TEMP(iRel))
3547 ereport(ERROR,
3548 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3549 errmsg("cannot reindex temporary tables of other sessions")));
3550
3551 /*
3552 * Don't allow reindex of an invalid index on TOAST table. This is a
3553 * leftover from a failed REINDEX CONCURRENTLY, and if rebuilt it would
3554 * not be possible to drop it anymore.
3555 */
3556 if (IsToastNamespace(RelationGetNamespace(iRel)) &&
3557 !get_index_isvalid(indexId))
3558 ereport(ERROR,
3559 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3560 errmsg("cannot reindex invalid index on TOAST table")));
3561
3562 /*
3563 * Also check for active uses of the index in the current transaction; we
3564 * don't want to reindex underneath an open indexscan.
3565 */
3566 CheckTableNotInUse(iRel, "REINDEX INDEX");
3567
3568 /*
3569 * All predicate locks on the index are about to be made invalid. Promote
3570 * them to relation locks on the heap.
3571 */
3572 TransferPredicateLocksToHeapRelation(iRel);
3573
3574 /* Fetch info needed for index_build */
3575 indexInfo = BuildIndexInfo(iRel);
3576
3577 /* If requested, skip checking uniqueness/exclusion constraints */
3578 if (skip_constraint_checks)
3579 {
3580 if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3581 skipped_constraint = true;
3582 indexInfo->ii_Unique = false;
3583 indexInfo->ii_ExclusionOps = NULL;
3584 indexInfo->ii_ExclusionProcs = NULL;
3585 indexInfo->ii_ExclusionStrats = NULL;
3586 }
3587
3588 /* Suppress use of the target index while rebuilding it */
3589 SetReindexProcessing(heapId, indexId);
3590
3591 /* Create a new physical relation for the index */
3592 RelationSetNewRelfilenode(iRel, persistence);
3593
3594 /* Initialize the index and rebuild */
3595 /* Note: we do not need to re-establish pkey setting */
3596 index_build(heapRelation, iRel, indexInfo, true, true);
3597
3598 /* Re-allow use of target index */
3599 ResetReindexProcessing();
3600
3601 /*
3602 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3603 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3604 * and we didn't skip a uniqueness check, we can now mark it valid. This
3605 * allows REINDEX to be used to clean up in such cases.
3606 *
3607 * We can also reset indcheckxmin, because we have now done a
3608 * non-concurrent index build, *except* in the case where index_build
3609 * found some still-broken HOT chains. If it did, and we don't have to
3610 * change any of the other flags, we just leave indcheckxmin alone (note
3611 * that index_build won't have changed it, because this is a reindex).
3612 * This is okay and desirable because not updating the tuple leaves the
3613 * index's usability horizon (recorded as the tuple's xmin value) the same
3614 * as it was.
3615 *
3616 * But, if the index was invalid/not-ready/dead and there were broken HOT
3617 * chains, we had better force indcheckxmin true, because the normal
3618 * argument that the HOT chains couldn't conflict with the index is
3619 * suspect for an invalid index. (A conflict is definitely possible if
3620 * the index was dead. It probably shouldn't happen otherwise, but let's
3621 * be conservative.) In this case advancing the usability horizon is
3622 * appropriate.
3623 *
3624 * Another reason for avoiding unnecessary updates here is that while
3625 * reindexing pg_index itself, we must not try to update tuples in it.
3626 * pg_index's indexes should always have these flags in their clean state,
3627 * so that won't happen.
3628 *
3629 * If early pruning/vacuuming is enabled for the heap relation, the
3630 * usability horizon must be advanced to the current transaction on every
3631 * build or rebuild. pg_index is OK in this regard because catalog tables
3632 * are not subject to early cleanup.
3633 */
3634 if (!skipped_constraint)
3635 {
3636 Relation pg_index;
3637 HeapTuple indexTuple;
3638 Form_pg_index indexForm;
3639 bool index_bad;
3640 bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3641
3642 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3643
3644 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3645 ObjectIdGetDatum(indexId));
3646 if (!HeapTupleIsValid(indexTuple))
3647 elog(ERROR, "cache lookup failed for index %u", indexId);
3648 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3649
3650 index_bad = (!indexForm->indisvalid ||
3651 !indexForm->indisready ||
3652 !indexForm->indislive);
3653 if (index_bad ||
3654 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3655 early_pruning_enabled)
3656 {
3657 if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3658 indexForm->indcheckxmin = false;
3659 else if (index_bad || early_pruning_enabled)
3660 indexForm->indcheckxmin = true;
3661 indexForm->indisvalid = true;
3662 indexForm->indisready = true;
3663 indexForm->indislive = true;
3664 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3665
3666 /*
3667 * Invalidate the relcache for the table, so that after we commit
3668 * all sessions will refresh the table's index list. This ensures
3669 * that if anyone misses seeing the pg_index row during this
3670 * update, they'll refresh their list before attempting any update
3671 * on the table.
3672 */
3673 CacheInvalidateRelcache(heapRelation);
3674 }
3675
3676 table_close(pg_index, RowExclusiveLock);
3677 }
3678
3679 /* Log what we did */
3680 if (options & REINDEXOPT_VERBOSE)
3681 ereport(INFO,
3682 (errmsg("index \"%s\" was reindexed",
3683 get_rel_name(indexId)),
3684 errdetail_internal("%s",
3685 pg_rusage_show(&ru0))));
3686
3687 if (progress)
3688 pgstat_progress_end_command();
3689
3690 /* Close rels, but keep locks */
3691 index_close(iRel, NoLock);
3692 table_close(heapRelation, NoLock);
3693 }
3694
3695 /*
3696 * reindex_relation - This routine is used to recreate all indexes
3697 * of a relation (and optionally its toast relation too, if any).
3698 *
3699 * "flags" is a bitmask that can include any combination of these bits:
3700 *
3701 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3702 *
3703 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3704 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3705 * indexes are inconsistent with it. This makes things tricky if the relation
3706 * is a system catalog that we might consult during the reindexing. To deal
3707 * with that case, we mark all of the indexes as pending rebuild so that they
3708 * won't be trusted until rebuilt. The caller is required to call us *without*
3709 * having made the rebuilt table visible by doing CommandCounterIncrement;
3710 * we'll do CCI after having collected the index list. (This way we can still
3711 * use catalog indexes while collecting the list.)
3712 *
3713 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3714 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3715 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3716 * rebuild an index if constraint inconsistency is suspected. For optimal
3717 * performance, other callers should include the flag only after transforming
3718 * the data in a manner that risks a change in constraint validity.
3719 *
3720 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3721 * rebuilt indexes to unlogged.
3722 *
3723 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3724 * rebuilt indexes to permanent.
3725 *
3726 * Returns true if any indexes were rebuilt (including toast table's index
3727 * when relevant). Note that a CommandCounterIncrement will occur after each
3728 * index rebuild.
3729 */
3730 bool
reindex_relation(Oid relid,int flags,int options)3731 reindex_relation(Oid relid, int flags, int options)
3732 {
3733 Relation rel;
3734 Oid toast_relid;
3735 List *indexIds;
3736 char persistence;
3737 bool result;
3738 ListCell *indexId;
3739 int i;
3740
3741 /*
3742 * Open and lock the relation. ShareLock is sufficient since we only need
3743 * to prevent schema and data changes in it. The lock level used here
3744 * should match ReindexTable().
3745 */
3746 rel = table_open(relid, ShareLock);
3747
3748 /*
3749 * This may be useful when implemented someday; but that day is not today.
3750 * For now, avoid erroring out when called in a multi-table context
3751 * (REINDEX SCHEMA) and happen to come across a partitioned table. The
3752 * partitions may be reindexed on their own anyway.
3753 */
3754 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3755 {
3756 ereport(WARNING,
3757 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3758 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
3759 RelationGetRelationName(rel))));
3760 table_close(rel, ShareLock);
3761 return false;
3762 }
3763
3764 toast_relid = rel->rd_rel->reltoastrelid;
3765
3766 /*
3767 * Get the list of index OIDs for this relation. (We trust to the
3768 * relcache to get this with a sequential scan if ignoring system
3769 * indexes.)
3770 */
3771 indexIds = RelationGetIndexList(rel);
3772
3773 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3774 {
3775 /* Suppress use of all the indexes until they are rebuilt */
3776 SetReindexPending(indexIds);
3777
3778 /*
3779 * Make the new heap contents visible --- now things might be
3780 * inconsistent!
3781 */
3782 CommandCounterIncrement();
3783 }
3784
3785 /*
3786 * Compute persistence of indexes: same as that of owning rel, unless
3787 * caller specified otherwise.
3788 */
3789 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3790 persistence = RELPERSISTENCE_UNLOGGED;
3791 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3792 persistence = RELPERSISTENCE_PERMANENT;
3793 else
3794 persistence = rel->rd_rel->relpersistence;
3795
3796 /* Reindex all the indexes. */
3797 i = 1;
3798 foreach(indexId, indexIds)
3799 {
3800 Oid indexOid = lfirst_oid(indexId);
3801 Oid indexNamespaceId = get_rel_namespace(indexOid);
3802
3803 /*
3804 * Skip any invalid indexes on a TOAST table. These can only be
3805 * duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
3806 * rebuilt it would not be possible to drop them anymore.
3807 */
3808 if (IsToastNamespace(indexNamespaceId) &&
3809 !get_index_isvalid(indexOid))
3810 {
3811 ereport(WARNING,
3812 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3813 errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
3814 get_namespace_name(indexNamespaceId),
3815 get_rel_name(indexOid))));
3816 continue;
3817 }
3818
3819 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3820 persistence, options);
3821
3822 CommandCounterIncrement();
3823
3824 /* Index should no longer be in the pending list */
3825 Assert(!ReindexIsProcessingIndex(indexOid));
3826
3827 /* Set index rebuild count */
3828 pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
3829 i);
3830 i++;
3831 }
3832
3833 /*
3834 * Close rel, but continue to hold the lock.
3835 */
3836 table_close(rel, NoLock);
3837
3838 result = (indexIds != NIL);
3839
3840 /*
3841 * If the relation has a secondary toast rel, reindex that too while we
3842 * still hold the lock on the master table.
3843 */
3844 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3845 result |= reindex_relation(toast_relid, flags, options);
3846
3847 return result;
3848 }
3849
3850
/* ----------------------------------------------------------------
 *		System index reindexing support
 *
 * When we are busy reindexing a system index, this code provides support
 * for preventing catalog lookups from using that index.  We also make use
 * of this to catch attempted uses of user indexes during reindexing of
 * those indexes.  This information is propagated to parallel workers;
 * attempting to change it during a parallel operation is not permitted.
 *
 * The state below is written by SetReindexProcessing(), SetReindexPending()
 * and RemoveReindexPending(), cleared by ResetReindexProcessing() and
 * ResetReindexState(), and copied into a worker by RestoreReindexState().
 * ----------------------------------------------------------------
 */

/* OID of the heap whose index is being rebuilt; InvalidOid when none is */
static Oid	currentlyReindexedHeap = InvalidOid;
/* OID of the index being rebuilt right now; InvalidOid when none is */
static Oid	currentlyReindexedIndex = InvalidOid;
/* OIDs of indexes queued for rebuild and not to be trusted meanwhile */
static List *pendingReindexedIndexes = NIL;
/* Transaction nest level noted when the state above was last established */
static int	reindexingNestLevel = 0;
3866
3867 /*
3868 * ReindexIsProcessingHeap
3869 * True if heap specified by OID is currently being reindexed.
3870 */
3871 bool
ReindexIsProcessingHeap(Oid heapOid)3872 ReindexIsProcessingHeap(Oid heapOid)
3873 {
3874 return heapOid == currentlyReindexedHeap;
3875 }
3876
3877 /*
3878 * ReindexIsCurrentlyProcessingIndex
3879 * True if index specified by OID is currently being reindexed.
3880 */
3881 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3882 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3883 {
3884 return indexOid == currentlyReindexedIndex;
3885 }
3886
3887 /*
3888 * ReindexIsProcessingIndex
3889 * True if index specified by OID is currently being reindexed,
3890 * or should be treated as invalid because it is awaiting reindex.
3891 */
3892 bool
ReindexIsProcessingIndex(Oid indexOid)3893 ReindexIsProcessingIndex(Oid indexOid)
3894 {
3895 return indexOid == currentlyReindexedIndex ||
3896 list_member_oid(pendingReindexedIndexes, indexOid);
3897 }
3898
3899 /*
3900 * SetReindexProcessing
3901 * Set flag that specified heap/index are being reindexed.
3902 */
3903 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3904 SetReindexProcessing(Oid heapOid, Oid indexOid)
3905 {
3906 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3907 /* Reindexing is not re-entrant. */
3908 if (OidIsValid(currentlyReindexedHeap))
3909 elog(ERROR, "cannot reindex while reindexing");
3910 currentlyReindexedHeap = heapOid;
3911 currentlyReindexedIndex = indexOid;
3912 /* Index is no longer "pending" reindex. */
3913 RemoveReindexPending(indexOid);
3914 /* This may have been set already, but in case it isn't, do so now. */
3915 reindexingNestLevel = GetCurrentTransactionNestLevel();
3916 }
3917
3918 /*
3919 * ResetReindexProcessing
3920 * Unset reindexing status.
3921 */
3922 static void
ResetReindexProcessing(void)3923 ResetReindexProcessing(void)
3924 {
3925 currentlyReindexedHeap = InvalidOid;
3926 currentlyReindexedIndex = InvalidOid;
3927 /* reindexingNestLevel remains set till end of (sub)transaction */
3928 }
3929
3930 /*
3931 * SetReindexPending
3932 * Mark the given indexes as pending reindex.
3933 *
3934 * NB: we assume that the current memory context stays valid throughout.
3935 */
3936 static void
SetReindexPending(List * indexes)3937 SetReindexPending(List *indexes)
3938 {
3939 /* Reindexing is not re-entrant. */
3940 if (pendingReindexedIndexes)
3941 elog(ERROR, "cannot reindex while reindexing");
3942 if (IsInParallelMode())
3943 elog(ERROR, "cannot modify reindex state during a parallel operation");
3944 pendingReindexedIndexes = list_copy(indexes);
3945 reindexingNestLevel = GetCurrentTransactionNestLevel();
3946 }
3947
3948 /*
3949 * RemoveReindexPending
3950 * Remove the given index from the pending list.
3951 */
3952 static void
RemoveReindexPending(Oid indexOid)3953 RemoveReindexPending(Oid indexOid)
3954 {
3955 if (IsInParallelMode())
3956 elog(ERROR, "cannot modify reindex state during a parallel operation");
3957 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3958 indexOid);
3959 }
3960
3961 /*
3962 * ResetReindexState
3963 * Clear all reindexing state during (sub)transaction abort.
3964 */
3965 void
ResetReindexState(int nestLevel)3966 ResetReindexState(int nestLevel)
3967 {
3968 /*
3969 * Because reindexing is not re-entrant, we don't need to cope with nested
3970 * reindexing states. We just need to avoid messing up the outer-level
3971 * state in case a subtransaction fails within a REINDEX. So checking the
3972 * current nest level against that of the reindex operation is sufficient.
3973 */
3974 if (reindexingNestLevel >= nestLevel)
3975 {
3976 currentlyReindexedHeap = InvalidOid;
3977 currentlyReindexedIndex = InvalidOid;
3978
3979 /*
3980 * We needn't try to release the contents of pendingReindexedIndexes;
3981 * that list should be in a transaction-lifespan context, so it will
3982 * go away automatically.
3983 */
3984 pendingReindexedIndexes = NIL;
3985
3986 reindexingNestLevel = 0;
3987 }
3988 }
3989
3990 /*
3991 * EstimateReindexStateSpace
3992 * Estimate space needed to pass reindex state to parallel workers.
3993 */
3994 Size
EstimateReindexStateSpace(void)3995 EstimateReindexStateSpace(void)
3996 {
3997 return offsetof(SerializedReindexState, pendingReindexedIndexes)
3998 + mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
3999 }
4000
4001 /*
4002 * SerializeReindexState
4003 * Serialize reindex state for parallel workers.
4004 */
4005 void
SerializeReindexState(Size maxsize,char * start_address)4006 SerializeReindexState(Size maxsize, char *start_address)
4007 {
4008 SerializedReindexState *sistate = (SerializedReindexState *) start_address;
4009 int c = 0;
4010 ListCell *lc;
4011
4012 sistate->currentlyReindexedHeap = currentlyReindexedHeap;
4013 sistate->currentlyReindexedIndex = currentlyReindexedIndex;
4014 sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
4015 foreach(lc, pendingReindexedIndexes)
4016 sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
4017 }
4018
4019 /*
4020 * RestoreReindexState
4021 * Restore reindex state in a parallel worker.
4022 */
4023 void
RestoreReindexState(void * reindexstate)4024 RestoreReindexState(void *reindexstate)
4025 {
4026 SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
4027 int c = 0;
4028 MemoryContext oldcontext;
4029
4030 currentlyReindexedHeap = sistate->currentlyReindexedHeap;
4031 currentlyReindexedIndex = sistate->currentlyReindexedIndex;
4032
4033 Assert(pendingReindexedIndexes == NIL);
4034 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
4035 for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
4036 pendingReindexedIndexes =
4037 lappend_oid(pendingReindexedIndexes,
4038 sistate->pendingReindexedIndexes[c]);
4039 MemoryContextSwitchTo(oldcontext);
4040
4041 /* Note the worker has its own transaction nesting level */
4042 reindexingNestLevel = GetCurrentTransactionNestLevel();
4043 }
4044