1 /*-------------------------------------------------------------------------
2 *
3 * index.c
4 * code to create and destroy POSTGRES index relations
5 *
6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
19 *
20 *-------------------------------------------------------------------------
21 */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/heapam.h"
28 #include "access/multixact.h"
29 #include "access/reloptions.h"
30 #include "access/relscan.h"
31 #include "access/sysattr.h"
32 #include "access/tableam.h"
33 #include "access/toast_compression.h"
34 #include "access/transam.h"
35 #include "access/visibilitymap.h"
36 #include "access/xact.h"
37 #include "bootstrap/bootstrap.h"
38 #include "catalog/binary_upgrade.h"
39 #include "catalog/catalog.h"
40 #include "catalog/dependency.h"
41 #include "catalog/heap.h"
42 #include "catalog/index.h"
43 #include "catalog/objectaccess.h"
44 #include "catalog/partition.h"
45 #include "catalog/pg_am.h"
46 #include "catalog/pg_collation.h"
47 #include "catalog/pg_constraint.h"
48 #include "catalog/pg_depend.h"
49 #include "catalog/pg_description.h"
50 #include "catalog/pg_inherits.h"
51 #include "catalog/pg_opclass.h"
52 #include "catalog/pg_operator.h"
53 #include "catalog/pg_tablespace.h"
54 #include "catalog/pg_trigger.h"
55 #include "catalog/pg_type.h"
56 #include "catalog/storage.h"
57 #include "commands/event_trigger.h"
58 #include "commands/progress.h"
59 #include "commands/tablecmds.h"
60 #include "commands/tablespace.h"
61 #include "commands/trigger.h"
62 #include "executor/executor.h"
63 #include "miscadmin.h"
64 #include "nodes/makefuncs.h"
65 #include "nodes/nodeFuncs.h"
66 #include "optimizer/optimizer.h"
67 #include "parser/parser.h"
68 #include "pgstat.h"
69 #include "rewrite/rewriteManip.h"
70 #include "storage/bufmgr.h"
71 #include "storage/lmgr.h"
72 #include "storage/predicate.h"
73 #include "storage/procarray.h"
74 #include "storage/smgr.h"
75 #include "utils/builtins.h"
76 #include "utils/datum.h"
77 #include "utils/fmgroids.h"
78 #include "utils/guc.h"
79 #include "utils/inval.h"
80 #include "utils/lsyscache.h"
81 #include "utils/memutils.h"
82 #include "utils/pg_rusage.h"
83 #include "utils/rel.h"
84 #include "utils/snapmgr.h"
85 #include "utils/syscache.h"
86 #include "utils/tuplesort.h"
87
88 /* Potentially set by pg_upgrade_support functions */
89 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
90
91 /*
92 * Pointer-free representation of variables used when reindexing system
93 * catalogs; we use this to propagate those values to parallel workers.
94 */
95 typedef struct
96 {
97 Oid currentlyReindexedHeap;
98 Oid currentlyReindexedIndex;
99 int numPendingReindexedIndexes;
100 Oid pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
101 } SerializedReindexState;
102
103 /* non-export function prototypes */
104 static bool relationHasPrimaryKey(Relation rel);
105 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
106 IndexInfo *indexInfo,
107 List *indexColNames,
108 Oid accessMethodObjectId,
109 Oid *collationObjectId,
110 Oid *classObjectId);
111 static void InitializeAttributeOids(Relation indexRelation,
112 int numatts, Oid indexoid);
113 static void AppendAttributeTuples(Relation indexRelation, Datum *attopts);
114 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
115 Oid parentIndexId,
116 IndexInfo *indexInfo,
117 Oid *collationOids,
118 Oid *classOids,
119 int16 *coloptions,
120 bool primary,
121 bool isexclusion,
122 bool immediate,
123 bool isvalid,
124 bool isready);
125 static void index_update_stats(Relation rel,
126 bool hasindex,
127 double reltuples);
128 static void IndexCheckExclusion(Relation heapRelation,
129 Relation indexRelation,
130 IndexInfo *indexInfo);
131 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
132 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
133 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
134 static void ResetReindexProcessing(void);
135 static void SetReindexPending(List *indexes);
136 static void RemoveReindexPending(Oid indexOid);
137
138
139 /*
140 * relationHasPrimaryKey
141 * See whether an existing relation has a primary key.
142 *
143 * Caller must have suitable lock on the relation.
144 *
145 * Note: we intentionally do not check indisvalid here; that's because this
146 * is used to enforce the rule that there can be only one indisprimary index,
147 * and we want that to be true even if said index is invalid.
148 */
149 static bool
relationHasPrimaryKey(Relation rel)150 relationHasPrimaryKey(Relation rel)
151 {
152 bool result = false;
153 List *indexoidlist;
154 ListCell *indexoidscan;
155
156 /*
157 * Get the list of index OIDs for the table from the relcache, and look up
158 * each one in the pg_index syscache until we find one marked primary key
159 * (hopefully there isn't more than one such).
160 */
161 indexoidlist = RelationGetIndexList(rel);
162
163 foreach(indexoidscan, indexoidlist)
164 {
165 Oid indexoid = lfirst_oid(indexoidscan);
166 HeapTuple indexTuple;
167
168 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
169 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
170 elog(ERROR, "cache lookup failed for index %u", indexoid);
171 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
172 ReleaseSysCache(indexTuple);
173 if (result)
174 break;
175 }
176
177 list_free(indexoidlist);
178
179 return result;
180 }
181
182 /*
183 * index_check_primary_key
184 * Apply special checks needed before creating a PRIMARY KEY index
185 *
186 * This processing used to be in DefineIndex(), but has been split out
187 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
188 *
189 * We check for a pre-existing primary key, and that all columns of the index
190 * are simple column references (not expressions), and that all those
191 * columns are marked NOT NULL. If not, fail.
192 *
193 * We used to automatically change unmarked columns to NOT NULL here by doing
194 * our own local ALTER TABLE command. But that doesn't work well if we're
195 * executing one subcommand of an ALTER TABLE: the operations may not get
196 * performed in the right order overall. Now we expect that the parser
197 * inserted any required ALTER TABLE SET NOT NULL operations before trying
198 * to create a primary-key index.
199 *
200 * Caller had better have at least ShareLock on the table, else the not-null
201 * checking isn't trustworthy.
202 */
203 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)204 index_check_primary_key(Relation heapRel,
205 IndexInfo *indexInfo,
206 bool is_alter_table,
207 IndexStmt *stmt)
208 {
209 int i;
210
211 /*
212 * If ALTER TABLE or CREATE TABLE .. PARTITION OF, check that there isn't
213 * already a PRIMARY KEY. In CREATE TABLE for an ordinary relation, we
214 * have faith that the parser rejected multiple pkey clauses; and CREATE
215 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
216 */
217 if ((is_alter_table || heapRel->rd_rel->relispartition) &&
218 relationHasPrimaryKey(heapRel))
219 {
220 ereport(ERROR,
221 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
222 errmsg("multiple primary keys for table \"%s\" are not allowed",
223 RelationGetRelationName(heapRel))));
224 }
225
226 /*
227 * Check that all of the attributes in a primary key are marked as not
228 * null. (We don't really expect to see that; it'd mean the parser messed
229 * up. But it seems wise to check anyway.)
230 */
231 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
232 {
233 AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i];
234 HeapTuple atttuple;
235 Form_pg_attribute attform;
236
237 if (attnum == 0)
238 ereport(ERROR,
239 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
240 errmsg("primary keys cannot be expressions")));
241
242 /* System attributes are never null, so no need to check */
243 if (attnum < 0)
244 continue;
245
246 atttuple = SearchSysCache2(ATTNUM,
247 ObjectIdGetDatum(RelationGetRelid(heapRel)),
248 Int16GetDatum(attnum));
249 if (!HeapTupleIsValid(atttuple))
250 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
251 attnum, RelationGetRelid(heapRel));
252 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
253
254 if (!attform->attnotnull)
255 ereport(ERROR,
256 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
257 errmsg("primary key column \"%s\" is not marked NOT NULL",
258 NameStr(attform->attname))));
259
260 ReleaseSysCache(atttuple);
261 }
262 }
263
264 /*
265 * ConstructTupleDescriptor
266 *
267 * Build an index tuple descriptor for a new index
268 */
269 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid * collationObjectId,Oid * classObjectId)270 ConstructTupleDescriptor(Relation heapRelation,
271 IndexInfo *indexInfo,
272 List *indexColNames,
273 Oid accessMethodObjectId,
274 Oid *collationObjectId,
275 Oid *classObjectId)
276 {
277 int numatts = indexInfo->ii_NumIndexAttrs;
278 int numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
279 ListCell *colnames_item = list_head(indexColNames);
280 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
281 IndexAmRoutine *amroutine;
282 TupleDesc heapTupDesc;
283 TupleDesc indexTupDesc;
284 int natts; /* #atts in heap rel --- for error checks */
285 int i;
286
287 /* We need access to the index AM's API struct */
288 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
289
290 /* ... and to the table's tuple descriptor */
291 heapTupDesc = RelationGetDescr(heapRelation);
292 natts = RelationGetForm(heapRelation)->relnatts;
293
294 /*
295 * allocate the new tuple descriptor
296 */
297 indexTupDesc = CreateTemplateTupleDesc(numatts);
298
299 /*
300 * Fill in the pg_attribute row.
301 */
302 for (i = 0; i < numatts; i++)
303 {
304 AttrNumber atnum = indexInfo->ii_IndexAttrNumbers[i];
305 Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
306 HeapTuple tuple;
307 Form_pg_type typeTup;
308 Form_pg_opclass opclassTup;
309 Oid keyType;
310
311 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
312 to->attnum = i + 1;
313 to->attstattarget = -1;
314 to->attcacheoff = -1;
315 to->attislocal = true;
316 to->attcollation = (i < numkeyatts) ?
317 collationObjectId[i] : InvalidOid;
318
319 /*
320 * Set the attribute name as specified by caller.
321 */
322 if (colnames_item == NULL) /* shouldn't happen */
323 elog(ERROR, "too few entries in colnames list");
324 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
325 colnames_item = lnext(indexColNames, colnames_item);
326
327 /*
328 * For simple index columns, we copy some pg_attribute fields from the
329 * parent relation. For expressions we have to look at the expression
330 * result.
331 */
332 if (atnum != 0)
333 {
334 /* Simple index column */
335 const FormData_pg_attribute *from;
336
337 Assert(atnum > 0); /* should've been caught above */
338
339 if (atnum > natts) /* safety check */
340 elog(ERROR, "invalid column number %d", atnum);
341 from = TupleDescAttr(heapTupDesc,
342 AttrNumberGetAttrOffset(atnum));
343
344 to->atttypid = from->atttypid;
345 to->attlen = from->attlen;
346 to->attndims = from->attndims;
347 to->atttypmod = from->atttypmod;
348 to->attbyval = from->attbyval;
349 to->attalign = from->attalign;
350 to->attstorage = from->attstorage;
351 to->attcompression = from->attcompression;
352 }
353 else
354 {
355 /* Expressional index */
356 Node *indexkey;
357
358 if (indexpr_item == NULL) /* shouldn't happen */
359 elog(ERROR, "too few entries in indexprs list");
360 indexkey = (Node *) lfirst(indexpr_item);
361 indexpr_item = lnext(indexInfo->ii_Expressions, indexpr_item);
362
363 /*
364 * Lookup the expression type in pg_type for the type length etc.
365 */
366 keyType = exprType(indexkey);
367 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
368 if (!HeapTupleIsValid(tuple))
369 elog(ERROR, "cache lookup failed for type %u", keyType);
370 typeTup = (Form_pg_type) GETSTRUCT(tuple);
371
372 /*
373 * Assign some of the attributes values. Leave the rest.
374 */
375 to->atttypid = keyType;
376 to->attlen = typeTup->typlen;
377 to->atttypmod = exprTypmod(indexkey);
378 to->attbyval = typeTup->typbyval;
379 to->attalign = typeTup->typalign;
380 to->attstorage = typeTup->typstorage;
381
382 /*
383 * For expression columns, set attcompression invalid, since
384 * there's no table column from which to copy the value. Whenever
385 * we actually need to compress a value, we'll use whatever the
386 * current value of default_toast_compression is at that point in
387 * time.
388 */
389 to->attcompression = InvalidCompressionMethod;
390
391 ReleaseSysCache(tuple);
392
393 /*
394 * Make sure the expression yields a type that's safe to store in
395 * an index. We need this defense because we have index opclasses
396 * for pseudo-types such as "record", and the actually stored type
397 * had better be safe; eg, a named composite type is okay, an
398 * anonymous record type is not. The test is the same as for
399 * whether a table column is of a safe type (which is why we
400 * needn't check for the non-expression case).
401 */
402 CheckAttributeType(NameStr(to->attname),
403 to->atttypid, to->attcollation,
404 NIL, 0);
405 }
406
407 /*
408 * We do not yet have the correct relation OID for the index, so just
409 * set it invalid for now. InitializeAttributeOids() will fix it
410 * later.
411 */
412 to->attrelid = InvalidOid;
413
414 /*
415 * Check the opclass and index AM to see if either provides a keytype
416 * (overriding the attribute type). Opclass (if exists) takes
417 * precedence.
418 */
419 keyType = amroutine->amkeytype;
420
421 if (i < indexInfo->ii_NumIndexKeyAttrs)
422 {
423 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
424 if (!HeapTupleIsValid(tuple))
425 elog(ERROR, "cache lookup failed for opclass %u",
426 classObjectId[i]);
427 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
428 if (OidIsValid(opclassTup->opckeytype))
429 keyType = opclassTup->opckeytype;
430
431 /*
432 * If keytype is specified as ANYELEMENT, and opcintype is
433 * ANYARRAY, then the attribute type must be an array (else it'd
434 * not have matched this opclass); use its element type.
435 *
436 * We could also allow ANYCOMPATIBLE/ANYCOMPATIBLEARRAY here, but
437 * there seems no need to do so; there's no reason to declare an
438 * opclass as taking ANYCOMPATIBLEARRAY rather than ANYARRAY.
439 */
440 if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
441 {
442 keyType = get_base_element_type(to->atttypid);
443 if (!OidIsValid(keyType))
444 elog(ERROR, "could not get element type of array type %u",
445 to->atttypid);
446 }
447
448 ReleaseSysCache(tuple);
449 }
450
451 /*
452 * If a key type different from the heap value is specified, update
453 * the type-related fields in the index tupdesc.
454 */
455 if (OidIsValid(keyType) && keyType != to->atttypid)
456 {
457 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
458 if (!HeapTupleIsValid(tuple))
459 elog(ERROR, "cache lookup failed for type %u", keyType);
460 typeTup = (Form_pg_type) GETSTRUCT(tuple);
461
462 to->atttypid = keyType;
463 to->atttypmod = -1;
464 to->attlen = typeTup->typlen;
465 to->attbyval = typeTup->typbyval;
466 to->attalign = typeTup->typalign;
467 to->attstorage = typeTup->typstorage;
468 /* As above, use the default compression method in this case */
469 to->attcompression = InvalidCompressionMethod;
470
471 ReleaseSysCache(tuple);
472 }
473 }
474
475 pfree(amroutine);
476
477 return indexTupDesc;
478 }
479
480 /* ----------------------------------------------------------------
481 * InitializeAttributeOids
482 * ----------------------------------------------------------------
483 */
484 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)485 InitializeAttributeOids(Relation indexRelation,
486 int numatts,
487 Oid indexoid)
488 {
489 TupleDesc tupleDescriptor;
490 int i;
491
492 tupleDescriptor = RelationGetDescr(indexRelation);
493
494 for (i = 0; i < numatts; i += 1)
495 TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
496 }
497
498 /* ----------------------------------------------------------------
499 * AppendAttributeTuples
500 * ----------------------------------------------------------------
501 */
502 static void
AppendAttributeTuples(Relation indexRelation,Datum * attopts)503 AppendAttributeTuples(Relation indexRelation, Datum *attopts)
504 {
505 Relation pg_attribute;
506 CatalogIndexState indstate;
507 TupleDesc indexTupDesc;
508
509 /*
510 * open the attribute relation and its indexes
511 */
512 pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
513
514 indstate = CatalogOpenIndexes(pg_attribute);
515
516 /*
517 * insert data from new index's tupdesc into pg_attribute
518 */
519 indexTupDesc = RelationGetDescr(indexRelation);
520
521 InsertPgAttributeTuples(pg_attribute, indexTupDesc, InvalidOid, attopts, indstate);
522
523 CatalogCloseIndexes(indstate);
524
525 table_close(pg_attribute, RowExclusiveLock);
526 }
527
528 /* ----------------------------------------------------------------
529 * UpdateIndexRelation
530 *
531 * Construct and insert a new entry in the pg_index catalog
532 * ----------------------------------------------------------------
533 */
534 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,Oid parentIndexId,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid,bool isready)535 UpdateIndexRelation(Oid indexoid,
536 Oid heapoid,
537 Oid parentIndexId,
538 IndexInfo *indexInfo,
539 Oid *collationOids,
540 Oid *classOids,
541 int16 *coloptions,
542 bool primary,
543 bool isexclusion,
544 bool immediate,
545 bool isvalid,
546 bool isready)
547 {
548 int2vector *indkey;
549 oidvector *indcollation;
550 oidvector *indclass;
551 int2vector *indoption;
552 Datum exprsDatum;
553 Datum predDatum;
554 Datum values[Natts_pg_index];
555 bool nulls[Natts_pg_index];
556 Relation pg_index;
557 HeapTuple tuple;
558 int i;
559
560 /*
561 * Copy the index key, opclass, and indoption info into arrays (should we
562 * make the caller pass them like this to start with?)
563 */
564 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
565 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
566 indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
567 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
568 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
569 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
570
571 /*
572 * Convert the index expressions (if any) to a text datum
573 */
574 if (indexInfo->ii_Expressions != NIL)
575 {
576 char *exprsString;
577
578 exprsString = nodeToString(indexInfo->ii_Expressions);
579 exprsDatum = CStringGetTextDatum(exprsString);
580 pfree(exprsString);
581 }
582 else
583 exprsDatum = (Datum) 0;
584
585 /*
586 * Convert the index predicate (if any) to a text datum. Note we convert
587 * implicit-AND format to normal explicit-AND for storage.
588 */
589 if (indexInfo->ii_Predicate != NIL)
590 {
591 char *predString;
592
593 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
594 predDatum = CStringGetTextDatum(predString);
595 pfree(predString);
596 }
597 else
598 predDatum = (Datum) 0;
599
600
601 /*
602 * open the system catalog index relation
603 */
604 pg_index = table_open(IndexRelationId, RowExclusiveLock);
605
606 /*
607 * Build a pg_index tuple
608 */
609 MemSet(nulls, false, sizeof(nulls));
610
611 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
612 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
613 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
614 values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
615 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
616 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
617 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
618 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
619 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
620 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
621 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
622 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
623 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
624 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
625 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
626 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
627 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
628 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
629 values[Anum_pg_index_indexprs - 1] = exprsDatum;
630 if (exprsDatum == (Datum) 0)
631 nulls[Anum_pg_index_indexprs - 1] = true;
632 values[Anum_pg_index_indpred - 1] = predDatum;
633 if (predDatum == (Datum) 0)
634 nulls[Anum_pg_index_indpred - 1] = true;
635
636 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
637
638 /*
639 * insert the tuple into the pg_index catalog
640 */
641 CatalogTupleInsert(pg_index, tuple);
642
643 /*
644 * close the relation and free the tuple
645 */
646 table_close(pg_index, RowExclusiveLock);
647 heap_freetuple(tuple);
648 }
649
650
651 /*
652 * index_create
653 *
654 * heapRelation: table to build index on (suitably locked by caller)
655 * indexRelationName: name to give the new index relation
656 * indexRelationId: normally, pass InvalidOid to let this routine
657 * generate an OID for the index. During bootstrap this may be
658 * nonzero to specify a preselected OID.
659 * parentIndexRelid: if creating an index partition, the OID of the
660 * parent index; otherwise InvalidOid.
661 * parentConstraintId: if creating a constraint on a partition, the OID
662 * of the constraint in the parent; otherwise InvalidOid.
663 * relFileNode: normally, pass InvalidOid to get new storage. May be
664 * nonzero to attach an existing valid build.
665 * indexInfo: same info executor uses to insert into the index
666 * indexColNames: column names to use for index (List of char *)
667 * accessMethodObjectId: OID of index AM to use
668 * tableSpaceId: OID of tablespace to use
669 * collationObjectId: array of collation OIDs, one per index column
670 * classObjectId: array of index opclass OIDs, one per index column
671 * coloptions: array of per-index-column indoption settings
672 * reloptions: AM-specific options
673 * flags: bitmask that can include any combination of these bits:
674 * INDEX_CREATE_IS_PRIMARY
675 * the index is a primary key
676 * INDEX_CREATE_ADD_CONSTRAINT:
677 * invoke index_constraint_create also
678 * INDEX_CREATE_SKIP_BUILD:
679 * skip the index_build() step for the moment; caller must do it
680 * later (typically via reindex_index())
681 * INDEX_CREATE_CONCURRENT:
682 * do not lock the table against writers. The index will be
683 * marked "invalid" and the caller must take additional steps
684 * to fix it up.
685 * INDEX_CREATE_IF_NOT_EXISTS:
686 * do not throw an error if a relation with the same name
687 * already exists.
688 * INDEX_CREATE_PARTITIONED:
689 * create a partitioned index (table must be partitioned)
690 * constr_flags: flags passed to index_constraint_create
691 * (only if INDEX_CREATE_ADD_CONSTRAINT is set)
692 * allow_system_table_mods: allow table to be a system catalog
693 * is_internal: if true, post creation hook for new index
694 * constraintId: if not NULL, receives OID of created constraint
695 *
696 * Returns the OID of the created index.
697 */
698 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid parentIndexRelid,Oid parentConstraintId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bits16 flags,bits16 constr_flags,bool allow_system_table_mods,bool is_internal,Oid * constraintId)699 index_create(Relation heapRelation,
700 const char *indexRelationName,
701 Oid indexRelationId,
702 Oid parentIndexRelid,
703 Oid parentConstraintId,
704 Oid relFileNode,
705 IndexInfo *indexInfo,
706 List *indexColNames,
707 Oid accessMethodObjectId,
708 Oid tableSpaceId,
709 Oid *collationObjectId,
710 Oid *classObjectId,
711 int16 *coloptions,
712 Datum reloptions,
713 bits16 flags,
714 bits16 constr_flags,
715 bool allow_system_table_mods,
716 bool is_internal,
717 Oid *constraintId)
718 {
719 Oid heapRelationId = RelationGetRelid(heapRelation);
720 Relation pg_class;
721 Relation indexRelation;
722 TupleDesc indexTupDesc;
723 bool shared_relation;
724 bool mapped_relation;
725 bool is_exclusion;
726 Oid namespaceId;
727 int i;
728 char relpersistence;
729 bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
730 bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
731 bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
732 bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
733 char relkind;
734 TransactionId relfrozenxid;
735 MultiXactId relminmxid;
736
737 /* constraint flags can only be set when a constraint is requested */
738 Assert((constr_flags == 0) ||
739 ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
740 /* partitioned indexes must never be "built" by themselves */
741 Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
742
743 relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
744 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
745
746 pg_class = table_open(RelationRelationId, RowExclusiveLock);
747
748 /*
749 * The index will be in the same namespace as its parent table, and is
750 * shared across databases if and only if the parent is. Likewise, it
751 * will use the relfilenode map if and only if the parent does; and it
752 * inherits the parent's relpersistence.
753 */
754 namespaceId = RelationGetNamespace(heapRelation);
755 shared_relation = heapRelation->rd_rel->relisshared;
756 mapped_relation = RelationIsMapped(heapRelation);
757 relpersistence = heapRelation->rd_rel->relpersistence;
758
759 /*
760 * check parameters
761 */
762 if (indexInfo->ii_NumIndexAttrs < 1)
763 elog(ERROR, "must index at least one column");
764
765 if (!allow_system_table_mods &&
766 IsSystemRelation(heapRelation) &&
767 IsNormalProcessingMode())
768 ereport(ERROR,
769 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
770 errmsg("user-defined indexes on system catalog tables are not supported")));
771
772 /*
773 * Btree text_pattern_ops uses text_eq as the equality operator, which is
774 * fine as long as the collation is deterministic; text_eq then reduces to
775 * bitwise equality and so it is semantically compatible with the other
776 * operators and functions in that opclass. But with a nondeterministic
777 * collation, text_eq could yield results that are incompatible with the
778 * actual behavior of the index (which is determined by the opclass's
779 * comparison function). We prevent such problems by refusing creation of
780 * an index with that opclass and a nondeterministic collation.
781 *
782 * The same applies to varchar_pattern_ops and bpchar_pattern_ops. If we
783 * find more cases, we might decide to create a real mechanism for marking
784 * opclasses as incompatible with nondeterminism; but for now, this small
785 * hack suffices.
786 *
787 * Another solution is to use a special operator, not text_eq, as the
788 * equality opclass member; but that is undesirable because it would
789 * prevent index usage in many queries that work fine today.
790 */
791 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
792 {
793 Oid collation = collationObjectId[i];
794 Oid opclass = classObjectId[i];
795
796 if (collation)
797 {
798 if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
799 opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
800 opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
801 !get_collation_isdeterministic(collation))
802 {
803 HeapTuple classtup;
804
805 classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
806 if (!HeapTupleIsValid(classtup))
807 elog(ERROR, "cache lookup failed for operator class %u", opclass);
808 ereport(ERROR,
809 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
810 errmsg("nondeterministic collations are not supported for operator class \"%s\"",
811 NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
812 ReleaseSysCache(classtup);
813 }
814 }
815 }
816
817 /*
818 * Concurrent index build on a system catalog is unsafe because we tend to
819 * release locks before committing in catalogs.
820 */
821 if (concurrent &&
822 IsCatalogRelation(heapRelation))
823 ereport(ERROR,
824 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
825 errmsg("concurrent index creation on system catalog tables is not supported")));
826
827 /*
828 * This case is currently not supported. There's no way to ask for it in
829 * the grammar with CREATE INDEX, but it can happen with REINDEX.
830 */
831 if (concurrent && is_exclusion)
832 ereport(ERROR,
833 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
834 errmsg("concurrent index creation for exclusion constraints is not supported")));
835
836 /*
837 * We cannot allow indexing a shared relation after initdb (because
838 * there's no way to make the entry in other databases' pg_class).
839 */
840 if (shared_relation && !IsBootstrapProcessingMode())
841 ereport(ERROR,
842 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
843 errmsg("shared indexes cannot be created after initdb")));
844
845 /*
846 * Shared relations must be in pg_global, too (last-ditch check)
847 */
848 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
849 elog(ERROR, "shared relations must be placed in pg_global tablespace");
850
851 /*
852 * Check for duplicate name (both as to the index, and as to the
853 * associated constraint if any). Such cases would fail on the relevant
854 * catalogs' unique indexes anyway, but we prefer to give a friendlier
855 * error message.
856 */
857 if (get_relname_relid(indexRelationName, namespaceId))
858 {
859 if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
860 {
861 ereport(NOTICE,
862 (errcode(ERRCODE_DUPLICATE_TABLE),
863 errmsg("relation \"%s\" already exists, skipping",
864 indexRelationName)));
865 table_close(pg_class, RowExclusiveLock);
866 return InvalidOid;
867 }
868
869 ereport(ERROR,
870 (errcode(ERRCODE_DUPLICATE_TABLE),
871 errmsg("relation \"%s\" already exists",
872 indexRelationName)));
873 }
874
875 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
876 ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
877 indexRelationName))
878 {
879 /*
880 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
881 * conflicting constraint is not an index.
882 */
883 ereport(ERROR,
884 (errcode(ERRCODE_DUPLICATE_OBJECT),
885 errmsg("constraint \"%s\" for relation \"%s\" already exists",
886 indexRelationName, RelationGetRelationName(heapRelation))));
887 }
888
889 /*
890 * construct tuple descriptor for index tuples
891 */
892 indexTupDesc = ConstructTupleDescriptor(heapRelation,
893 indexInfo,
894 indexColNames,
895 accessMethodObjectId,
896 collationObjectId,
897 classObjectId);
898
899 /*
900 * Allocate an OID for the index, unless we were told what to use.
901 *
902 * The OID will be the relfilenode as well, so make sure it doesn't
903 * collide with either pg_class OIDs or existing physical files.
904 */
905 if (!OidIsValid(indexRelationId))
906 {
907 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
908 if (IsBinaryUpgrade)
909 {
910 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
911 ereport(ERROR,
912 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
913 errmsg("pg_class index OID value not set when in binary upgrade mode")));
914
915 indexRelationId = binary_upgrade_next_index_pg_class_oid;
916 binary_upgrade_next_index_pg_class_oid = InvalidOid;
917 }
918 else
919 {
920 indexRelationId =
921 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
922 }
923 }
924
925 /*
926 * create the index relation's relcache entry and, if necessary, the
927 * physical disk file. (If we fail further down, it's the smgr's
928 * responsibility to remove the disk file again, if any.)
929 */
930 indexRelation = heap_create(indexRelationName,
931 namespaceId,
932 tableSpaceId,
933 indexRelationId,
934 relFileNode,
935 accessMethodObjectId,
936 indexTupDesc,
937 relkind,
938 relpersistence,
939 shared_relation,
940 mapped_relation,
941 allow_system_table_mods,
942 &relfrozenxid,
943 &relminmxid);
944
945 Assert(relfrozenxid == InvalidTransactionId);
946 Assert(relminmxid == InvalidMultiXactId);
947 Assert(indexRelationId == RelationGetRelid(indexRelation));
948
949 /*
950 * Obtain exclusive lock on it. Although no other transactions can see it
951 * until we commit, this prevents deadlock-risk complaints from lock
952 * manager in cases such as CLUSTER.
953 */
954 LockRelation(indexRelation, AccessExclusiveLock);
955
956 /*
957 * Fill in fields of the index's pg_class entry that are not set correctly
958 * by heap_create.
959 *
960 * XXX should have a cleaner way to create cataloged indexes
961 */
962 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
963 indexRelation->rd_rel->relam = accessMethodObjectId;
964 indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
965
966 /*
967 * store index's pg_class entry
968 */
969 InsertPgClassTuple(pg_class, indexRelation,
970 RelationGetRelid(indexRelation),
971 (Datum) 0,
972 reloptions);
973
974 /* done with pg_class */
975 table_close(pg_class, RowExclusiveLock);
976
977 /*
978 * now update the object id's of all the attribute tuple forms in the
979 * index relation's tuple descriptor
980 */
981 InitializeAttributeOids(indexRelation,
982 indexInfo->ii_NumIndexAttrs,
983 indexRelationId);
984
985 /*
986 * append ATTRIBUTE tuples for the index
987 */
988 AppendAttributeTuples(indexRelation, indexInfo->ii_OpclassOptions);
989
990 /* ----------------
991 * update pg_index
992 * (append INDEX tuple)
993 *
994 * Note that this stows away a representation of "predicate".
995 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
996 * ----------------
997 */
998 UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
999 indexInfo,
1000 collationObjectId, classObjectId, coloptions,
1001 isprimary, is_exclusion,
1002 (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
1003 !concurrent && !invalid,
1004 !concurrent);
1005
1006 /*
1007 * Register relcache invalidation on the indexes' heap relation, to
1008 * maintain consistency of its index list
1009 */
1010 CacheInvalidateRelcache(heapRelation);
1011
1012 /* update pg_inherits and the parent's relhassubclass, if needed */
1013 if (OidIsValid(parentIndexRelid))
1014 {
1015 StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1016 SetRelationHasSubclass(parentIndexRelid, true);
1017 }
1018
1019 /*
1020 * Register constraint and dependencies for the index.
1021 *
1022 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1023 * entry. The index will be linked to the constraint, which in turn is
1024 * linked to the table. If it's not a CONSTRAINT, we need to make a
1025 * dependency directly on the table.
1026 *
1027 * We don't need a dependency on the namespace, because there'll be an
1028 * indirect dependency via our parent table.
1029 *
1030 * During bootstrap we can't register any dependencies, and we don't try
1031 * to make a constraint either.
1032 */
1033 if (!IsBootstrapProcessingMode())
1034 {
1035 ObjectAddress myself,
1036 referenced;
1037 ObjectAddresses *addrs;
1038
1039 ObjectAddressSet(myself, RelationRelationId, indexRelationId);
1040
1041 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1042 {
1043 char constraintType;
1044 ObjectAddress localaddr;
1045
1046 if (isprimary)
1047 constraintType = CONSTRAINT_PRIMARY;
1048 else if (indexInfo->ii_Unique)
1049 constraintType = CONSTRAINT_UNIQUE;
1050 else if (is_exclusion)
1051 constraintType = CONSTRAINT_EXCLUSION;
1052 else
1053 {
1054 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1055 constraintType = 0; /* keep compiler quiet */
1056 }
1057
1058 localaddr = index_constraint_create(heapRelation,
1059 indexRelationId,
1060 parentConstraintId,
1061 indexInfo,
1062 indexRelationName,
1063 constraintType,
1064 constr_flags,
1065 allow_system_table_mods,
1066 is_internal);
1067 if (constraintId)
1068 *constraintId = localaddr.objectId;
1069 }
1070 else
1071 {
1072 bool have_simple_col = false;
1073
1074 addrs = new_object_addresses();
1075
1076 /* Create auto dependencies on simply-referenced columns */
1077 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1078 {
1079 if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1080 {
1081 ObjectAddressSubSet(referenced, RelationRelationId,
1082 heapRelationId,
1083 indexInfo->ii_IndexAttrNumbers[i]);
1084 add_exact_object_address(&referenced, addrs);
1085 have_simple_col = true;
1086 }
1087 }
1088
1089 /*
1090 * If there are no simply-referenced columns, give the index an
1091 * auto dependency on the whole table. In most cases, this will
1092 * be redundant, but it might not be if the index expressions and
1093 * predicate contain no Vars or only whole-row Vars.
1094 */
1095 if (!have_simple_col)
1096 {
1097 ObjectAddressSet(referenced, RelationRelationId,
1098 heapRelationId);
1099 add_exact_object_address(&referenced, addrs);
1100 }
1101
1102 record_object_address_dependencies(&myself, addrs, DEPENDENCY_AUTO);
1103 free_object_addresses(addrs);
1104 }
1105
1106 /*
1107 * If this is an index partition, create partition dependencies on
1108 * both the parent index and the table. (Note: these must be *in
1109 * addition to*, not instead of, all other dependencies. Otherwise
1110 * we'll be short some dependencies after DETACH PARTITION.)
1111 */
1112 if (OidIsValid(parentIndexRelid))
1113 {
1114 ObjectAddressSet(referenced, RelationRelationId, parentIndexRelid);
1115 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1116
1117 ObjectAddressSet(referenced, RelationRelationId, heapRelationId);
1118 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1119 }
1120
1121 /* placeholder for normal dependencies */
1122 addrs = new_object_addresses();
1123
1124 /* Store dependency on collations */
1125
1126 /* The default collation is pinned, so don't bother recording it */
1127 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1128 {
1129 if (OidIsValid(collationObjectId[i]) &&
1130 collationObjectId[i] != DEFAULT_COLLATION_OID)
1131 {
1132 ObjectAddressSet(referenced, CollationRelationId,
1133 collationObjectId[i]);
1134 add_exact_object_address(&referenced, addrs);
1135 }
1136 }
1137
1138 /* Store dependency on operator classes */
1139 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1140 {
1141 ObjectAddressSet(referenced, OperatorClassRelationId, classObjectId[i]);
1142 add_exact_object_address(&referenced, addrs);
1143 }
1144
1145 record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
1146 free_object_addresses(addrs);
1147
1148 /* Store dependencies on anything mentioned in index expressions */
1149 if (indexInfo->ii_Expressions)
1150 {
1151 recordDependencyOnSingleRelExpr(&myself,
1152 (Node *) indexInfo->ii_Expressions,
1153 heapRelationId,
1154 DEPENDENCY_NORMAL,
1155 DEPENDENCY_AUTO, false);
1156 }
1157
1158 /* Store dependencies on anything mentioned in predicate */
1159 if (indexInfo->ii_Predicate)
1160 {
1161 recordDependencyOnSingleRelExpr(&myself,
1162 (Node *) indexInfo->ii_Predicate,
1163 heapRelationId,
1164 DEPENDENCY_NORMAL,
1165 DEPENDENCY_AUTO, false);
1166 }
1167 }
1168 else
1169 {
1170 /* Bootstrap mode - assert we weren't asked for constraint support */
1171 Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1172 }
1173
1174 /* Post creation hook for new index */
1175 InvokeObjectPostCreateHookArg(RelationRelationId,
1176 indexRelationId, 0, is_internal);
1177
1178 /*
1179 * Advance the command counter so that we can see the newly-entered
1180 * catalog tuples for the index.
1181 */
1182 CommandCounterIncrement();
1183
1184 /*
1185 * In bootstrap mode, we have to fill in the index strategy structure with
1186 * information from the catalogs. If we aren't bootstrapping, then the
1187 * relcache entry has already been rebuilt thanks to sinval update during
1188 * CommandCounterIncrement.
1189 */
1190 if (IsBootstrapProcessingMode())
1191 RelationInitIndexAccessInfo(indexRelation);
1192 else
1193 Assert(indexRelation->rd_indexcxt != NULL);
1194
1195 indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1196
1197 /* Validate opclass-specific options */
1198 if (indexInfo->ii_OpclassOptions)
1199 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1200 (void) index_opclass_options(indexRelation, i + 1,
1201 indexInfo->ii_OpclassOptions[i],
1202 true);
1203
1204 /*
1205 * If this is bootstrap (initdb) time, then we don't actually fill in the
1206 * index yet. We'll be creating more indexes and classes later, so we
1207 * delay filling them in until just before we're done with bootstrapping.
1208 * Similarly, if the caller specified to skip the build then filling the
1209 * index is delayed till later (ALTER TABLE can save work in some cases
1210 * with this). Otherwise, we call the AM routine that constructs the
1211 * index.
1212 */
1213 if (IsBootstrapProcessingMode())
1214 {
1215 index_register(heapRelationId, indexRelationId, indexInfo);
1216 }
1217 else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1218 {
1219 /*
1220 * Caller is responsible for filling the index later on. However,
1221 * we'd better make sure that the heap relation is correctly marked as
1222 * having an index.
1223 */
1224 index_update_stats(heapRelation,
1225 true,
1226 -1.0);
1227 /* Make the above update visible */
1228 CommandCounterIncrement();
1229 }
1230 else
1231 {
1232 index_build(heapRelation, indexRelation, indexInfo, false, true);
1233 }
1234
1235 /*
1236 * Close the index; but we keep the lock that we acquired above until end
1237 * of transaction. Closing the heap is caller's responsibility.
1238 */
1239 index_close(indexRelation, NoLock);
1240
1241 return indexRelationId;
1242 }
1243
1244 /*
1245 * index_concurrently_create_copy
1246 *
1247 * Create concurrently an index based on the definition of the one provided by
1248 * caller. The index is inserted into catalogs and needs to be built later
1249 * on. This is called during concurrent reindex processing.
1250 *
1251 * "tablespaceOid" is the tablespace to use for this index.
1252 */
1253 Oid
index_concurrently_create_copy(Relation heapRelation,Oid oldIndexId,Oid tablespaceOid,const char * newName)1254 index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId,
1255 Oid tablespaceOid, const char *newName)
1256 {
1257 Relation indexRelation;
1258 IndexInfo *oldInfo,
1259 *newInfo;
1260 Oid newIndexId = InvalidOid;
1261 HeapTuple indexTuple,
1262 classTuple;
1263 Datum indclassDatum,
1264 colOptionDatum,
1265 optionDatum;
1266 oidvector *indclass;
1267 int2vector *indcoloptions;
1268 bool isnull;
1269 List *indexColNames = NIL;
1270 List *indexExprs = NIL;
1271 List *indexPreds = NIL;
1272
1273 indexRelation = index_open(oldIndexId, RowExclusiveLock);
1274
1275 /* The new index needs some information from the old index */
1276 oldInfo = BuildIndexInfo(indexRelation);
1277
1278 /*
1279 * Concurrent build of an index with exclusion constraints is not
1280 * supported.
1281 */
1282 if (oldInfo->ii_ExclusionOps != NULL)
1283 ereport(ERROR,
1284 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1285 errmsg("concurrent index creation for exclusion constraints is not supported")));
1286
1287 /* Get the array of class and column options IDs from index info */
1288 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
1289 if (!HeapTupleIsValid(indexTuple))
1290 elog(ERROR, "cache lookup failed for index %u", oldIndexId);
1291 indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1292 Anum_pg_index_indclass, &isnull);
1293 Assert(!isnull);
1294 indclass = (oidvector *) DatumGetPointer(indclassDatum);
1295
1296 colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1297 Anum_pg_index_indoption, &isnull);
1298 Assert(!isnull);
1299 indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
1300
1301 /* Fetch options of index if any */
1302 classTuple = SearchSysCache1(RELOID, oldIndexId);
1303 if (!HeapTupleIsValid(classTuple))
1304 elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
1305 optionDatum = SysCacheGetAttr(RELOID, classTuple,
1306 Anum_pg_class_reloptions, &isnull);
1307
1308 /*
1309 * Fetch the list of expressions and predicates directly from the
1310 * catalogs. This cannot rely on the information from IndexInfo of the
1311 * old index as these have been flattened for the planner.
1312 */
1313 if (oldInfo->ii_Expressions != NIL)
1314 {
1315 Datum exprDatum;
1316 char *exprString;
1317
1318 exprDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1319 Anum_pg_index_indexprs, &isnull);
1320 Assert(!isnull);
1321 exprString = TextDatumGetCString(exprDatum);
1322 indexExprs = (List *) stringToNode(exprString);
1323 pfree(exprString);
1324 }
1325 if (oldInfo->ii_Predicate != NIL)
1326 {
1327 Datum predDatum;
1328 char *predString;
1329
1330 predDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1331 Anum_pg_index_indpred, &isnull);
1332 Assert(!isnull);
1333 predString = TextDatumGetCString(predDatum);
1334 indexPreds = (List *) stringToNode(predString);
1335
1336 /* Also convert to implicit-AND format */
1337 indexPreds = make_ands_implicit((Expr *) indexPreds);
1338 pfree(predString);
1339 }
1340
1341 /*
1342 * Build the index information for the new index. Note that rebuild of
1343 * indexes with exclusion constraints is not supported, hence there is no
1344 * need to fill all the ii_Exclusion* fields.
1345 */
1346 newInfo = makeIndexInfo(oldInfo->ii_NumIndexAttrs,
1347 oldInfo->ii_NumIndexKeyAttrs,
1348 oldInfo->ii_Am,
1349 indexExprs,
1350 indexPreds,
1351 oldInfo->ii_Unique,
1352 false, /* not ready for inserts */
1353 true);
1354
1355 /*
1356 * Extract the list of column names and the column numbers for the new
1357 * index information. All this information will be used for the index
1358 * creation.
1359 */
1360 for (int i = 0; i < oldInfo->ii_NumIndexAttrs; i++)
1361 {
1362 TupleDesc indexTupDesc = RelationGetDescr(indexRelation);
1363 Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
1364
1365 indexColNames = lappend(indexColNames, NameStr(att->attname));
1366 newInfo->ii_IndexAttrNumbers[i] = oldInfo->ii_IndexAttrNumbers[i];
1367 }
1368
1369 /* Extract opclass parameters for each attribute, if any */
1370 if (oldInfo->ii_OpclassOptions != NULL)
1371 {
1372 newInfo->ii_OpclassOptions = palloc0(sizeof(Datum) *
1373 newInfo->ii_NumIndexAttrs);
1374 for (int i = 0; i < newInfo->ii_NumIndexAttrs; i++)
1375 newInfo->ii_OpclassOptions[i] = get_attoptions(oldIndexId, i + 1);
1376 }
1377
1378 /*
1379 * Now create the new index.
1380 *
1381 * For a partition index, we adjust the partition dependency later, to
1382 * ensure a consistent state at all times. That is why parentIndexRelid
1383 * is not set here.
1384 */
1385 newIndexId = index_create(heapRelation,
1386 newName,
1387 InvalidOid, /* indexRelationId */
1388 InvalidOid, /* parentIndexRelid */
1389 InvalidOid, /* parentConstraintId */
1390 InvalidOid, /* relFileNode */
1391 newInfo,
1392 indexColNames,
1393 indexRelation->rd_rel->relam,
1394 tablespaceOid,
1395 indexRelation->rd_indcollation,
1396 indclass->values,
1397 indcoloptions->values,
1398 optionDatum,
1399 INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
1400 0,
1401 true, /* allow table to be a system catalog? */
1402 false, /* is_internal? */
1403 NULL);
1404
1405 /* Close the relations used and clean up */
1406 index_close(indexRelation, NoLock);
1407 ReleaseSysCache(indexTuple);
1408 ReleaseSysCache(classTuple);
1409
1410 return newIndexId;
1411 }
1412
1413 /*
1414 * index_concurrently_build
1415 *
1416 * Build index for a concurrent operation. Low-level locks are taken when
1417 * this operation is performed to prevent only schema changes, but they need
1418 * to be kept until the end of the transaction performing this operation.
1419 * 'indexOid' refers to an index relation OID already created as part of
1420 * previous processing, and 'heapOid' refers to its parent heap relation.
1421 */
1422 void
index_concurrently_build(Oid heapRelationId,Oid indexRelationId)1423 index_concurrently_build(Oid heapRelationId,
1424 Oid indexRelationId)
1425 {
1426 Relation heapRel;
1427 Relation indexRelation;
1428 IndexInfo *indexInfo;
1429
1430 /* This had better make sure that a snapshot is active */
1431 Assert(ActiveSnapshotSet());
1432
1433 /* Open and lock the parent heap relation */
1434 heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
1435
1436 /* And the target index relation */
1437 indexRelation = index_open(indexRelationId, RowExclusiveLock);
1438
1439 /*
1440 * We have to re-build the IndexInfo struct, since it was lost in the
1441 * commit of the transaction where this concurrent index was created at
1442 * the catalog level.
1443 */
1444 indexInfo = BuildIndexInfo(indexRelation);
1445 Assert(!indexInfo->ii_ReadyForInserts);
1446 indexInfo->ii_Concurrent = true;
1447 indexInfo->ii_BrokenHotChain = false;
1448
1449 /* Now build the index */
1450 index_build(heapRel, indexRelation, indexInfo, false, true);
1451
1452 /* Close both the relations, but keep the locks */
1453 table_close(heapRel, NoLock);
1454 index_close(indexRelation, NoLock);
1455
1456 /*
1457 * Update the pg_index row to mark the index as ready for inserts. Once we
1458 * commit this transaction, any new transactions that open the table must
1459 * insert new entries into the index for insertions and non-HOT updates.
1460 */
1461 index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
1462 }
1463
1464 /*
1465 * index_concurrently_swap
1466 *
1467 * Swap name, dependencies, and constraints of the old index over to the new
1468 * index, while marking the old index as invalid and the new as valid.
1469 */
1470 void
index_concurrently_swap(Oid newIndexId,Oid oldIndexId,const char * oldName)1471 index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
1472 {
1473 Relation pg_class,
1474 pg_index,
1475 pg_constraint,
1476 pg_trigger;
1477 Relation oldClassRel,
1478 newClassRel;
1479 HeapTuple oldClassTuple,
1480 newClassTuple;
1481 Form_pg_class oldClassForm,
1482 newClassForm;
1483 HeapTuple oldIndexTuple,
1484 newIndexTuple;
1485 Form_pg_index oldIndexForm,
1486 newIndexForm;
1487 bool isPartition;
1488 Oid indexConstraintOid;
1489 List *constraintOids = NIL;
1490 ListCell *lc;
1491
1492 /*
1493 * Take a necessary lock on the old and new index before swapping them.
1494 */
1495 oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
1496 newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
1497
1498 /* Now swap names and dependencies of those indexes */
1499 pg_class = table_open(RelationRelationId, RowExclusiveLock);
1500
1501 oldClassTuple = SearchSysCacheCopy1(RELOID,
1502 ObjectIdGetDatum(oldIndexId));
1503 if (!HeapTupleIsValid(oldClassTuple))
1504 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1505 newClassTuple = SearchSysCacheCopy1(RELOID,
1506 ObjectIdGetDatum(newIndexId));
1507 if (!HeapTupleIsValid(newClassTuple))
1508 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1509
1510 oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
1511 newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
1512
1513 /* Swap the names */
1514 namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
1515 namestrcpy(&oldClassForm->relname, oldName);
1516
1517 /* Swap the partition flags to track inheritance properly */
1518 isPartition = newClassForm->relispartition;
1519 newClassForm->relispartition = oldClassForm->relispartition;
1520 oldClassForm->relispartition = isPartition;
1521
1522 CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
1523 CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
1524
1525 heap_freetuple(oldClassTuple);
1526 heap_freetuple(newClassTuple);
1527
1528 /* Now swap index info */
1529 pg_index = table_open(IndexRelationId, RowExclusiveLock);
1530
1531 oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1532 ObjectIdGetDatum(oldIndexId));
1533 if (!HeapTupleIsValid(oldIndexTuple))
1534 elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1535 newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1536 ObjectIdGetDatum(newIndexId));
1537 if (!HeapTupleIsValid(newIndexTuple))
1538 elog(ERROR, "could not find tuple for relation %u", newIndexId);
1539
1540 oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
1541 newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
1542
1543 /*
1544 * Copy constraint flags from the old index. This is safe because the old
1545 * index guaranteed uniqueness.
1546 */
1547 newIndexForm->indisprimary = oldIndexForm->indisprimary;
1548 oldIndexForm->indisprimary = false;
1549 newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
1550 oldIndexForm->indisexclusion = false;
1551 newIndexForm->indimmediate = oldIndexForm->indimmediate;
1552 oldIndexForm->indimmediate = true;
1553
1554 /* Preserve indisreplident in the new index */
1555 newIndexForm->indisreplident = oldIndexForm->indisreplident;
1556
1557 /* Preserve indisclustered in the new index */
1558 newIndexForm->indisclustered = oldIndexForm->indisclustered;
1559
1560 /*
1561 * Mark the new index as valid, and the old index as invalid similarly to
1562 * what index_set_state_flags() does.
1563 */
1564 newIndexForm->indisvalid = true;
1565 oldIndexForm->indisvalid = false;
1566 oldIndexForm->indisclustered = false;
1567 oldIndexForm->indisreplident = false;
1568
1569 CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
1570 CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
1571
1572 heap_freetuple(oldIndexTuple);
1573 heap_freetuple(newIndexTuple);
1574
1575 /*
1576 * Move constraints and triggers over to the new index
1577 */
1578
1579 constraintOids = get_index_ref_constraints(oldIndexId);
1580
1581 indexConstraintOid = get_index_constraint(oldIndexId);
1582
1583 if (OidIsValid(indexConstraintOid))
1584 constraintOids = lappend_oid(constraintOids, indexConstraintOid);
1585
1586 pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
1587 pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
1588
1589 foreach(lc, constraintOids)
1590 {
1591 HeapTuple constraintTuple,
1592 triggerTuple;
1593 Form_pg_constraint conForm;
1594 ScanKeyData key[1];
1595 SysScanDesc scan;
1596 Oid constraintOid = lfirst_oid(lc);
1597
1598 /* Move the constraint from the old to the new index */
1599 constraintTuple = SearchSysCacheCopy1(CONSTROID,
1600 ObjectIdGetDatum(constraintOid));
1601 if (!HeapTupleIsValid(constraintTuple))
1602 elog(ERROR, "could not find tuple for constraint %u", constraintOid);
1603
1604 conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
1605
1606 if (conForm->conindid == oldIndexId)
1607 {
1608 conForm->conindid = newIndexId;
1609
1610 CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
1611 }
1612
1613 heap_freetuple(constraintTuple);
1614
1615 /* Search for trigger records */
1616 ScanKeyInit(&key[0],
1617 Anum_pg_trigger_tgconstraint,
1618 BTEqualStrategyNumber, F_OIDEQ,
1619 ObjectIdGetDatum(constraintOid));
1620
1621 scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
1622 NULL, 1, key);
1623
1624 while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
1625 {
1626 Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1627
1628 if (tgForm->tgconstrindid != oldIndexId)
1629 continue;
1630
1631 /* Make a modifiable copy */
1632 triggerTuple = heap_copytuple(triggerTuple);
1633 tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1634
1635 tgForm->tgconstrindid = newIndexId;
1636
1637 CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
1638
1639 heap_freetuple(triggerTuple);
1640 }
1641
1642 systable_endscan(scan);
1643 }
1644
1645 /*
1646 * Move comment if any
1647 */
1648 {
1649 Relation description;
1650 ScanKeyData skey[3];
1651 SysScanDesc sd;
1652 HeapTuple tuple;
1653 Datum values[Natts_pg_description] = {0};
1654 bool nulls[Natts_pg_description] = {0};
1655 bool replaces[Natts_pg_description] = {0};
1656
1657 values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
1658 replaces[Anum_pg_description_objoid - 1] = true;
1659
1660 ScanKeyInit(&skey[0],
1661 Anum_pg_description_objoid,
1662 BTEqualStrategyNumber, F_OIDEQ,
1663 ObjectIdGetDatum(oldIndexId));
1664 ScanKeyInit(&skey[1],
1665 Anum_pg_description_classoid,
1666 BTEqualStrategyNumber, F_OIDEQ,
1667 ObjectIdGetDatum(RelationRelationId));
1668 ScanKeyInit(&skey[2],
1669 Anum_pg_description_objsubid,
1670 BTEqualStrategyNumber, F_INT4EQ,
1671 Int32GetDatum(0));
1672
1673 description = table_open(DescriptionRelationId, RowExclusiveLock);
1674
1675 sd = systable_beginscan(description, DescriptionObjIndexId, true,
1676 NULL, 3, skey);
1677
1678 while ((tuple = systable_getnext(sd)) != NULL)
1679 {
1680 tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
1681 values, nulls, replaces);
1682 CatalogTupleUpdate(description, &tuple->t_self, tuple);
1683
1684 break; /* Assume there can be only one match */
1685 }
1686
1687 systable_endscan(sd);
1688 table_close(description, NoLock);
1689 }
1690
1691 /*
1692 * Swap inheritance relationship with parent index
1693 */
1694 if (get_rel_relispartition(oldIndexId))
1695 {
1696 List *ancestors = get_partition_ancestors(oldIndexId);
1697 Oid parentIndexRelid = linitial_oid(ancestors);
1698
1699 DeleteInheritsTuple(oldIndexId, parentIndexRelid, false, NULL);
1700 StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
1701
1702 list_free(ancestors);
1703 }
1704
1705 /*
1706 * Swap all dependencies of and on the old index to the new one, and
1707 * vice-versa. Note that a call to CommandCounterIncrement() would cause
1708 * duplicate entries in pg_depend, so this should not be done.
1709 */
1710 changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
1711 changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
1712
1713 changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
1714 changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
1715
1716 /*
1717 * Copy over statistics from old to new index
1718 */
1719 {
1720 PgStat_StatTabEntry *tabentry;
1721
1722 tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
1723 if (tabentry)
1724 {
1725 if (newClassRel->pgstat_info)
1726 {
1727 newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
1728 newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
1729 newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
1730 newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
1731 newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
1732
1733 /*
1734 * The data will be sent by the next pgstat_report_stat()
1735 * call.
1736 */
1737 }
1738 }
1739 }
1740
1741 /* Copy data of pg_statistic from the old index to the new one */
1742 CopyStatistics(oldIndexId, newIndexId);
1743
1744 /* Copy pg_attribute.attstattarget for each index attribute */
1745 {
1746 HeapTuple attrTuple;
1747 Relation pg_attribute;
1748 SysScanDesc scan;
1749 ScanKeyData key[1];
1750
1751 pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
1752 ScanKeyInit(&key[0],
1753 Anum_pg_attribute_attrelid,
1754 BTEqualStrategyNumber, F_OIDEQ,
1755 ObjectIdGetDatum(newIndexId));
1756 scan = systable_beginscan(pg_attribute, AttributeRelidNumIndexId,
1757 true, NULL, 1, key);
1758
1759 while (HeapTupleIsValid((attrTuple = systable_getnext(scan))))
1760 {
1761 Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attrTuple);
1762 Datum repl_val[Natts_pg_attribute];
1763 bool repl_null[Natts_pg_attribute];
1764 bool repl_repl[Natts_pg_attribute];
1765 int attstattarget;
1766 HeapTuple newTuple;
1767
1768 /* Ignore dropped columns */
1769 if (att->attisdropped)
1770 continue;
1771
1772 /*
1773 * Get attstattarget from the old index and refresh the new value.
1774 */
1775 attstattarget = get_attstattarget(oldIndexId, att->attnum);
1776
1777 /* no need for a refresh if both match */
1778 if (attstattarget == att->attstattarget)
1779 continue;
1780
1781 memset(repl_val, 0, sizeof(repl_val));
1782 memset(repl_null, false, sizeof(repl_null));
1783 memset(repl_repl, false, sizeof(repl_repl));
1784
1785 repl_repl[Anum_pg_attribute_attstattarget - 1] = true;
1786 repl_val[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(attstattarget);
1787
1788 newTuple = heap_modify_tuple(attrTuple,
1789 RelationGetDescr(pg_attribute),
1790 repl_val, repl_null, repl_repl);
1791 CatalogTupleUpdate(pg_attribute, &newTuple->t_self, newTuple);
1792
1793 heap_freetuple(newTuple);
1794 }
1795
1796 systable_endscan(scan);
1797 table_close(pg_attribute, RowExclusiveLock);
1798 }
1799
1800 /* Close relations */
1801 table_close(pg_class, RowExclusiveLock);
1802 table_close(pg_index, RowExclusiveLock);
1803 table_close(pg_constraint, RowExclusiveLock);
1804 table_close(pg_trigger, RowExclusiveLock);
1805
1806 /* The lock taken previously is not released until the end of transaction */
1807 relation_close(oldClassRel, NoLock);
1808 relation_close(newClassRel, NoLock);
1809 }
1810
1811 /*
1812 * index_concurrently_set_dead
1813 *
1814 * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
1815 * CONCURRENTLY before actually dropping the index. After calling this
1816 * function, the index is seen by all the backends as dead. Low-level locks
1817 * taken here are kept until the end of the transaction calling this function.
1818 */
1819 void
index_concurrently_set_dead(Oid heapId,Oid indexId)1820 index_concurrently_set_dead(Oid heapId, Oid indexId)
1821 {
1822 Relation userHeapRelation;
1823 Relation userIndexRelation;
1824
1825 /*
1826 * No more predicate locks will be acquired on this index, and we're about
1827 * to stop doing inserts into the index which could show conflicts with
1828 * existing predicate locks, so now is the time to move them to the heap
1829 * relation.
1830 */
1831 userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
1832 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1833 TransferPredicateLocksToHeapRelation(userIndexRelation);
1834
1835 /*
1836 * Now we are sure that nobody uses the index for queries; they just might
1837 * have it open for updating it. So now we can unset indisready and
1838 * indislive, then wait till nobody could be using it at all anymore.
1839 */
1840 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1841
1842 /*
1843 * Invalidate the relcache for the table, so that after this commit all
1844 * sessions will refresh the table's index list. Forgetting just the
1845 * index's relcache entry is not enough.
1846 */
1847 CacheInvalidateRelcache(userHeapRelation);
1848
1849 /*
1850 * Close the relations again, though still holding session lock.
1851 */
1852 table_close(userHeapRelation, NoLock);
1853 index_close(userIndexRelation, NoLock);
1854 }
1855
1856 /*
1857 * index_constraint_create
1858 *
1859 * Set up a constraint associated with an index. Return the new constraint's
1860 * address.
1861 *
1862 * heapRelation: table owning the index (must be suitably locked by caller)
1863 * indexRelationId: OID of the index
1864 * parentConstraintId: if constraint is on a partition, the OID of the
1865 * constraint in the parent.
1866 * indexInfo: same info executor uses to insert into the index
1867 * constraintName: what it say (generally, should match name of index)
1868 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1869 * CONSTRAINT_EXCLUSION
1870 * flags: bitmask that can include any combination of these bits:
1871 * INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
1872 * INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
1873 * INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
1874 * INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
1875 * INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
1876 * of index on table's columns
1877 * allow_system_table_mods: allow table to be a system catalog
1878 * is_internal: index is constructed due to internal process
1879 */
1880 ObjectAddress
index_constraint_create(Relation heapRelation,Oid indexRelationId,Oid parentConstraintId,IndexInfo * indexInfo,const char * constraintName,char constraintType,bits16 constr_flags,bool allow_system_table_mods,bool is_internal)1881 index_constraint_create(Relation heapRelation,
1882 Oid indexRelationId,
1883 Oid parentConstraintId,
1884 IndexInfo *indexInfo,
1885 const char *constraintName,
1886 char constraintType,
1887 bits16 constr_flags,
1888 bool allow_system_table_mods,
1889 bool is_internal)
1890 {
1891 Oid namespaceId = RelationGetNamespace(heapRelation);
1892 ObjectAddress myself,
1893 idxaddr;
1894 Oid conOid;
1895 bool deferrable;
1896 bool initdeferred;
1897 bool mark_as_primary;
1898 bool islocal;
1899 bool noinherit;
1900 int inhcount;
1901
1902 deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
1903 initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
1904 mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;
1905
1906 /* constraint creation support doesn't work while bootstrapping */
1907 Assert(!IsBootstrapProcessingMode());
1908
1909 /* enforce system-table restriction */
1910 if (!allow_system_table_mods &&
1911 IsSystemRelation(heapRelation) &&
1912 IsNormalProcessingMode())
1913 ereport(ERROR,
1914 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1915 errmsg("user-defined indexes on system catalog tables are not supported")));
1916
1917 /* primary/unique constraints shouldn't have any expressions */
1918 if (indexInfo->ii_Expressions &&
1919 constraintType != CONSTRAINT_EXCLUSION)
1920 elog(ERROR, "constraints cannot have index expressions");
1921
1922 /*
1923 * If we're manufacturing a constraint for a pre-existing index, we need
1924 * to get rid of the existing auto dependencies for the index (the ones
1925 * that index_create() would have made instead of calling this function).
1926 *
1927 * Note: this code would not necessarily do the right thing if the index
1928 * has any expressions or predicate, but we'd never be turning such an
1929 * index into a UNIQUE or PRIMARY KEY constraint.
1930 */
1931 if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
1932 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1933 RelationRelationId, DEPENDENCY_AUTO);
1934
1935 if (OidIsValid(parentConstraintId))
1936 {
1937 islocal = false;
1938 inhcount = 1;
1939 noinherit = false;
1940 }
1941 else
1942 {
1943 islocal = true;
1944 inhcount = 0;
1945 noinherit = true;
1946 }
1947
1948 /*
1949 * Construct a pg_constraint entry.
1950 */
1951 conOid = CreateConstraintEntry(constraintName,
1952 namespaceId,
1953 constraintType,
1954 deferrable,
1955 initdeferred,
1956 true,
1957 parentConstraintId,
1958 RelationGetRelid(heapRelation),
1959 indexInfo->ii_IndexAttrNumbers,
1960 indexInfo->ii_NumIndexKeyAttrs,
1961 indexInfo->ii_NumIndexAttrs,
1962 InvalidOid, /* no domain */
1963 indexRelationId, /* index OID */
1964 InvalidOid, /* no foreign key */
1965 NULL,
1966 NULL,
1967 NULL,
1968 NULL,
1969 0,
1970 ' ',
1971 ' ',
1972 ' ',
1973 indexInfo->ii_ExclusionOps,
1974 NULL, /* no check constraint */
1975 NULL,
1976 islocal,
1977 inhcount,
1978 noinherit,
1979 is_internal);
1980
1981 /*
1982 * Register the index as internally dependent on the constraint.
1983 *
1984 * Note that the constraint has a dependency on the table, so we don't
1985 * need (or want) any direct dependency from the index to the table.
1986 */
1987 ObjectAddressSet(myself, ConstraintRelationId, conOid);
1988 ObjectAddressSet(idxaddr, RelationRelationId, indexRelationId);
1989 recordDependencyOn(&idxaddr, &myself, DEPENDENCY_INTERNAL);
1990
1991 /*
1992 * Also, if this is a constraint on a partition, give it partition-type
1993 * dependencies on the parent constraint as well as the table.
1994 */
1995 if (OidIsValid(parentConstraintId))
1996 {
1997 ObjectAddress referenced;
1998
1999 ObjectAddressSet(referenced, ConstraintRelationId, parentConstraintId);
2000 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
2001 ObjectAddressSet(referenced, RelationRelationId,
2002 RelationGetRelid(heapRelation));
2003 recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
2004 }
2005
2006 /*
2007 * If the constraint is deferrable, create the deferred uniqueness
2008 * checking trigger. (The trigger will be given an internal dependency on
2009 * the constraint by CreateTrigger.)
2010 */
2011 if (deferrable)
2012 {
2013 CreateTrigStmt *trigger = makeNode(CreateTrigStmt);
2014
2015 trigger->replace = false;
2016 trigger->isconstraint = true;
2017 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
2018 "PK_ConstraintTrigger" :
2019 "Unique_ConstraintTrigger";
2020 trigger->relation = NULL;
2021 trigger->funcname = SystemFuncName("unique_key_recheck");
2022 trigger->args = NIL;
2023 trigger->row = true;
2024 trigger->timing = TRIGGER_TYPE_AFTER;
2025 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
2026 trigger->columns = NIL;
2027 trigger->whenClause = NULL;
2028 trigger->transitionRels = NIL;
2029 trigger->deferrable = true;
2030 trigger->initdeferred = initdeferred;
2031 trigger->constrrel = NULL;
2032
2033 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
2034 InvalidOid, conOid, indexRelationId, InvalidOid,
2035 InvalidOid, NULL, true, false);
2036 }
2037
2038 /*
2039 * If needed, mark the index as primary and/or deferred in pg_index.
2040 *
2041 * Note: When making an existing index into a constraint, caller must have
2042 * a table lock that prevents concurrent table updates; otherwise, there
2043 * is a risk that concurrent readers of the table will miss seeing this
2044 * index at all.
2045 */
2046 if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
2047 (mark_as_primary || deferrable))
2048 {
2049 Relation pg_index;
2050 HeapTuple indexTuple;
2051 Form_pg_index indexForm;
2052 bool dirty = false;
2053
2054 pg_index = table_open(IndexRelationId, RowExclusiveLock);
2055
2056 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2057 ObjectIdGetDatum(indexRelationId));
2058 if (!HeapTupleIsValid(indexTuple))
2059 elog(ERROR, "cache lookup failed for index %u", indexRelationId);
2060 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2061
2062 if (mark_as_primary && !indexForm->indisprimary)
2063 {
2064 indexForm->indisprimary = true;
2065 dirty = true;
2066 }
2067
2068 if (deferrable && indexForm->indimmediate)
2069 {
2070 indexForm->indimmediate = false;
2071 dirty = true;
2072 }
2073
2074 if (dirty)
2075 {
2076 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2077
2078 InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
2079 InvalidOid, is_internal);
2080 }
2081
2082 heap_freetuple(indexTuple);
2083 table_close(pg_index, RowExclusiveLock);
2084 }
2085
2086 return myself;
2087 }
2088
2089 /*
2090 * index_drop
2091 *
2092 * NOTE: this routine should now only be called through performDeletion(),
2093 * else associated dependencies won't be cleaned up.
2094 *
2095 * If concurrent is true, do a DROP INDEX CONCURRENTLY. If concurrent is
2096 * false but concurrent_lock_mode is true, then do a normal DROP INDEX but
2097 * take a lock for CONCURRENTLY processing. That is used as part of REINDEX
2098 * CONCURRENTLY.
2099 */
2100 void
index_drop(Oid indexId,bool concurrent,bool concurrent_lock_mode)2101 index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
2102 {
2103 Oid heapId;
2104 Relation userHeapRelation;
2105 Relation userIndexRelation;
2106 Relation indexRelation;
2107 HeapTuple tuple;
2108 bool hasexprs;
2109 LockRelId heaprelid,
2110 indexrelid;
2111 LOCKTAG heaplocktag;
2112 LOCKMODE lockmode;
2113
2114 /*
2115 * A temporary relation uses a non-concurrent DROP. Other backends can't
2116 * access a temporary relation, so there's no harm in grabbing a stronger
2117 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
2118 * more efficient.
2119 */
2120 Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
2121 (!concurrent && !concurrent_lock_mode));
2122
2123 /*
2124 * To drop an index safely, we must grab exclusive lock on its parent
2125 * table. Exclusive lock on the index alone is insufficient because
2126 * another backend might be about to execute a query on the parent table.
2127 * If it relies on a previously cached list of index OIDs, then it could
2128 * attempt to access the just-dropped index. We must therefore take a
2129 * table lock strong enough to prevent all queries on the table from
2130 * proceeding until we commit and send out a shared-cache-inval notice
2131 * that will make them update their index lists.
2132 *
2133 * In the concurrent case we avoid this requirement by disabling index use
2134 * in multiple steps and waiting out any transactions that might be using
2135 * the index, so we don't need exclusive lock on the parent table. Instead
2136 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
2137 * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
2138 * AccessExclusiveLock on the index below, once we're sure nobody else is
2139 * using it.)
2140 */
2141 heapId = IndexGetRelation(indexId, false);
2142 lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
2143 userHeapRelation = table_open(heapId, lockmode);
2144 userIndexRelation = index_open(indexId, lockmode);
2145
2146 /*
2147 * We might still have open queries using it in our own session, which the
2148 * above locking won't prevent, so test explicitly.
2149 */
2150 CheckTableNotInUse(userIndexRelation, "DROP INDEX");
2151
2152 /*
2153 * Drop Index Concurrently is more or less the reverse process of Create
2154 * Index Concurrently.
2155 *
2156 * First we unset indisvalid so queries starting afterwards don't use the
2157 * index to answer queries anymore. We have to keep indisready = true so
2158 * transactions that are still scanning the index can continue to see
2159 * valid index contents. For instance, if they are using READ COMMITTED
2160 * mode, and another transaction makes changes and commits, they need to
2161 * see those new tuples in the index.
2162 *
2163 * After all transactions that could possibly have used the index for
2164 * queries end, we can unset indisready and indislive, then wait till
2165 * nobody could be touching it anymore. (Note: we need indislive because
2166 * this state must be distinct from the initial state during CREATE INDEX
2167 * CONCURRENTLY, which has indislive true while indisready and indisvalid
2168 * are false. That's because in that state, transactions must examine the
2169 * index for HOT-safety decisions, while in this state we don't want them
2170 * to open it at all.)
2171 *
2172 * Since all predicate locks on the index are about to be made invalid, we
2173 * must promote them to predicate locks on the heap. In the
2174 * non-concurrent case we can just do that now. In the concurrent case
2175 * it's a bit trickier. The predicate locks must be moved when there are
2176 * no index scans in progress on the index and no more can subsequently
2177 * start, so that no new predicate locks can be made on the index. Also,
2178 * they must be moved before heap inserts stop maintaining the index, else
2179 * the conflict with the predicate lock on the index gap could be missed
2180 * before the lock on the heap relation is in place to detect a conflict
2181 * based on the heap tuple insert.
2182 */
2183 if (concurrent)
2184 {
2185 /*
2186 * We must commit our transaction in order to make the first pg_index
2187 * state update visible to other sessions. If the DROP machinery has
2188 * already performed any other actions (removal of other objects,
2189 * pg_depend entries, etc), the commit would make those actions
2190 * permanent, which would leave us with inconsistent catalog state if
2191 * we fail partway through the following sequence. Since DROP INDEX
2192 * CONCURRENTLY is restricted to dropping just one index that has no
2193 * dependencies, we should get here before anything's been done ---
2194 * but let's check that to be sure. We can verify that the current
2195 * transaction has not executed any transactional updates by checking
2196 * that no XID has been assigned.
2197 */
2198 if (GetTopTransactionIdIfAny() != InvalidTransactionId)
2199 ereport(ERROR,
2200 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2201 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
2202
2203 /*
2204 * Mark index invalid by updating its pg_index entry
2205 */
2206 index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
2207
2208 /*
2209 * Invalidate the relcache for the table, so that after this commit
2210 * all sessions will refresh any cached plans that might reference the
2211 * index.
2212 */
2213 CacheInvalidateRelcache(userHeapRelation);
2214
2215 /* save lockrelid and locktag for below, then close but keep locks */
2216 heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
2217 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
2218 indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
2219
2220 table_close(userHeapRelation, NoLock);
2221 index_close(userIndexRelation, NoLock);
2222
2223 /*
2224 * We must commit our current transaction so that the indisvalid
2225 * update becomes visible to other transactions; then start another.
2226 * Note that any previously-built data structures are lost in the
2227 * commit. The only data we keep past here are the relation IDs.
2228 *
2229 * Before committing, get a session-level lock on the table, to ensure
2230 * that neither it nor the index can be dropped before we finish. This
2231 * cannot block, even if someone else is waiting for access, because
2232 * we already have the same lock within our transaction.
2233 */
2234 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2235 LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2236
2237 PopActiveSnapshot();
2238 CommitTransactionCommand();
2239 StartTransactionCommand();
2240
2241 /*
2242 * Now we must wait until no running transaction could be using the
2243 * index for a query. Use AccessExclusiveLock here to check for
2244 * running transactions that hold locks of any kind on the table. Note
2245 * we do not need to worry about xacts that open the table for reading
2246 * after this point; they will see the index as invalid when they open
2247 * the relation.
2248 *
2249 * Note: the reason we use actual lock acquisition here, rather than
2250 * just checking the ProcArray and sleeping, is that deadlock is
2251 * possible if one of the transactions in question is blocked trying
2252 * to acquire an exclusive lock on our table. The lock code will
2253 * detect deadlock and error out properly.
2254 *
2255 * Note: we report progress through WaitForLockers() unconditionally
2256 * here, even though it will only be used when we're called by REINDEX
2257 * CONCURRENTLY and not when called by DROP INDEX CONCURRENTLY.
2258 */
2259 WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2260
2261 /* Finish invalidation of index and mark it as dead */
2262 index_concurrently_set_dead(heapId, indexId);
2263
2264 /*
2265 * Again, commit the transaction to make the pg_index update visible
2266 * to other sessions.
2267 */
2268 CommitTransactionCommand();
2269 StartTransactionCommand();
2270
2271 /*
2272 * Wait till every transaction that saw the old index state has
2273 * finished. See above about progress reporting.
2274 */
2275 WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2276
2277 /*
2278 * Re-open relations to allow us to complete our actions.
2279 *
2280 * At this point, nothing should be accessing the index, but lets
2281 * leave nothing to chance and grab AccessExclusiveLock on the index
2282 * before the physical deletion.
2283 */
2284 userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
2285 userIndexRelation = index_open(indexId, AccessExclusiveLock);
2286 }
2287 else
2288 {
2289 /* Not concurrent, so just transfer predicate locks and we're good */
2290 TransferPredicateLocksToHeapRelation(userIndexRelation);
2291 }
2292
2293 /*
2294 * Schedule physical removal of the files (if any)
2295 */
2296 if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
2297 RelationDropStorage(userIndexRelation);
2298
2299 /*
2300 * Close and flush the index's relcache entry, to ensure relcache doesn't
2301 * try to rebuild it while we're deleting catalog entries. We keep the
2302 * lock though.
2303 */
2304 index_close(userIndexRelation, NoLock);
2305
2306 RelationForgetRelation(indexId);
2307
2308 /*
2309 * fix INDEX relation, and check for expressional index
2310 */
2311 indexRelation = table_open(IndexRelationId, RowExclusiveLock);
2312
2313 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
2314 if (!HeapTupleIsValid(tuple))
2315 elog(ERROR, "cache lookup failed for index %u", indexId);
2316
2317 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
2318 RelationGetDescr(indexRelation));
2319
2320 CatalogTupleDelete(indexRelation, &tuple->t_self);
2321
2322 ReleaseSysCache(tuple);
2323 table_close(indexRelation, RowExclusiveLock);
2324
2325 /*
2326 * if it has any expression columns, we might have stored statistics about
2327 * them.
2328 */
2329 if (hasexprs)
2330 RemoveStatistics(indexId, 0);
2331
2332 /*
2333 * fix ATTRIBUTE relation
2334 */
2335 DeleteAttributeTuples(indexId);
2336
2337 /*
2338 * fix RELATION relation
2339 */
2340 DeleteRelationTuple(indexId);
2341
2342 /*
2343 * fix INHERITS relation
2344 */
2345 DeleteInheritsTuple(indexId, InvalidOid, false, NULL);
2346
2347 /*
2348 * We are presently too lazy to attempt to compute the new correct value
2349 * of relhasindex (the next VACUUM will fix it if necessary). So there is
2350 * no need to update the pg_class tuple for the owning relation. But we
2351 * must send out a shared-cache-inval notice on the owning relation to
2352 * ensure other backends update their relcache lists of indexes. (In the
2353 * concurrent case, this is redundant but harmless.)
2354 */
2355 CacheInvalidateRelcache(userHeapRelation);
2356
2357 /*
2358 * Close owning rel, but keep lock
2359 */
2360 table_close(userHeapRelation, NoLock);
2361
2362 /*
2363 * Release the session locks before we go.
2364 */
2365 if (concurrent)
2366 {
2367 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2368 UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2369 }
2370 }
2371
2372 /* ----------------------------------------------------------------
2373 * index_build support
2374 * ----------------------------------------------------------------
2375 */
2376
2377 /* ----------------
2378 * BuildIndexInfo
2379 * Construct an IndexInfo record for an open index
2380 *
2381 * IndexInfo stores the information about the index that's needed by
2382 * FormIndexDatum, which is used for both index_build() and later insertion
2383 * of individual index tuples. Normally we build an IndexInfo for an index
2384 * just once per command, and then use it for (potentially) many tuples.
2385 * ----------------
2386 */
2387 IndexInfo *
BuildIndexInfo(Relation index)2388 BuildIndexInfo(Relation index)
2389 {
2390 IndexInfo *ii;
2391 Form_pg_index indexStruct = index->rd_index;
2392 int i;
2393 int numAtts;
2394
2395 /* check the number of keys, and copy attr numbers into the IndexInfo */
2396 numAtts = indexStruct->indnatts;
2397 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2398 elog(ERROR, "invalid indnatts %d for index %u",
2399 numAtts, RelationGetRelid(index));
2400
2401 /*
2402 * Create the node, fetching any expressions needed for expressional
2403 * indexes and index predicate if any.
2404 */
2405 ii = makeIndexInfo(indexStruct->indnatts,
2406 indexStruct->indnkeyatts,
2407 index->rd_rel->relam,
2408 RelationGetIndexExpressions(index),
2409 RelationGetIndexPredicate(index),
2410 indexStruct->indisunique,
2411 indexStruct->indisready,
2412 false);
2413
2414 /* fill in attribute numbers */
2415 for (i = 0; i < numAtts; i++)
2416 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2417
2418 /* fetch exclusion constraint info if any */
2419 if (indexStruct->indisexclusion)
2420 {
2421 RelationGetExclusionInfo(index,
2422 &ii->ii_ExclusionOps,
2423 &ii->ii_ExclusionProcs,
2424 &ii->ii_ExclusionStrats);
2425 }
2426
2427 ii->ii_OpclassOptions = RelationGetIndexRawAttOptions(index);
2428
2429 return ii;
2430 }
2431
2432 /* ----------------
2433 * BuildDummyIndexInfo
2434 * Construct a dummy IndexInfo record for an open index
2435 *
2436 * This differs from the real BuildIndexInfo in that it will never run any
2437 * user-defined code that might exist in index expressions or predicates.
2438 * Instead of the real index expressions, we return null constants that have
2439 * the right types/typmods/collations. Predicates and exclusion clauses are
2440 * just ignored. This is sufficient for the purpose of truncating an index,
2441 * since we will not need to actually evaluate the expressions or predicates;
2442 * the only thing that's likely to be done with the data is construction of
2443 * a tupdesc describing the index's rowtype.
2444 * ----------------
2445 */
2446 IndexInfo *
BuildDummyIndexInfo(Relation index)2447 BuildDummyIndexInfo(Relation index)
2448 {
2449 IndexInfo *ii;
2450 Form_pg_index indexStruct = index->rd_index;
2451 int i;
2452 int numAtts;
2453
2454 /* check the number of keys, and copy attr numbers into the IndexInfo */
2455 numAtts = indexStruct->indnatts;
2456 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2457 elog(ERROR, "invalid indnatts %d for index %u",
2458 numAtts, RelationGetRelid(index));
2459
2460 /*
2461 * Create the node, using dummy index expressions, and pretending there is
2462 * no predicate.
2463 */
2464 ii = makeIndexInfo(indexStruct->indnatts,
2465 indexStruct->indnkeyatts,
2466 index->rd_rel->relam,
2467 RelationGetDummyIndexExpressions(index),
2468 NIL,
2469 indexStruct->indisunique,
2470 indexStruct->indisready,
2471 false);
2472
2473 /* fill in attribute numbers */
2474 for (i = 0; i < numAtts; i++)
2475 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2476
2477 /* We ignore the exclusion constraint if any */
2478
2479 return ii;
2480 }
2481
2482 /*
2483 * CompareIndexInfo
2484 * Return whether the properties of two indexes (in different tables)
2485 * indicate that they have the "same" definitions.
2486 *
2487 * Note: passing collations and opfamilies separately is a kludge. Adding
2488 * them to IndexInfo may result in better coding here and elsewhere.
2489 *
2490 * Use build_attrmap_by_name(index2, index1) to build the attmap.
2491 */
2492 bool
CompareIndexInfo(IndexInfo * info1,IndexInfo * info2,Oid * collations1,Oid * collations2,Oid * opfamilies1,Oid * opfamilies2,AttrMap * attmap)2493 CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
2494 Oid *collations1, Oid *collations2,
2495 Oid *opfamilies1, Oid *opfamilies2,
2496 AttrMap *attmap)
2497 {
2498 int i;
2499
2500 if (info1->ii_Unique != info2->ii_Unique)
2501 return false;
2502
2503 /* indexes are only equivalent if they have the same access method */
2504 if (info1->ii_Am != info2->ii_Am)
2505 return false;
2506
2507 /* and same number of attributes */
2508 if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
2509 return false;
2510
2511 /* and same number of key attributes */
2512 if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
2513 return false;
2514
2515 /*
2516 * and columns match through the attribute map (actual attribute numbers
2517 * might differ!) Note that this implies that index columns that are
2518 * expressions appear in the same positions. We will next compare the
2519 * expressions themselves.
2520 */
2521 for (i = 0; i < info1->ii_NumIndexAttrs; i++)
2522 {
2523 if (attmap->maplen < info2->ii_IndexAttrNumbers[i])
2524 elog(ERROR, "incorrect attribute map");
2525
2526 /* ignore expressions at this stage */
2527 if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
2528 (attmap->attnums[info2->ii_IndexAttrNumbers[i] - 1] !=
2529 info1->ii_IndexAttrNumbers[i]))
2530 return false;
2531
2532 /* collation and opfamily is not valid for including columns */
2533 if (i >= info1->ii_NumIndexKeyAttrs)
2534 continue;
2535
2536 if (collations1[i] != collations2[i])
2537 return false;
2538 if (opfamilies1[i] != opfamilies2[i])
2539 return false;
2540 }
2541
2542 /*
2543 * For expression indexes: either both are expression indexes, or neither
2544 * is; if they are, make sure the expressions match.
2545 */
2546 if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
2547 return false;
2548 if (info1->ii_Expressions != NIL)
2549 {
2550 bool found_whole_row;
2551 Node *mapped;
2552
2553 mapped = map_variable_attnos((Node *) info2->ii_Expressions,
2554 1, 0, attmap,
2555 InvalidOid, &found_whole_row);
2556 if (found_whole_row)
2557 {
2558 /*
2559 * we could throw an error here, but seems out of scope for this
2560 * routine.
2561 */
2562 return false;
2563 }
2564
2565 if (!equal(info1->ii_Expressions, mapped))
2566 return false;
2567 }
2568
2569 /* Partial index predicates must be identical, if they exist */
2570 if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
2571 return false;
2572 if (info1->ii_Predicate != NULL)
2573 {
2574 bool found_whole_row;
2575 Node *mapped;
2576
2577 mapped = map_variable_attnos((Node *) info2->ii_Predicate,
2578 1, 0, attmap,
2579 InvalidOid, &found_whole_row);
2580 if (found_whole_row)
2581 {
2582 /*
2583 * we could throw an error here, but seems out of scope for this
2584 * routine.
2585 */
2586 return false;
2587 }
2588 if (!equal(info1->ii_Predicate, mapped))
2589 return false;
2590 }
2591
2592 /* No support currently for comparing exclusion indexes. */
2593 if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
2594 return false;
2595
2596 return true;
2597 }
2598
2599 /* ----------------
2600 * BuildSpeculativeIndexInfo
2601 * Add extra state to IndexInfo record
2602 *
2603 * For unique indexes, we usually don't want to add info to the IndexInfo for
2604 * checking uniqueness, since the B-Tree AM handles that directly. However,
2605 * in the case of speculative insertion, additional support is required.
2606 *
2607 * Do this processing here rather than in BuildIndexInfo() to not incur the
2608 * overhead in the common non-speculative cases.
2609 * ----------------
2610 */
2611 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)2612 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2613 {
2614 int indnkeyatts;
2615 int i;
2616
2617 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2618
2619 /*
2620 * fetch info for checking unique indexes
2621 */
2622 Assert(ii->ii_Unique);
2623
2624 if (index->rd_rel->relam != BTREE_AM_OID)
2625 elog(ERROR, "unexpected non-btree speculative unique index");
2626
2627 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2628 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2629 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2630
2631 /*
2632 * We have to look up the operator's strategy number. This provides a
2633 * cross-check that the operator does match the index.
2634 */
2635 /* We need the func OIDs and strategy numbers too */
2636 for (i = 0; i < indnkeyatts; i++)
2637 {
2638 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2639 ii->ii_UniqueOps[i] =
2640 get_opfamily_member(index->rd_opfamily[i],
2641 index->rd_opcintype[i],
2642 index->rd_opcintype[i],
2643 ii->ii_UniqueStrats[i]);
2644 if (!OidIsValid(ii->ii_UniqueOps[i]))
2645 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2646 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2647 index->rd_opcintype[i], index->rd_opfamily[i]);
2648 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2649 }
2650 }
2651
2652 /* ----------------
2653 * FormIndexDatum
2654 * Construct values[] and isnull[] arrays for a new index tuple.
2655 *
2656 * indexInfo Info about the index
2657 * slot Heap tuple for which we must prepare an index entry
2658 * estate executor state for evaluating any index expressions
2659 * values Array of index Datums (output area)
2660 * isnull Array of is-null indicators (output area)
2661 *
2662 * When there are no index expressions, estate may be NULL. Otherwise it
2663 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
2664 * context must point to the heap tuple passed in.
2665 *
2666 * Notice we don't actually call index_form_tuple() here; we just prepare
2667 * its input arrays values[] and isnull[]. This is because the index AM
2668 * may wish to alter the data before storage.
2669 * ----------------
2670 */
2671 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)2672 FormIndexDatum(IndexInfo *indexInfo,
2673 TupleTableSlot *slot,
2674 EState *estate,
2675 Datum *values,
2676 bool *isnull)
2677 {
2678 ListCell *indexpr_item;
2679 int i;
2680
2681 if (indexInfo->ii_Expressions != NIL &&
2682 indexInfo->ii_ExpressionsState == NIL)
2683 {
2684 /* First time through, set up expression evaluation state */
2685 indexInfo->ii_ExpressionsState =
2686 ExecPrepareExprList(indexInfo->ii_Expressions, estate);
2687 /* Check caller has set up context correctly */
2688 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
2689 }
2690 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
2691
2692 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
2693 {
2694 int keycol = indexInfo->ii_IndexAttrNumbers[i];
2695 Datum iDatum;
2696 bool isNull;
2697
2698 if (keycol < 0)
2699 iDatum = slot_getsysattr(slot, keycol, &isNull);
2700 else if (keycol != 0)
2701 {
2702 /*
2703 * Plain index column; get the value we need directly from the
2704 * heap tuple.
2705 */
2706 iDatum = slot_getattr(slot, keycol, &isNull);
2707 }
2708 else
2709 {
2710 /*
2711 * Index expression --- need to evaluate it.
2712 */
2713 if (indexpr_item == NULL)
2714 elog(ERROR, "wrong number of index expressions");
2715 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
2716 GetPerTupleExprContext(estate),
2717 &isNull);
2718 indexpr_item = lnext(indexInfo->ii_ExpressionsState, indexpr_item);
2719 }
2720 values[i] = iDatum;
2721 isnull[i] = isNull;
2722 }
2723
2724 if (indexpr_item != NULL)
2725 elog(ERROR, "wrong number of index expressions");
2726 }
2727
2728
2729 /*
2730 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
2731 *
2732 * This routine updates the pg_class row of either an index or its parent
2733 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
2734 * to ensure we can do all the necessary work in just one update.
2735 *
2736 * hasindex: set relhasindex to this value
2737 * reltuples: if >= 0, set reltuples to this value; else no change
2738 *
2739 * If reltuples >= 0, relpages and relallvisible are also updated (using
2740 * RelationGetNumberOfBlocks() and visibilitymap_count()).
2741 *
2742 * NOTE: an important side-effect of this operation is that an SI invalidation
2743 * message is sent out to all backends --- including me --- causing relcache
2744 * entries to be flushed or updated with the new data. This must happen even
2745 * if we find that no change is needed in the pg_class row. When updating
2746 * a heap entry, this ensures that other backends find out about the new
2747 * index. When updating an index, it's important because some index AMs
2748 * expect a relcache flush to occur after REINDEX.
2749 */
2750 static void
index_update_stats(Relation rel,bool hasindex,double reltuples)2751 index_update_stats(Relation rel,
2752 bool hasindex,
2753 double reltuples)
2754 {
2755 Oid relid = RelationGetRelid(rel);
2756 Relation pg_class;
2757 HeapTuple tuple;
2758 Form_pg_class rd_rel;
2759 bool dirty;
2760
2761 /*
2762 * We always update the pg_class row using a non-transactional,
2763 * overwrite-in-place update. There are several reasons for this:
2764 *
2765 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
2766 *
2767 * 2. We could be reindexing pg_class itself, in which case we can't move
2768 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
2769 * not know about all the indexes yet (see reindex_relation).
2770 *
2771 * 3. Because we execute CREATE INDEX with just share lock on the parent
2772 * rel (to allow concurrent index creations), an ordinary update could
2773 * suffer a tuple-concurrently-updated failure against another CREATE
2774 * INDEX committing at about the same time. We can avoid that by having
2775 * them both do nontransactional updates (we assume they will both be
2776 * trying to change the pg_class row to the same thing, so it doesn't
2777 * matter which goes first).
2778 *
2779 * It is safe to use a non-transactional update even though our
2780 * transaction could still fail before committing. Setting relhasindex
2781 * true is safe even if there are no indexes (VACUUM will eventually fix
2782 * it). And of course the new relpages and reltuples counts are correct
2783 * regardless. However, we don't want to change relpages (or
2784 * relallvisible) if the caller isn't providing an updated reltuples
2785 * count, because that would bollix the reltuples/relpages ratio which is
2786 * what's really important.
2787 */
2788
2789 pg_class = table_open(RelationRelationId, RowExclusiveLock);
2790
2791 /*
2792 * Make a copy of the tuple to update. Normally we use the syscache, but
2793 * we can't rely on that during bootstrap or while reindexing pg_class
2794 * itself.
2795 */
2796 if (IsBootstrapProcessingMode() ||
2797 ReindexIsProcessingHeap(RelationRelationId))
2798 {
2799 /* don't assume syscache will work */
2800 TableScanDesc pg_class_scan;
2801 ScanKeyData key[1];
2802
2803 ScanKeyInit(&key[0],
2804 Anum_pg_class_oid,
2805 BTEqualStrategyNumber, F_OIDEQ,
2806 ObjectIdGetDatum(relid));
2807
2808 pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
2809 tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
2810 tuple = heap_copytuple(tuple);
2811 table_endscan(pg_class_scan);
2812 }
2813 else
2814 {
2815 /* normal case, use syscache */
2816 tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2817 }
2818
2819 if (!HeapTupleIsValid(tuple))
2820 elog(ERROR, "could not find tuple for relation %u", relid);
2821 rd_rel = (Form_pg_class) GETSTRUCT(tuple);
2822
2823 /* Should this be a more comprehensive test? */
2824 Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
2825
2826 /*
2827 * As a special hack, if we are dealing with an empty table and the
2828 * existing reltuples is -1, we leave that alone. This ensures that
2829 * creating an index as part of CREATE TABLE doesn't cause the table to
2830 * prematurely look like it's been vacuumed.
2831 */
2832 if (reltuples == 0 && rd_rel->reltuples < 0)
2833 reltuples = -1;
2834
2835 /* Apply required updates, if any, to copied tuple */
2836
2837 dirty = false;
2838 if (rd_rel->relhasindex != hasindex)
2839 {
2840 rd_rel->relhasindex = hasindex;
2841 dirty = true;
2842 }
2843
2844 if (reltuples >= 0)
2845 {
2846 BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2847 BlockNumber relallvisible;
2848
2849 if (rd_rel->relkind != RELKIND_INDEX)
2850 visibilitymap_count(rel, &relallvisible, NULL);
2851 else /* don't bother for indexes */
2852 relallvisible = 0;
2853
2854 if (rd_rel->relpages != (int32) relpages)
2855 {
2856 rd_rel->relpages = (int32) relpages;
2857 dirty = true;
2858 }
2859 if (rd_rel->reltuples != (float4) reltuples)
2860 {
2861 rd_rel->reltuples = (float4) reltuples;
2862 dirty = true;
2863 }
2864 if (rd_rel->relallvisible != (int32) relallvisible)
2865 {
2866 rd_rel->relallvisible = (int32) relallvisible;
2867 dirty = true;
2868 }
2869 }
2870
2871 /*
2872 * If anything changed, write out the tuple
2873 */
2874 if (dirty)
2875 {
2876 heap_inplace_update(pg_class, tuple);
2877 /* the above sends a cache inval message */
2878 }
2879 else
2880 {
2881 /* no need to change tuple, but force relcache inval anyway */
2882 CacheInvalidateRelcacheByTuple(tuple);
2883 }
2884
2885 heap_freetuple(tuple);
2886
2887 table_close(pg_class, RowExclusiveLock);
2888 }
2889
2890
2891 /*
2892 * index_build - invoke access-method-specific index build procedure
2893 *
2894 * On entry, the index's catalog entries are valid, and its physical disk
2895 * file has been created but is empty. We call the AM-specific build
2896 * procedure to fill in the index contents. We then update the pg_class
2897 * entries of the index and heap relation as needed, using statistics
2898 * returned by ambuild as well as data passed by the caller.
2899 *
2900 * isreindex indicates we are recreating a previously-existing index.
2901 * parallel indicates if parallelism may be useful.
2902 *
2903 * Note: before Postgres 8.2, the passed-in heap and index Relations
2904 * were automatically closed by this routine. This is no longer the case.
2905 * The caller opened 'em, and the caller should close 'em.
2906 */
2907 void
index_build(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool isreindex,bool parallel)2908 index_build(Relation heapRelation,
2909 Relation indexRelation,
2910 IndexInfo *indexInfo,
2911 bool isreindex,
2912 bool parallel)
2913 {
2914 IndexBuildResult *stats;
2915 Oid save_userid;
2916 int save_sec_context;
2917 int save_nestlevel;
2918
2919 /*
2920 * sanity checks
2921 */
2922 Assert(RelationIsValid(indexRelation));
2923 Assert(PointerIsValid(indexRelation->rd_indam));
2924 Assert(PointerIsValid(indexRelation->rd_indam->ambuild));
2925 Assert(PointerIsValid(indexRelation->rd_indam->ambuildempty));
2926
2927 /*
2928 * Determine worker process details for parallel CREATE INDEX. Currently,
2929 * only btree has support for parallel builds.
2930 *
2931 * Note that planner considers parallel safety for us.
2932 */
2933 if (parallel && IsNormalProcessingMode() &&
2934 indexRelation->rd_rel->relam == BTREE_AM_OID)
2935 indexInfo->ii_ParallelWorkers =
2936 plan_create_index_workers(RelationGetRelid(heapRelation),
2937 RelationGetRelid(indexRelation));
2938
2939 if (indexInfo->ii_ParallelWorkers == 0)
2940 ereport(DEBUG1,
2941 (errmsg_internal("building index \"%s\" on table \"%s\" serially",
2942 RelationGetRelationName(indexRelation),
2943 RelationGetRelationName(heapRelation))));
2944 else
2945 ereport(DEBUG1,
2946 (errmsg_internal("building index \"%s\" on table \"%s\" with request for %d parallel workers",
2947 RelationGetRelationName(indexRelation),
2948 RelationGetRelationName(heapRelation),
2949 indexInfo->ii_ParallelWorkers)));
2950
2951 /*
2952 * Switch to the table owner's userid, so that any index functions are run
2953 * as that user. Also lock down security-restricted operations and
2954 * arrange to make GUC variable changes local to this command.
2955 */
2956 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2957 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2958 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2959 save_nestlevel = NewGUCNestLevel();
2960
2961 /* Set up initial progress report status */
2962 {
2963 const int progress_index[] = {
2964 PROGRESS_CREATEIDX_PHASE,
2965 PROGRESS_CREATEIDX_SUBPHASE,
2966 PROGRESS_CREATEIDX_TUPLES_DONE,
2967 PROGRESS_CREATEIDX_TUPLES_TOTAL,
2968 PROGRESS_SCAN_BLOCKS_DONE,
2969 PROGRESS_SCAN_BLOCKS_TOTAL
2970 };
2971 const int64 progress_vals[] = {
2972 PROGRESS_CREATEIDX_PHASE_BUILD,
2973 PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE,
2974 0, 0, 0, 0
2975 };
2976
2977 pgstat_progress_update_multi_param(6, progress_index, progress_vals);
2978 }
2979
2980 /*
2981 * Call the access method's build procedure
2982 */
2983 stats = indexRelation->rd_indam->ambuild(heapRelation, indexRelation,
2984 indexInfo);
2985 Assert(PointerIsValid(stats));
2986
2987 /*
2988 * If this is an unlogged index, we may need to write out an init fork for
2989 * it -- but we must first check whether one already exists. If, for
2990 * example, an unlogged relation is truncated in the transaction that
2991 * created it, or truncated twice in a subsequent transaction, the
2992 * relfilenode won't change, and nothing needs to be done here.
2993 */
2994 if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2995 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2996 {
2997 RelationOpenSmgr(indexRelation);
2998 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2999 indexRelation->rd_indam->ambuildempty(indexRelation);
3000 }
3001
3002 /*
3003 * If we found any potentially broken HOT chains, mark the index as not
3004 * being usable until the current transaction is below the event horizon.
3005 * See src/backend/access/heap/README.HOT for discussion. Also set this
3006 * if early pruning/vacuuming is enabled for the heap relation. While it
3007 * might become safe to use the index earlier based on actual cleanup
3008 * activity and other active transactions, the test for that would be much
3009 * more complex and would require some form of blocking, so keep it simple
3010 * and fast by just using the current transaction.
3011 *
3012 * However, when reindexing an existing index, we should do nothing here.
3013 * Any HOT chains that are broken with respect to the index must predate
3014 * the index's original creation, so there is no need to change the
3015 * index's usability horizon. Moreover, we *must not* try to change the
3016 * index's pg_index entry while reindexing pg_index itself, and this
3017 * optimization nicely prevents that. The more complex rules needed for a
3018 * reindex are handled separately after this function returns.
3019 *
3020 * We also need not set indcheckxmin during a concurrent index build,
3021 * because we won't set indisvalid true until all transactions that care
3022 * about the broken HOT chains or early pruning/vacuuming are gone.
3023 *
3024 * Therefore, this code path can only be taken during non-concurrent
3025 * CREATE INDEX. Thus the fact that heap_update will set the pg_index
3026 * tuple's xmin doesn't matter, because that tuple was created in the
3027 * current transaction anyway. That also means we don't need to worry
3028 * about any concurrent readers of the tuple; no other transaction can see
3029 * it yet.
3030 */
3031 if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
3032 !isreindex &&
3033 !indexInfo->ii_Concurrent)
3034 {
3035 Oid indexId = RelationGetRelid(indexRelation);
3036 Relation pg_index;
3037 HeapTuple indexTuple;
3038 Form_pg_index indexForm;
3039
3040 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3041
3042 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3043 ObjectIdGetDatum(indexId));
3044 if (!HeapTupleIsValid(indexTuple))
3045 elog(ERROR, "cache lookup failed for index %u", indexId);
3046 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3047
3048 /* If it's a new index, indcheckxmin shouldn't be set ... */
3049 Assert(!indexForm->indcheckxmin);
3050
3051 indexForm->indcheckxmin = true;
3052 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3053
3054 heap_freetuple(indexTuple);
3055 table_close(pg_index, RowExclusiveLock);
3056 }
3057
3058 /*
3059 * Update heap and index pg_class rows
3060 */
3061 index_update_stats(heapRelation,
3062 true,
3063 stats->heap_tuples);
3064
3065 index_update_stats(indexRelation,
3066 false,
3067 stats->index_tuples);
3068
3069 /* Make the updated catalog row versions visible */
3070 CommandCounterIncrement();
3071
3072 /*
3073 * If it's for an exclusion constraint, make a second pass over the heap
3074 * to verify that the constraint is satisfied. We must not do this until
3075 * the index is fully valid. (Broken HOT chains shouldn't matter, though;
3076 * see comments for IndexCheckExclusion.)
3077 */
3078 if (indexInfo->ii_ExclusionOps != NULL)
3079 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
3080
3081 /* Roll back any GUC changes executed by index functions */
3082 AtEOXact_GUC(false, save_nestlevel);
3083
3084 /* Restore userid and security context */
3085 SetUserIdAndSecContext(save_userid, save_sec_context);
3086 }
3087
3088 /*
3089 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
3090 *
3091 * When creating an exclusion constraint, we first build the index normally
3092 * and then rescan the heap to check for conflicts. We assume that we only
3093 * need to validate tuples that are live according to an up-to-date snapshot,
3094 * and that these were correctly indexed even in the presence of broken HOT
3095 * chains. This should be OK since we are holding at least ShareLock on the
3096 * table, meaning there can be no uncommitted updates from other transactions.
3097 * (Note: that wouldn't necessarily work for system catalogs, since many
3098 * operations release write lock early on the system catalogs.)
3099 */
3100 static void
IndexCheckExclusion(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo)3101 IndexCheckExclusion(Relation heapRelation,
3102 Relation indexRelation,
3103 IndexInfo *indexInfo)
3104 {
3105 TableScanDesc scan;
3106 Datum values[INDEX_MAX_KEYS];
3107 bool isnull[INDEX_MAX_KEYS];
3108 ExprState *predicate;
3109 TupleTableSlot *slot;
3110 EState *estate;
3111 ExprContext *econtext;
3112 Snapshot snapshot;
3113
3114 /*
3115 * If we are reindexing the target index, mark it as no longer being
3116 * reindexed, to forestall an Assert in index_beginscan when we try to use
3117 * the index for probes. This is OK because the index is now fully valid.
3118 */
3119 if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
3120 ResetReindexProcessing();
3121
3122 /*
3123 * Need an EState for evaluation of index expressions and partial-index
3124 * predicates. Also a slot to hold the current tuple.
3125 */
3126 estate = CreateExecutorState();
3127 econtext = GetPerTupleExprContext(estate);
3128 slot = table_slot_create(heapRelation, NULL);
3129
3130 /* Arrange for econtext's scan tuple to be the tuple under test */
3131 econtext->ecxt_scantuple = slot;
3132
3133 /* Set up execution state for predicate, if any. */
3134 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3135
3136 /*
3137 * Scan all live tuples in the base relation.
3138 */
3139 snapshot = RegisterSnapshot(GetLatestSnapshot());
3140 scan = table_beginscan_strat(heapRelation, /* relation */
3141 snapshot, /* snapshot */
3142 0, /* number of keys */
3143 NULL, /* scan key */
3144 true, /* buffer access strategy OK */
3145 true); /* syncscan OK */
3146
3147 while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
3148 {
3149 CHECK_FOR_INTERRUPTS();
3150
3151 /*
3152 * In a partial index, ignore tuples that don't satisfy the predicate.
3153 */
3154 if (predicate != NULL)
3155 {
3156 if (!ExecQual(predicate, econtext))
3157 continue;
3158 }
3159
3160 /*
3161 * Extract index column values, including computing expressions.
3162 */
3163 FormIndexDatum(indexInfo,
3164 slot,
3165 estate,
3166 values,
3167 isnull);
3168
3169 /*
3170 * Check that this tuple has no conflicts.
3171 */
3172 check_exclusion_constraint(heapRelation,
3173 indexRelation, indexInfo,
3174 &(slot->tts_tid), values, isnull,
3175 estate, true);
3176
3177 MemoryContextReset(econtext->ecxt_per_tuple_memory);
3178 }
3179
3180 table_endscan(scan);
3181 UnregisterSnapshot(snapshot);
3182
3183 ExecDropSingleTupleTableSlot(slot);
3184
3185 FreeExecutorState(estate);
3186
3187 /* These may have been pointing to the now-gone estate */
3188 indexInfo->ii_ExpressionsState = NIL;
3189 indexInfo->ii_PredicateState = NULL;
3190 }
3191
3192
3193 /*
3194 * validate_index - support code for concurrent index builds
3195 *
3196 * We do a concurrent index build by first inserting the catalog entry for the
3197 * index via index_create(), marking it not indisready and not indisvalid.
3198 * Then we commit our transaction and start a new one, then we wait for all
3199 * transactions that could have been modifying the table to terminate. Now
3200 * we know that any subsequently-started transactions will see the index and
3201 * honor its constraints on HOT updates; so while existing HOT-chains might
3202 * be broken with respect to the index, no currently live tuple will have an
3203 * incompatible HOT update done to it. We now build the index normally via
3204 * index_build(), while holding a weak lock that allows concurrent
3205 * insert/update/delete. Also, we index only tuples that are valid
3206 * as of the start of the scan (see table_index_build_scan), whereas a normal
3207 * build takes care to include recently-dead tuples. This is OK because
3208 * we won't mark the index valid until all transactions that might be able
3209 * to see those tuples are gone. The reason for doing that is to avoid
3210 * bogus unique-index failures due to concurrent UPDATEs (we might see
3211 * different versions of the same row as being valid when we pass over them,
3212 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
3213 * does not contain any tuples added to the table while we built the index.
3214 *
3215 * Next, we mark the index "indisready" (but still not "indisvalid") and
3216 * commit the second transaction and start a third. Again we wait for all
3217 * transactions that could have been modifying the table to terminate. Now
3218 * we know that any subsequently-started transactions will see the index and
3219 * insert their new tuples into it. We then take a new reference snapshot
3220 * which is passed to validate_index(). Any tuples that are valid according
3221 * to this snap, but are not in the index, must be added to the index.
3222 * (Any tuples committed live after the snap will be inserted into the
3223 * index by their originating transaction. Any tuples committed dead before
3224 * the snap need not be indexed, because we will wait out all transactions
3225 * that might care about them before we mark the index valid.)
3226 *
3227 * validate_index() works by first gathering all the TIDs currently in the
3228 * index, using a bulkdelete callback that just stores the TIDs and doesn't
3229 * ever say "delete it". (This should be faster than a plain indexscan;
3230 * also, not all index AMs support full-index indexscan.) Then we sort the
3231 * TIDs, and finally scan the table doing a "merge join" against the TID list
3232 * to see which tuples are missing from the index. Thus we will ensure that
3233 * all tuples valid according to the reference snapshot are in the index.
3234 *
3235 * Building a unique index this way is tricky: we might try to insert a
3236 * tuple that is already dead or is in process of being deleted, and we
3237 * mustn't have a uniqueness failure against an updated version of the same
3238 * row. We could try to check the tuple to see if it's already dead and tell
3239 * index_insert() not to do the uniqueness check, but that still leaves us
3240 * with a race condition against an in-progress update. To handle that,
3241 * we expect the index AM to recheck liveness of the to-be-inserted tuple
3242 * before it declares a uniqueness error.
3243 *
3244 * After completing validate_index(), we wait until all transactions that
3245 * were alive at the time of the reference snapshot are gone; this is
3246 * necessary to be sure there are none left with a transaction snapshot
3247 * older than the reference (and hence possibly able to see tuples we did
3248 * not index). Then we mark the index "indisvalid" and commit. Subsequent
3249 * transactions will be able to use it for queries.
3250 *
3251 * Doing two full table scans is a brute-force strategy. We could try to be
3252 * cleverer, eg storing new tuples in a special area of the table (perhaps
3253 * making the table append-only by setting use_fsm). However that would
3254 * add yet more locking issues.
3255 */
3256 void
validate_index(Oid heapId,Oid indexId,Snapshot snapshot)3257 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
3258 {
3259 Relation heapRelation,
3260 indexRelation;
3261 IndexInfo *indexInfo;
3262 IndexVacuumInfo ivinfo;
3263 ValidateIndexState state;
3264 Oid save_userid;
3265 int save_sec_context;
3266 int save_nestlevel;
3267
3268 {
3269 const int progress_index[] = {
3270 PROGRESS_CREATEIDX_PHASE,
3271 PROGRESS_CREATEIDX_TUPLES_DONE,
3272 PROGRESS_CREATEIDX_TUPLES_TOTAL,
3273 PROGRESS_SCAN_BLOCKS_DONE,
3274 PROGRESS_SCAN_BLOCKS_TOTAL
3275 };
3276 const int64 progress_vals[] = {
3277 PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
3278 0, 0, 0, 0
3279 };
3280
3281 pgstat_progress_update_multi_param(5, progress_index, progress_vals);
3282 }
3283
3284 /* Open and lock the parent heap relation */
3285 heapRelation = table_open(heapId, ShareUpdateExclusiveLock);
3286 /* And the target index relation */
3287 indexRelation = index_open(indexId, RowExclusiveLock);
3288
3289 /*
3290 * Fetch info needed for index_insert. (You might think this should be
3291 * passed in from DefineIndex, but its copy is long gone due to having
3292 * been built in a previous transaction.)
3293 */
3294 indexInfo = BuildIndexInfo(indexRelation);
3295
3296 /* mark build is concurrent just for consistency */
3297 indexInfo->ii_Concurrent = true;
3298
3299 /*
3300 * Switch to the table owner's userid, so that any index functions are run
3301 * as that user. Also lock down security-restricted operations and
3302 * arrange to make GUC variable changes local to this command.
3303 */
3304 GetUserIdAndSecContext(&save_userid, &save_sec_context);
3305 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
3306 save_sec_context | SECURITY_RESTRICTED_OPERATION);
3307 save_nestlevel = NewGUCNestLevel();
3308
3309 /*
3310 * Scan the index and gather up all the TIDs into a tuplesort object.
3311 */
3312 ivinfo.index = indexRelation;
3313 ivinfo.analyze_only = false;
3314 ivinfo.report_progress = true;
3315 ivinfo.estimated_count = true;
3316 ivinfo.message_level = DEBUG2;
3317 ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
3318 ivinfo.strategy = NULL;
3319
3320 /*
3321 * Encode TIDs as int8 values for the sort, rather than directly sorting
3322 * item pointers. This can be significantly faster, primarily because TID
3323 * is a pass-by-reference type on all platforms, whereas int8 is
3324 * pass-by-value on most platforms.
3325 */
3326 state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
3327 InvalidOid, false,
3328 maintenance_work_mem,
3329 NULL, false);
3330 state.htups = state.itups = state.tups_inserted = 0;
3331
3332 /* ambulkdelete updates progress metrics */
3333 (void) index_bulk_delete(&ivinfo, NULL,
3334 validate_index_callback, (void *) &state);
3335
3336 /* Execute the sort */
3337 {
3338 const int progress_index[] = {
3339 PROGRESS_CREATEIDX_PHASE,
3340 PROGRESS_SCAN_BLOCKS_DONE,
3341 PROGRESS_SCAN_BLOCKS_TOTAL
3342 };
3343 const int64 progress_vals[] = {
3344 PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT,
3345 0, 0
3346 };
3347
3348 pgstat_progress_update_multi_param(3, progress_index, progress_vals);
3349 }
3350 tuplesort_performsort(state.tuplesort);
3351
3352 /*
3353 * Now scan the heap and "merge" it with the index
3354 */
3355 pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3356 PROGRESS_CREATEIDX_PHASE_VALIDATE_TABLESCAN);
3357 table_index_validate_scan(heapRelation,
3358 indexRelation,
3359 indexInfo,
3360 snapshot,
3361 &state);
3362
3363 /* Done with tuplesort object */
3364 tuplesort_end(state.tuplesort);
3365
3366 elog(DEBUG2,
3367 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
3368 state.htups, state.itups, state.tups_inserted);
3369
3370 /* Roll back any GUC changes executed by index functions */
3371 AtEOXact_GUC(false, save_nestlevel);
3372
3373 /* Restore userid and security context */
3374 SetUserIdAndSecContext(save_userid, save_sec_context);
3375
3376 /* Close rels, but keep locks */
3377 index_close(indexRelation, NoLock);
3378 table_close(heapRelation, NoLock);
3379 }
3380
3381 /*
3382 * validate_index_callback - bulkdelete callback to collect the index TIDs
3383 */
3384 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3385 validate_index_callback(ItemPointer itemptr, void *opaque)
3386 {
3387 ValidateIndexState *state = (ValidateIndexState *) opaque;
3388 int64 encoded = itemptr_encode(itemptr);
3389
3390 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3391 state->itups += 1;
3392 return false; /* never actually delete anything */
3393 }
3394
3395 /*
3396 * index_set_state_flags - adjust pg_index state flags
3397 *
3398 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3399 * flags that denote the index's state.
3400 *
3401 * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3402 * tuple, so other sessions will hear about the update as soon as we commit.
3403 */
3404 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3405 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3406 {
3407 Relation pg_index;
3408 HeapTuple indexTuple;
3409 Form_pg_index indexForm;
3410
3411 /* Open pg_index and fetch a writable copy of the index's tuple */
3412 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3413
3414 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3415 ObjectIdGetDatum(indexId));
3416 if (!HeapTupleIsValid(indexTuple))
3417 elog(ERROR, "cache lookup failed for index %u", indexId);
3418 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3419
3420 /* Perform the requested state change on the copy */
3421 switch (action)
3422 {
3423 case INDEX_CREATE_SET_READY:
3424 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3425 Assert(indexForm->indislive);
3426 Assert(!indexForm->indisready);
3427 Assert(!indexForm->indisvalid);
3428 indexForm->indisready = true;
3429 break;
3430 case INDEX_CREATE_SET_VALID:
3431 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3432 Assert(indexForm->indislive);
3433 Assert(indexForm->indisready);
3434 Assert(!indexForm->indisvalid);
3435 indexForm->indisvalid = true;
3436 break;
3437 case INDEX_DROP_CLEAR_VALID:
3438
3439 /*
3440 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3441 *
3442 * If indisready == true we leave it set so the index still gets
3443 * maintained by active transactions. We only need to ensure that
3444 * indisvalid is false. (We don't assert that either is initially
3445 * true, though, since we want to be able to retry a DROP INDEX
3446 * CONCURRENTLY that failed partway through.)
3447 *
3448 * Note: the CLUSTER logic assumes that indisclustered cannot be
3449 * set on any invalid index, so clear that flag too. Similarly,
3450 * ALTER TABLE assumes that indisreplident cannot be set for
3451 * invalid indexes.
3452 */
3453 indexForm->indisvalid = false;
3454 indexForm->indisclustered = false;
3455 indexForm->indisreplident = false;
3456 break;
3457 case INDEX_DROP_SET_DEAD:
3458
3459 /*
3460 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3461 *
3462 * We clear both indisready and indislive, because we not only
3463 * want to stop updates, we want to prevent sessions from touching
3464 * the index at all.
3465 */
3466 Assert(!indexForm->indisvalid);
3467 Assert(!indexForm->indisclustered);
3468 Assert(!indexForm->indisreplident);
3469 indexForm->indisready = false;
3470 indexForm->indislive = false;
3471 break;
3472 }
3473
3474 /* ... and update it */
3475 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3476
3477 table_close(pg_index, RowExclusiveLock);
3478 }
3479
3480
3481 /*
3482 * IndexGetRelation: given an index's relation OID, get the OID of the
3483 * relation it is an index on. Uses the system cache.
3484 */
3485 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3486 IndexGetRelation(Oid indexId, bool missing_ok)
3487 {
3488 HeapTuple tuple;
3489 Form_pg_index index;
3490 Oid result;
3491
3492 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3493 if (!HeapTupleIsValid(tuple))
3494 {
3495 if (missing_ok)
3496 return InvalidOid;
3497 elog(ERROR, "cache lookup failed for index %u", indexId);
3498 }
3499 index = (Form_pg_index) GETSTRUCT(tuple);
3500 Assert(index->indexrelid == indexId);
3501
3502 result = index->indrelid;
3503 ReleaseSysCache(tuple);
3504 return result;
3505 }
3506
3507 /*
3508 * reindex_index - This routine is used to recreate a single index
3509 */
3510 void
reindex_index(Oid indexId,bool skip_constraint_checks,char persistence,ReindexParams * params)3511 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3512 ReindexParams *params)
3513 {
3514 Relation iRel,
3515 heapRelation;
3516 Oid heapId;
3517 IndexInfo *indexInfo;
3518 volatile bool skipped_constraint = false;
3519 PGRUsage ru0;
3520 bool progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0);
3521 bool set_tablespace = false;
3522
3523 pg_rusage_init(&ru0);
3524
3525 /*
3526 * Open and lock the parent heap relation. ShareLock is sufficient since
3527 * we only need to be sure no schema or data changes are going on.
3528 */
3529 heapId = IndexGetRelation(indexId,
3530 (params->options & REINDEXOPT_MISSING_OK) != 0);
3531 /* if relation is missing, leave */
3532 if (!OidIsValid(heapId))
3533 return;
3534
3535 if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3536 heapRelation = try_table_open(heapId, ShareLock);
3537 else
3538 heapRelation = table_open(heapId, ShareLock);
3539
3540 /* if relation is gone, leave */
3541 if (!heapRelation)
3542 return;
3543
3544 if (progress)
3545 {
3546 const int progress_cols[] = {
3547 PROGRESS_CREATEIDX_COMMAND,
3548 PROGRESS_CREATEIDX_INDEX_OID
3549 };
3550 const int64 progress_vals[] = {
3551 PROGRESS_CREATEIDX_COMMAND_REINDEX,
3552 indexId
3553 };
3554
3555 pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3556 heapId);
3557 pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
3558 }
3559
3560 /*
3561 * Open the target index relation and get an exclusive lock on it, to
3562 * ensure that no one else is touching this particular index.
3563 */
3564 iRel = index_open(indexId, AccessExclusiveLock);
3565
3566 if (progress)
3567 pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
3568 iRel->rd_rel->relam);
3569
3570 /*
3571 * Partitioned indexes should never get processed here, as they have no
3572 * physical storage.
3573 */
3574 if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
3575 elog(ERROR, "cannot reindex partitioned index \"%s.%s\"",
3576 get_namespace_name(RelationGetNamespace(iRel)),
3577 RelationGetRelationName(iRel));
3578
3579 /*
3580 * Don't allow reindex on temp tables of other backends ... their local
3581 * buffer manager is not going to cope.
3582 */
3583 if (RELATION_IS_OTHER_TEMP(iRel))
3584 ereport(ERROR,
3585 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3586 errmsg("cannot reindex temporary tables of other sessions")));
3587
3588 /*
3589 * Don't allow reindex of an invalid index on TOAST table. This is a
3590 * leftover from a failed REINDEX CONCURRENTLY, and if rebuilt it would
3591 * not be possible to drop it anymore.
3592 */
3593 if (IsToastNamespace(RelationGetNamespace(iRel)) &&
3594 !get_index_isvalid(indexId))
3595 ereport(ERROR,
3596 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3597 errmsg("cannot reindex invalid index on TOAST table")));
3598
3599 /*
3600 * System relations cannot be moved even if allow_system_table_mods is
3601 * enabled to keep things consistent with the concurrent case where all
3602 * the indexes of a relation are processed in series, including indexes of
3603 * toast relations.
3604 *
3605 * Note that this check is not part of CheckRelationTableSpaceMove() as it
3606 * gets used for ALTER TABLE SET TABLESPACE that could cascade across
3607 * toast relations.
3608 */
3609 if (OidIsValid(params->tablespaceOid) &&
3610 IsSystemRelation(iRel))
3611 ereport(ERROR,
3612 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3613 errmsg("cannot move system relation \"%s\"",
3614 RelationGetRelationName(iRel))));
3615
3616 /* Check if the tablespace of this index needs to be changed */
3617 if (OidIsValid(params->tablespaceOid) &&
3618 CheckRelationTableSpaceMove(iRel, params->tablespaceOid))
3619 set_tablespace = true;
3620
3621 /*
3622 * Also check for active uses of the index in the current transaction; we
3623 * don't want to reindex underneath an open indexscan.
3624 */
3625 CheckTableNotInUse(iRel, "REINDEX INDEX");
3626
3627 /* Set new tablespace, if requested */
3628 if (set_tablespace)
3629 {
3630 /* Update its pg_class row */
3631 SetRelationTableSpace(iRel, params->tablespaceOid, InvalidOid);
3632
3633 /*
3634 * Schedule unlinking of the old index storage at transaction commit.
3635 */
3636 RelationDropStorage(iRel);
3637 RelationAssumeNewRelfilenode(iRel);
3638
3639 /* Make sure the reltablespace change is visible */
3640 CommandCounterIncrement();
3641 }
3642
3643 /*
3644 * All predicate locks on the index are about to be made invalid. Promote
3645 * them to relation locks on the heap.
3646 */
3647 TransferPredicateLocksToHeapRelation(iRel);
3648
3649 /* Fetch info needed for index_build */
3650 indexInfo = BuildIndexInfo(iRel);
3651
3652 /* If requested, skip checking uniqueness/exclusion constraints */
3653 if (skip_constraint_checks)
3654 {
3655 if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3656 skipped_constraint = true;
3657 indexInfo->ii_Unique = false;
3658 indexInfo->ii_ExclusionOps = NULL;
3659 indexInfo->ii_ExclusionProcs = NULL;
3660 indexInfo->ii_ExclusionStrats = NULL;
3661 }
3662
3663 /* Suppress use of the target index while rebuilding it */
3664 SetReindexProcessing(heapId, indexId);
3665
3666 /* Create a new physical relation for the index */
3667 RelationSetNewRelfilenode(iRel, persistence);
3668
3669 /* Initialize the index and rebuild */
3670 /* Note: we do not need to re-establish pkey setting */
3671 index_build(heapRelation, iRel, indexInfo, true, true);
3672
3673 /* Re-allow use of target index */
3674 ResetReindexProcessing();
3675
3676 /*
3677 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3678 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3679 * and we didn't skip a uniqueness check, we can now mark it valid. This
3680 * allows REINDEX to be used to clean up in such cases.
3681 *
3682 * We can also reset indcheckxmin, because we have now done a
3683 * non-concurrent index build, *except* in the case where index_build
3684 * found some still-broken HOT chains. If it did, and we don't have to
3685 * change any of the other flags, we just leave indcheckxmin alone (note
3686 * that index_build won't have changed it, because this is a reindex).
3687 * This is okay and desirable because not updating the tuple leaves the
3688 * index's usability horizon (recorded as the tuple's xmin value) the same
3689 * as it was.
3690 *
3691 * But, if the index was invalid/not-ready/dead and there were broken HOT
3692 * chains, we had better force indcheckxmin true, because the normal
3693 * argument that the HOT chains couldn't conflict with the index is
3694 * suspect for an invalid index. (A conflict is definitely possible if
3695 * the index was dead. It probably shouldn't happen otherwise, but let's
3696 * be conservative.) In this case advancing the usability horizon is
3697 * appropriate.
3698 *
3699 * Another reason for avoiding unnecessary updates here is that while
3700 * reindexing pg_index itself, we must not try to update tuples in it.
3701 * pg_index's indexes should always have these flags in their clean state,
3702 * so that won't happen.
3703 *
3704 * If early pruning/vacuuming is enabled for the heap relation, the
3705 * usability horizon must be advanced to the current transaction on every
3706 * build or rebuild. pg_index is OK in this regard because catalog tables
3707 * are not subject to early cleanup.
3708 */
3709 if (!skipped_constraint)
3710 {
3711 Relation pg_index;
3712 HeapTuple indexTuple;
3713 Form_pg_index indexForm;
3714 bool index_bad;
3715 bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3716
3717 pg_index = table_open(IndexRelationId, RowExclusiveLock);
3718
3719 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3720 ObjectIdGetDatum(indexId));
3721 if (!HeapTupleIsValid(indexTuple))
3722 elog(ERROR, "cache lookup failed for index %u", indexId);
3723 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3724
3725 index_bad = (!indexForm->indisvalid ||
3726 !indexForm->indisready ||
3727 !indexForm->indislive);
3728 if (index_bad ||
3729 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3730 early_pruning_enabled)
3731 {
3732 if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3733 indexForm->indcheckxmin = false;
3734 else if (index_bad || early_pruning_enabled)
3735 indexForm->indcheckxmin = true;
3736 indexForm->indisvalid = true;
3737 indexForm->indisready = true;
3738 indexForm->indislive = true;
3739 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3740
3741 /*
3742 * Invalidate the relcache for the table, so that after we commit
3743 * all sessions will refresh the table's index list. This ensures
3744 * that if anyone misses seeing the pg_index row during this
3745 * update, they'll refresh their list before attempting any update
3746 * on the table.
3747 */
3748 CacheInvalidateRelcache(heapRelation);
3749 }
3750
3751 table_close(pg_index, RowExclusiveLock);
3752 }
3753
3754 /* Log what we did */
3755 if ((params->options & REINDEXOPT_VERBOSE) != 0)
3756 ereport(INFO,
3757 (errmsg("index \"%s\" was reindexed",
3758 get_rel_name(indexId)),
3759 errdetail_internal("%s",
3760 pg_rusage_show(&ru0))));
3761
3762 if (progress)
3763 pgstat_progress_end_command();
3764
3765 /* Close rels, but keep locks */
3766 index_close(iRel, NoLock);
3767 table_close(heapRelation, NoLock);
3768 }
3769
3770 /*
3771 * reindex_relation - This routine is used to recreate all indexes
3772 * of a relation (and optionally its toast relation too, if any).
3773 *
3774 * "flags" is a bitmask that can include any combination of these bits:
3775 *
3776 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3777 *
3778 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3779 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3780 * indexes are inconsistent with it. This makes things tricky if the relation
3781 * is a system catalog that we might consult during the reindexing. To deal
3782 * with that case, we mark all of the indexes as pending rebuild so that they
3783 * won't be trusted until rebuilt. The caller is required to call us *without*
3784 * having made the rebuilt table visible by doing CommandCounterIncrement;
3785 * we'll do CCI after having collected the index list. (This way we can still
3786 * use catalog indexes while collecting the list.)
3787 *
3788 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3789 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3790 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3791 * rebuild an index if constraint inconsistency is suspected. For optimal
3792 * performance, other callers should include the flag only after transforming
3793 * the data in a manner that risks a change in constraint validity.
3794 *
3795 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3796 * rebuilt indexes to unlogged.
3797 *
3798 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3799 * rebuilt indexes to permanent.
3800 *
3801 * Returns true if any indexes were rebuilt (including toast table's index
3802 * when relevant). Note that a CommandCounterIncrement will occur after each
3803 * index rebuild.
3804 */
3805 bool
reindex_relation(Oid relid,int flags,ReindexParams * params)3806 reindex_relation(Oid relid, int flags, ReindexParams *params)
3807 {
3808 Relation rel;
3809 Oid toast_relid;
3810 List *indexIds;
3811 char persistence;
3812 bool result;
3813 ListCell *indexId;
3814 int i;
3815
3816 /*
3817 * Open and lock the relation. ShareLock is sufficient since we only need
3818 * to prevent schema and data changes in it. The lock level used here
3819 * should match ReindexTable().
3820 */
3821 if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3822 rel = try_table_open(relid, ShareLock);
3823 else
3824 rel = table_open(relid, ShareLock);
3825
3826 /* if relation is gone, leave */
3827 if (!rel)
3828 return false;
3829
3830 /*
3831 * Partitioned tables should never get processed here, as they have no
3832 * physical storage.
3833 */
3834 if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3835 elog(ERROR, "cannot reindex partitioned table \"%s.%s\"",
3836 get_namespace_name(RelationGetNamespace(rel)),
3837 RelationGetRelationName(rel));
3838
3839 toast_relid = rel->rd_rel->reltoastrelid;
3840
3841 /*
3842 * Get the list of index OIDs for this relation. (We trust to the
3843 * relcache to get this with a sequential scan if ignoring system
3844 * indexes.)
3845 */
3846 indexIds = RelationGetIndexList(rel);
3847
3848 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3849 {
3850 /* Suppress use of all the indexes until they are rebuilt */
3851 SetReindexPending(indexIds);
3852
3853 /*
3854 * Make the new heap contents visible --- now things might be
3855 * inconsistent!
3856 */
3857 CommandCounterIncrement();
3858 }
3859
3860 /*
3861 * Compute persistence of indexes: same as that of owning rel, unless
3862 * caller specified otherwise.
3863 */
3864 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3865 persistence = RELPERSISTENCE_UNLOGGED;
3866 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3867 persistence = RELPERSISTENCE_PERMANENT;
3868 else
3869 persistence = rel->rd_rel->relpersistence;
3870
3871 /* Reindex all the indexes. */
3872 i = 1;
3873 foreach(indexId, indexIds)
3874 {
3875 Oid indexOid = lfirst_oid(indexId);
3876 Oid indexNamespaceId = get_rel_namespace(indexOid);
3877
3878 /*
3879 * Skip any invalid indexes on a TOAST table. These can only be
3880 * duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
3881 * rebuilt it would not be possible to drop them anymore.
3882 */
3883 if (IsToastNamespace(indexNamespaceId) &&
3884 !get_index_isvalid(indexOid))
3885 {
3886 ereport(WARNING,
3887 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3888 errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
3889 get_namespace_name(indexNamespaceId),
3890 get_rel_name(indexOid))));
3891 continue;
3892 }
3893
3894 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3895 persistence, params);
3896
3897 CommandCounterIncrement();
3898
3899 /* Index should no longer be in the pending list */
3900 Assert(!ReindexIsProcessingIndex(indexOid));
3901
3902 /* Set index rebuild count */
3903 pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
3904 i);
3905 i++;
3906 }
3907
3908 /*
3909 * Close rel, but continue to hold the lock.
3910 */
3911 table_close(rel, NoLock);
3912
3913 result = (indexIds != NIL);
3914
3915 /*
3916 * If the relation has a secondary toast rel, reindex that too while we
3917 * still hold the lock on the main table.
3918 */
3919 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3920 {
3921 /*
3922 * Note that this should fail if the toast relation is missing, so
3923 * reset REINDEXOPT_MISSING_OK. Even if a new tablespace is set for
3924 * the parent relation, the indexes on its toast table are not moved.
3925 * This rule is enforced by setting tablespaceOid to InvalidOid.
3926 */
3927 ReindexParams newparams = *params;
3928
3929 newparams.options &= ~(REINDEXOPT_MISSING_OK);
3930 newparams.tablespaceOid = InvalidOid;
3931 result |= reindex_relation(toast_relid, flags, &newparams);
3932 }
3933
3934 return result;
3935 }
3936
3937
3938 /* ----------------------------------------------------------------
3939 * System index reindexing support
3940 *
3941 * When we are busy reindexing a system index, this code provides support
3942 * for preventing catalog lookups from using that index. We also make use
3943 * of this to catch attempted uses of user indexes during reindexing of
3944 * those indexes. This information is propagated to parallel workers;
3945 * attempting to change it during a parallel operation is not permitted.
3946 * ----------------------------------------------------------------
3947 */
3948
/* Heap whose index is being rebuilt right now; InvalidOid when none is. */
static Oid currentlyReindexedHeap = InvalidOid;
/* Index being rebuilt right now; InvalidOid when none is. */
static Oid currentlyReindexedIndex = InvalidOid;
/* OID list of indexes marked as awaiting rebuild; NIL when there are none. */
static List *pendingReindexedIndexes = NIL;
/*
 * Transaction nest level recorded when the state above was last set up;
 * ResetReindexState() compares it against the level being aborted.
 */
static int reindexingNestLevel = 0;
3953
3954 /*
3955 * ReindexIsProcessingHeap
3956 * True if heap specified by OID is currently being reindexed.
3957 */
3958 bool
ReindexIsProcessingHeap(Oid heapOid)3959 ReindexIsProcessingHeap(Oid heapOid)
3960 {
3961 return heapOid == currentlyReindexedHeap;
3962 }
3963
3964 /*
3965 * ReindexIsCurrentlyProcessingIndex
3966 * True if index specified by OID is currently being reindexed.
3967 */
3968 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3969 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3970 {
3971 return indexOid == currentlyReindexedIndex;
3972 }
3973
3974 /*
3975 * ReindexIsProcessingIndex
3976 * True if index specified by OID is currently being reindexed,
3977 * or should be treated as invalid because it is awaiting reindex.
3978 */
3979 bool
ReindexIsProcessingIndex(Oid indexOid)3980 ReindexIsProcessingIndex(Oid indexOid)
3981 {
3982 return indexOid == currentlyReindexedIndex ||
3983 list_member_oid(pendingReindexedIndexes, indexOid);
3984 }
3985
3986 /*
3987 * SetReindexProcessing
3988 * Set flag that specified heap/index are being reindexed.
3989 */
3990 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3991 SetReindexProcessing(Oid heapOid, Oid indexOid)
3992 {
3993 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3994 /* Reindexing is not re-entrant. */
3995 if (OidIsValid(currentlyReindexedHeap))
3996 elog(ERROR, "cannot reindex while reindexing");
3997 currentlyReindexedHeap = heapOid;
3998 currentlyReindexedIndex = indexOid;
3999 /* Index is no longer "pending" reindex. */
4000 RemoveReindexPending(indexOid);
4001 /* This may have been set already, but in case it isn't, do so now. */
4002 reindexingNestLevel = GetCurrentTransactionNestLevel();
4003 }
4004
4005 /*
4006 * ResetReindexProcessing
4007 * Unset reindexing status.
4008 */
4009 static void
ResetReindexProcessing(void)4010 ResetReindexProcessing(void)
4011 {
4012 currentlyReindexedHeap = InvalidOid;
4013 currentlyReindexedIndex = InvalidOid;
4014 /* reindexingNestLevel remains set till end of (sub)transaction */
4015 }
4016
4017 /*
4018 * SetReindexPending
4019 * Mark the given indexes as pending reindex.
4020 *
4021 * NB: we assume that the current memory context stays valid throughout.
4022 */
4023 static void
SetReindexPending(List * indexes)4024 SetReindexPending(List *indexes)
4025 {
4026 /* Reindexing is not re-entrant. */
4027 if (pendingReindexedIndexes)
4028 elog(ERROR, "cannot reindex while reindexing");
4029 if (IsInParallelMode())
4030 elog(ERROR, "cannot modify reindex state during a parallel operation");
4031 pendingReindexedIndexes = list_copy(indexes);
4032 reindexingNestLevel = GetCurrentTransactionNestLevel();
4033 }
4034
4035 /*
4036 * RemoveReindexPending
4037 * Remove the given index from the pending list.
4038 */
4039 static void
RemoveReindexPending(Oid indexOid)4040 RemoveReindexPending(Oid indexOid)
4041 {
4042 if (IsInParallelMode())
4043 elog(ERROR, "cannot modify reindex state during a parallel operation");
4044 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
4045 indexOid);
4046 }
4047
4048 /*
4049 * ResetReindexState
4050 * Clear all reindexing state during (sub)transaction abort.
4051 */
4052 void
ResetReindexState(int nestLevel)4053 ResetReindexState(int nestLevel)
4054 {
4055 /*
4056 * Because reindexing is not re-entrant, we don't need to cope with nested
4057 * reindexing states. We just need to avoid messing up the outer-level
4058 * state in case a subtransaction fails within a REINDEX. So checking the
4059 * current nest level against that of the reindex operation is sufficient.
4060 */
4061 if (reindexingNestLevel >= nestLevel)
4062 {
4063 currentlyReindexedHeap = InvalidOid;
4064 currentlyReindexedIndex = InvalidOid;
4065
4066 /*
4067 * We needn't try to release the contents of pendingReindexedIndexes;
4068 * that list should be in a transaction-lifespan context, so it will
4069 * go away automatically.
4070 */
4071 pendingReindexedIndexes = NIL;
4072
4073 reindexingNestLevel = 0;
4074 }
4075 }
4076
4077 /*
4078 * EstimateReindexStateSpace
4079 * Estimate space needed to pass reindex state to parallel workers.
4080 */
4081 Size
EstimateReindexStateSpace(void)4082 EstimateReindexStateSpace(void)
4083 {
4084 return offsetof(SerializedReindexState, pendingReindexedIndexes)
4085 + mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
4086 }
4087
4088 /*
4089 * SerializeReindexState
4090 * Serialize reindex state for parallel workers.
4091 */
4092 void
SerializeReindexState(Size maxsize,char * start_address)4093 SerializeReindexState(Size maxsize, char *start_address)
4094 {
4095 SerializedReindexState *sistate = (SerializedReindexState *) start_address;
4096 int c = 0;
4097 ListCell *lc;
4098
4099 sistate->currentlyReindexedHeap = currentlyReindexedHeap;
4100 sistate->currentlyReindexedIndex = currentlyReindexedIndex;
4101 sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
4102 foreach(lc, pendingReindexedIndexes)
4103 sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
4104 }
4105
4106 /*
4107 * RestoreReindexState
4108 * Restore reindex state in a parallel worker.
4109 */
4110 void
RestoreReindexState(void * reindexstate)4111 RestoreReindexState(void *reindexstate)
4112 {
4113 SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
4114 int c = 0;
4115 MemoryContext oldcontext;
4116
4117 currentlyReindexedHeap = sistate->currentlyReindexedHeap;
4118 currentlyReindexedIndex = sistate->currentlyReindexedIndex;
4119
4120 Assert(pendingReindexedIndexes == NIL);
4121 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
4122 for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
4123 pendingReindexedIndexes =
4124 lappend_oid(pendingReindexedIndexes,
4125 sistate->pendingReindexedIndexes[c]);
4126 MemoryContextSwitchTo(oldcontext);
4127
4128 /* Note the worker has its own transaction nesting level */
4129 reindexingNestLevel = GetCurrentTransactionNestLevel();
4130 }
4131