1 /*-------------------------------------------------------------------------
2 *
3 * index.c
4 * code to create and destroy POSTGRES index relations
5 *
6 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
19 *
20 *-------------------------------------------------------------------------
21 */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/reloptions.h"
30 #include "access/sysattr.h"
31 #include "access/transam.h"
32 #include "access/visibilitymap.h"
33 #include "access/xact.h"
34 #include "bootstrap/bootstrap.h"
35 #include "catalog/binary_upgrade.h"
36 #include "catalog/catalog.h"
37 #include "catalog/dependency.h"
38 #include "catalog/heap.h"
39 #include "catalog/index.h"
40 #include "catalog/objectaccess.h"
41 #include "catalog/pg_am.h"
42 #include "catalog/pg_collation.h"
43 #include "catalog/pg_constraint.h"
44 #include "catalog/pg_depend.h"
45 #include "catalog/pg_inherits.h"
46 #include "catalog/pg_operator.h"
47 #include "catalog/pg_opclass.h"
48 #include "catalog/pg_tablespace.h"
49 #include "catalog/pg_trigger.h"
50 #include "catalog/pg_type.h"
51 #include "catalog/storage.h"
52 #include "commands/tablecmds.h"
53 #include "commands/event_trigger.h"
54 #include "commands/trigger.h"
55 #include "executor/executor.h"
56 #include "miscadmin.h"
57 #include "nodes/makefuncs.h"
58 #include "nodes/nodeFuncs.h"
59 #include "optimizer/clauses.h"
60 #include "optimizer/planner.h"
61 #include "parser/parser.h"
62 #include "rewrite/rewriteManip.h"
63 #include "storage/bufmgr.h"
64 #include "storage/lmgr.h"
65 #include "storage/predicate.h"
66 #include "storage/procarray.h"
67 #include "storage/smgr.h"
68 #include "utils/builtins.h"
69 #include "utils/fmgroids.h"
70 #include "utils/guc.h"
71 #include "utils/inval.h"
72 #include "utils/lsyscache.h"
73 #include "utils/memutils.h"
74 #include "utils/pg_rusage.h"
75 #include "utils/syscache.h"
76 #include "utils/tuplesort.h"
77 #include "utils/snapmgr.h"
78 #include "utils/tqual.h"
79
80
/* Potentially set by pg_upgrade_support functions */
Oid			binary_upgrade_next_index_pg_class_oid = InvalidOid;

/* state info for validate_index bulkdelete callback */
typedef struct
{
	Tuplesortstate *tuplesort;	/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* # heap tuples seen */
				itups,			/* # index tuples seen */
				tups_inserted;	/* # tuples inserted into the index */
} v_i_state;

/*
 * Pointer-free representation of variables used when reindexing system
 * catalogs; we use this to propagate those values to parallel workers.
 */
typedef struct
{
	Oid			currentlyReindexedHeap;		/* heap being reindexed, or InvalidOid */
	Oid			currentlyReindexedIndex;	/* index being reindexed, or InvalidOid */
	int			numPendingReindexedIndexes; /* # entries in array below */
	Oid			pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER]; /* OIDs of indexes
																 * awaiting reindex */
} SerializedReindexState;
105
/* non-export function prototypes */

/* catalog inspection / index definition construction */
static bool relationHasPrimaryKey(Relation rel);
static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 List *indexColNames,
						 Oid accessMethodObjectId,
						 Oid *collationObjectId,
						 Oid *classObjectId);
static void InitializeAttributeOids(Relation indexRelation,
						int numatts, Oid indexoid);
static void AppendAttributeTuples(Relation indexRelation, int numatts);
/* NOTE(review): the definition spells this parameter "parentIndexOid";
 * harmless in C, but the names could be made consistent. */
static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
					Oid parentIndexId,
					IndexInfo *indexInfo,
					Oid *collationOids,
					Oid *classOids,
					int16 *coloptions,
					bool primary,
					bool isexclusion,
					bool immediate,
					bool isvalid,
					bool isready);
static void index_update_stats(Relation rel,
				   bool hasindex,
				   double reltuples);
static void IndexCheckExclusion(Relation heapRelation,
					Relation indexRelation,
					IndexInfo *indexInfo);

/* helpers for CREATE INDEX CONCURRENTLY validation */
static inline int64 itemptr_encode(ItemPointer itemptr);
static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
static bool validate_index_callback(ItemPointer itemptr, void *opaque);
static void validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state);

/* REINDEX bookkeeping for system catalogs */
static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
static void SetReindexProcessing(Oid heapOid, Oid indexOid);
static void ResetReindexProcessing(void);
static void SetReindexPending(List *indexes);
static void RemoveReindexPending(Oid indexOid);
148
149 /*
150 * relationHasPrimaryKey
151 * See whether an existing relation has a primary key.
152 *
153 * Caller must have suitable lock on the relation.
154 *
155 * Note: we intentionally do not check IndexIsValid here; that's because this
156 * is used to enforce the rule that there can be only one indisprimary index,
157 * and we want that to be true even if said index is invalid.
158 */
159 static bool
relationHasPrimaryKey(Relation rel)160 relationHasPrimaryKey(Relation rel)
161 {
162 bool result = false;
163 List *indexoidlist;
164 ListCell *indexoidscan;
165
166 /*
167 * Get the list of index OIDs for the table from the relcache, and look up
168 * each one in the pg_index syscache until we find one marked primary key
169 * (hopefully there isn't more than one such).
170 */
171 indexoidlist = RelationGetIndexList(rel);
172
173 foreach(indexoidscan, indexoidlist)
174 {
175 Oid indexoid = lfirst_oid(indexoidscan);
176 HeapTuple indexTuple;
177
178 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
179 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
180 elog(ERROR, "cache lookup failed for index %u", indexoid);
181 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
182 ReleaseSysCache(indexTuple);
183 if (result)
184 break;
185 }
186
187 list_free(indexoidlist);
188
189 return result;
190 }
191
192 /*
193 * index_check_primary_key
194 * Apply special checks needed before creating a PRIMARY KEY index
195 *
196 * This processing used to be in DefineIndex(), but has been split out
197 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
198 *
199 * We check for a pre-existing primary key, and that all columns of the index
200 * are simple column references (not expressions), and that all those
201 * columns are marked NOT NULL. If they aren't (which can only happen during
202 * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
203 * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
204 * them so --- or fail if they are not in fact nonnull.
205 *
206 * As of PG v10, the SET NOT NULL is applied to child tables as well, so
207 * that the behavior is like a manual SET NOT NULL.
208 *
209 * Caller had better have at least ShareLock on the table, else the not-null
210 * checking isn't trustworthy.
211 */
212 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)213 index_check_primary_key(Relation heapRel,
214 IndexInfo *indexInfo,
215 bool is_alter_table,
216 IndexStmt *stmt)
217 {
218 List *cmds;
219 int i;
220
221 /*
222 * If ALTER TABLE and CREATE TABLE .. PARTITION OF, check that there isn't
223 * already a PRIMARY KEY. In CREATE TABLE for an ordinary relations, we
224 * have faith that the parser rejected multiple pkey clauses; and CREATE
225 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
226 */
227 if ((is_alter_table || heapRel->rd_rel->relispartition) &&
228 relationHasPrimaryKey(heapRel))
229 {
230 ereport(ERROR,
231 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
232 errmsg("multiple primary keys for table \"%s\" are not allowed",
233 RelationGetRelationName(heapRel))));
234 }
235
236 /*
237 * Check that all of the attributes in a primary key are marked as not
238 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
239 */
240 cmds = NIL;
241 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
242 {
243 AttrNumber attnum = indexInfo->ii_IndexAttrNumbers[i];
244 HeapTuple atttuple;
245 Form_pg_attribute attform;
246
247 if (attnum == 0)
248 ereport(ERROR,
249 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
250 errmsg("primary keys cannot be expressions")));
251
252 /* System attributes are never null, so no need to check */
253 if (attnum < 0)
254 continue;
255
256 atttuple = SearchSysCache2(ATTNUM,
257 ObjectIdGetDatum(RelationGetRelid(heapRel)),
258 Int16GetDatum(attnum));
259 if (!HeapTupleIsValid(atttuple))
260 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
261 attnum, RelationGetRelid(heapRel));
262 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
263
264 if (!attform->attnotnull)
265 {
266 /* Add a subcommand to make this one NOT NULL */
267 AlterTableCmd *cmd = makeNode(AlterTableCmd);
268
269 cmd->subtype = AT_SetNotNull;
270 cmd->name = pstrdup(NameStr(attform->attname));
271 cmds = lappend(cmds, cmd);
272 }
273
274 ReleaseSysCache(atttuple);
275 }
276
277 /*
278 * XXX: possible future improvement: when being called from ALTER TABLE,
279 * it would be more efficient to merge this with the outer ALTER TABLE, so
280 * as to avoid two scans. But that seems to complicate DefineIndex's API
281 * unduly.
282 */
283 if (cmds)
284 {
285 EventTriggerAlterTableStart((Node *) stmt);
286 AlterTableInternal(RelationGetRelid(heapRel), cmds, true);
287 EventTriggerAlterTableEnd();
288 }
289 }
290
291 /*
292 * ConstructTupleDescriptor
293 *
294 * Build an index tuple descriptor for a new index
295 */
296 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid * collationObjectId,Oid * classObjectId)297 ConstructTupleDescriptor(Relation heapRelation,
298 IndexInfo *indexInfo,
299 List *indexColNames,
300 Oid accessMethodObjectId,
301 Oid *collationObjectId,
302 Oid *classObjectId)
303 {
304 int numatts = indexInfo->ii_NumIndexAttrs;
305 int numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
306 ListCell *colnames_item = list_head(indexColNames);
307 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
308 IndexAmRoutine *amroutine;
309 TupleDesc heapTupDesc;
310 TupleDesc indexTupDesc;
311 int natts; /* #atts in heap rel --- for error checks */
312 int i;
313
314 /* We need access to the index AM's API struct */
315 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
316
317 /* ... and to the table's tuple descriptor */
318 heapTupDesc = RelationGetDescr(heapRelation);
319 natts = RelationGetForm(heapRelation)->relnatts;
320
321 /*
322 * allocate the new tuple descriptor
323 */
324 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
325
326 /*
327 * For simple index columns, we copy the pg_attribute row from the parent
328 * relation and modify it as necessary. For expressions we have to cons
329 * up a pg_attribute row the hard way.
330 */
331 for (i = 0; i < numatts; i++)
332 {
333 AttrNumber atnum = indexInfo->ii_IndexAttrNumbers[i];
334 Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
335 HeapTuple tuple;
336 Form_pg_type typeTup;
337 Form_pg_opclass opclassTup;
338 Oid keyType;
339
340 if (atnum != 0)
341 {
342 /* Simple index column */
343 Form_pg_attribute from;
344
345 if (atnum < 0)
346 {
347 /*
348 * here we are indexing on a system attribute (-1...-n)
349 */
350 from = SystemAttributeDefinition(atnum,
351 heapRelation->rd_rel->relhasoids);
352 }
353 else
354 {
355 /*
356 * here we are indexing on a normal attribute (1...n)
357 */
358 if (atnum > natts) /* safety check */
359 elog(ERROR, "invalid column number %d", atnum);
360 from = TupleDescAttr(heapTupDesc,
361 AttrNumberGetAttrOffset(atnum));
362 }
363
364 /*
365 * now that we've determined the "from", let's copy the tuple desc
366 * data...
367 */
368 memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
369
370 /*
371 * Set the attribute name as specified by caller.
372 */
373 if (colnames_item == NULL) /* shouldn't happen */
374 elog(ERROR, "too few entries in colnames list");
375 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
376 colnames_item = lnext(colnames_item);
377
378 /*
379 * Fix the stuff that should not be the same as the underlying
380 * attr
381 */
382 to->attnum = i + 1;
383
384 to->attstattarget = -1;
385 to->attcacheoff = -1;
386 to->attnotnull = false;
387 to->atthasdef = false;
388 to->atthasmissing = false;
389 to->attidentity = '\0';
390 to->attislocal = true;
391 to->attinhcount = 0;
392 to->attcollation = (i < numkeyatts) ?
393 collationObjectId[i] : InvalidOid;
394 }
395 else
396 {
397 /* Expressional index */
398 Node *indexkey;
399
400 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
401
402 /*
403 * Set the attribute name as specified by caller.
404 */
405 if (colnames_item == NULL) /* shouldn't happen */
406 elog(ERROR, "too few entries in colnames list");
407 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
408 colnames_item = lnext(colnames_item);
409
410 if (indexpr_item == NULL) /* shouldn't happen */
411 elog(ERROR, "too few entries in indexprs list");
412 indexkey = (Node *) lfirst(indexpr_item);
413 indexpr_item = lnext(indexpr_item);
414
415 /*
416 * Lookup the expression type in pg_type for the type length etc.
417 */
418 keyType = exprType(indexkey);
419 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
420 if (!HeapTupleIsValid(tuple))
421 elog(ERROR, "cache lookup failed for type %u", keyType);
422 typeTup = (Form_pg_type) GETSTRUCT(tuple);
423
424 /*
425 * Assign some of the attributes values. Leave the rest as 0.
426 */
427 to->attnum = i + 1;
428 to->atttypid = keyType;
429 to->attlen = typeTup->typlen;
430 to->attbyval = typeTup->typbyval;
431 to->attstorage = typeTup->typstorage;
432 to->attalign = typeTup->typalign;
433 to->attstattarget = -1;
434 to->attcacheoff = -1;
435 to->atttypmod = exprTypmod(indexkey);
436 to->attislocal = true;
437 to->attcollation = (i < numkeyatts) ?
438 collationObjectId[i] : InvalidOid;
439
440 ReleaseSysCache(tuple);
441
442 /*
443 * Make sure the expression yields a type that's safe to store in
444 * an index. We need this defense because we have index opclasses
445 * for pseudo-types such as "record", and the actually stored type
446 * had better be safe; eg, a named composite type is okay, an
447 * anonymous record type is not. The test is the same as for
448 * whether a table column is of a safe type (which is why we
449 * needn't check for the non-expression case).
450 */
451 CheckAttributeType(NameStr(to->attname),
452 to->atttypid, to->attcollation,
453 NIL, false);
454 }
455
456 /*
457 * We do not yet have the correct relation OID for the index, so just
458 * set it invalid for now. InitializeAttributeOids() will fix it
459 * later.
460 */
461 to->attrelid = InvalidOid;
462
463 /*
464 * Check the opclass and index AM to see if either provides a keytype
465 * (overriding the attribute type). Opclass (if exists) takes
466 * precedence.
467 */
468 keyType = amroutine->amkeytype;
469
470 /*
471 * Code below is concerned to the opclasses which are not used with
472 * the included columns.
473 */
474 if (i < indexInfo->ii_NumIndexKeyAttrs)
475 {
476 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
477 if (!HeapTupleIsValid(tuple))
478 elog(ERROR, "cache lookup failed for opclass %u",
479 classObjectId[i]);
480 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
481 if (OidIsValid(opclassTup->opckeytype))
482 keyType = opclassTup->opckeytype;
483
484 /*
485 * If keytype is specified as ANYELEMENT, and opcintype is
486 * ANYARRAY, then the attribute type must be an array (else it'd
487 * not have matched this opclass); use its element type.
488 */
489 if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
490 {
491 keyType = get_base_element_type(to->atttypid);
492 if (!OidIsValid(keyType))
493 elog(ERROR, "could not get element type of array type %u",
494 to->atttypid);
495 }
496
497 ReleaseSysCache(tuple);
498 }
499
500 /*
501 * If a key type different from the heap value is specified, update
502 * the type-related fields in the index tupdesc.
503 */
504 if (OidIsValid(keyType) && keyType != to->atttypid)
505 {
506 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
507 if (!HeapTupleIsValid(tuple))
508 elog(ERROR, "cache lookup failed for type %u", keyType);
509 typeTup = (Form_pg_type) GETSTRUCT(tuple);
510
511 to->atttypid = keyType;
512 to->atttypmod = -1;
513 to->attlen = typeTup->typlen;
514 to->attbyval = typeTup->typbyval;
515 to->attalign = typeTup->typalign;
516 to->attstorage = typeTup->typstorage;
517
518 ReleaseSysCache(tuple);
519 }
520 }
521
522 pfree(amroutine);
523
524 return indexTupDesc;
525 }
526
527 /* ----------------------------------------------------------------
528 * InitializeAttributeOids
529 * ----------------------------------------------------------------
530 */
531 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)532 InitializeAttributeOids(Relation indexRelation,
533 int numatts,
534 Oid indexoid)
535 {
536 TupleDesc tupleDescriptor;
537 int i;
538
539 tupleDescriptor = RelationGetDescr(indexRelation);
540
541 for (i = 0; i < numatts; i += 1)
542 TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
543 }
544
545 /* ----------------------------------------------------------------
546 * AppendAttributeTuples
547 * ----------------------------------------------------------------
548 */
549 static void
AppendAttributeTuples(Relation indexRelation,int numatts)550 AppendAttributeTuples(Relation indexRelation, int numatts)
551 {
552 Relation pg_attribute;
553 CatalogIndexState indstate;
554 TupleDesc indexTupDesc;
555 int i;
556
557 /*
558 * open the attribute relation and its indexes
559 */
560 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
561
562 indstate = CatalogOpenIndexes(pg_attribute);
563
564 /*
565 * insert data from new index's tupdesc into pg_attribute
566 */
567 indexTupDesc = RelationGetDescr(indexRelation);
568
569 for (i = 0; i < numatts; i++)
570 {
571 Form_pg_attribute attr = TupleDescAttr(indexTupDesc, i);
572
573 /*
574 * There used to be very grotty code here to set these fields, but I
575 * think it's unnecessary. They should be set already.
576 */
577 Assert(attr->attnum == i + 1);
578 Assert(attr->attcacheoff == -1);
579
580 InsertPgAttributeTuple(pg_attribute, attr, indstate);
581 }
582
583 CatalogCloseIndexes(indstate);
584
585 heap_close(pg_attribute, RowExclusiveLock);
586 }
587
588 /* ----------------------------------------------------------------
589 * UpdateIndexRelation
590 *
591 * Construct and insert a new entry in the pg_index catalog
592 * ----------------------------------------------------------------
593 */
594 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,Oid parentIndexOid,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid,bool isready)595 UpdateIndexRelation(Oid indexoid,
596 Oid heapoid,
597 Oid parentIndexOid,
598 IndexInfo *indexInfo,
599 Oid *collationOids,
600 Oid *classOids,
601 int16 *coloptions,
602 bool primary,
603 bool isexclusion,
604 bool immediate,
605 bool isvalid,
606 bool isready)
607 {
608 int2vector *indkey;
609 oidvector *indcollation;
610 oidvector *indclass;
611 int2vector *indoption;
612 Datum exprsDatum;
613 Datum predDatum;
614 Datum values[Natts_pg_index];
615 bool nulls[Natts_pg_index];
616 Relation pg_index;
617 HeapTuple tuple;
618 int i;
619
620 /*
621 * Copy the index key, opclass, and indoption info into arrays (should we
622 * make the caller pass them like this to start with?)
623 */
624 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
625 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
626 indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
627 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
628 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
629 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
630
631 /*
632 * Convert the index expressions (if any) to a text datum
633 */
634 if (indexInfo->ii_Expressions != NIL)
635 {
636 char *exprsString;
637
638 exprsString = nodeToString(indexInfo->ii_Expressions);
639 exprsDatum = CStringGetTextDatum(exprsString);
640 pfree(exprsString);
641 }
642 else
643 exprsDatum = (Datum) 0;
644
645 /*
646 * Convert the index predicate (if any) to a text datum. Note we convert
647 * implicit-AND format to normal explicit-AND for storage.
648 */
649 if (indexInfo->ii_Predicate != NIL)
650 {
651 char *predString;
652
653 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
654 predDatum = CStringGetTextDatum(predString);
655 pfree(predString);
656 }
657 else
658 predDatum = (Datum) 0;
659
660 /*
661 * open the system catalog index relation
662 */
663 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
664
665 /*
666 * Build a pg_index tuple
667 */
668 MemSet(nulls, false, sizeof(nulls));
669
670 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
671 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
672 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
673 values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
674 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
675 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
676 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
677 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
678 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
679 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
680 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
681 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
682 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
683 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
684 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
685 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
686 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
687 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
688 values[Anum_pg_index_indexprs - 1] = exprsDatum;
689 if (exprsDatum == (Datum) 0)
690 nulls[Anum_pg_index_indexprs - 1] = true;
691 values[Anum_pg_index_indpred - 1] = predDatum;
692 if (predDatum == (Datum) 0)
693 nulls[Anum_pg_index_indpred - 1] = true;
694
695 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
696
697 /*
698 * insert the tuple into the pg_index catalog
699 */
700 CatalogTupleInsert(pg_index, tuple);
701
702 /*
703 * close the relation and free the tuple
704 */
705 heap_close(pg_index, RowExclusiveLock);
706 heap_freetuple(tuple);
707 }
708
709
710 /*
711 * index_create
712 *
713 * heapRelation: table to build index on (suitably locked by caller)
714 * indexRelationName: what it say
715 * indexRelationId: normally, pass InvalidOid to let this routine
716 * generate an OID for the index. During bootstrap this may be
717 * nonzero to specify a preselected OID.
718 * parentIndexRelid: if creating an index partition, the OID of the
719 * parent index; otherwise InvalidOid.
720 * parentConstraintId: if creating a constraint on a partition, the OID
721 * of the constraint in the parent; otherwise InvalidOid.
722 * relFileNode: normally, pass InvalidOid to get new storage. May be
723 * nonzero to attach an existing valid build.
724 * indexInfo: same info executor uses to insert into the index
725 * indexColNames: column names to use for index (List of char *)
726 * accessMethodObjectId: OID of index AM to use
727 * tableSpaceId: OID of tablespace to use
728 * collationObjectId: array of collation OIDs, one per index column
729 * classObjectId: array of index opclass OIDs, one per index column
730 * coloptions: array of per-index-column indoption settings
731 * reloptions: AM-specific options
732 * flags: bitmask that can include any combination of these bits:
733 * INDEX_CREATE_IS_PRIMARY
734 * the index is a primary key
735 * INDEX_CREATE_ADD_CONSTRAINT:
736 * invoke index_constraint_create also
737 * INDEX_CREATE_SKIP_BUILD:
738 * skip the index_build() step for the moment; caller must do it
739 * later (typically via reindex_index())
740 * INDEX_CREATE_CONCURRENT:
741 * do not lock the table against writers. The index will be
742 * marked "invalid" and the caller must take additional steps
743 * to fix it up.
744 * INDEX_CREATE_IF_NOT_EXISTS:
745 * do not throw an error if a relation with the same name
746 * already exists.
747 * INDEX_CREATE_PARTITIONED:
748 * create a partitioned index (table must be partitioned)
749 * constr_flags: flags passed to index_constraint_create
750 * (only if INDEX_CREATE_ADD_CONSTRAINT is set)
751 * allow_system_table_mods: allow table to be a system catalog
752 * is_internal: if true, post creation hook for new index
753 * constraintId: if not NULL, receives OID of created constraint
754 *
755 * Returns the OID of the created index.
756 */
757 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid parentIndexRelid,Oid parentConstraintId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bits16 flags,bits16 constr_flags,bool allow_system_table_mods,bool is_internal,Oid * constraintId)758 index_create(Relation heapRelation,
759 const char *indexRelationName,
760 Oid indexRelationId,
761 Oid parentIndexRelid,
762 Oid parentConstraintId,
763 Oid relFileNode,
764 IndexInfo *indexInfo,
765 List *indexColNames,
766 Oid accessMethodObjectId,
767 Oid tableSpaceId,
768 Oid *collationObjectId,
769 Oid *classObjectId,
770 int16 *coloptions,
771 Datum reloptions,
772 bits16 flags,
773 bits16 constr_flags,
774 bool allow_system_table_mods,
775 bool is_internal,
776 Oid *constraintId)
777 {
778 Oid heapRelationId = RelationGetRelid(heapRelation);
779 Relation pg_class;
780 Relation indexRelation;
781 TupleDesc indexTupDesc;
782 bool shared_relation;
783 bool mapped_relation;
784 bool is_exclusion;
785 Oid namespaceId;
786 int i;
787 char relpersistence;
788 bool isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
789 bool invalid = (flags & INDEX_CREATE_INVALID) != 0;
790 bool concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
791 bool partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
792 char relkind;
793
794 /* constraint flags can only be set when a constraint is requested */
795 Assert((constr_flags == 0) ||
796 ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
797 /* partitioned indexes must never be "built" by themselves */
798 Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
799
800 relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
801 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
802
803 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
804
805 /*
806 * The index will be in the same namespace as its parent table, and is
807 * shared across databases if and only if the parent is. Likewise, it
808 * will use the relfilenode map if and only if the parent does; and it
809 * inherits the parent's relpersistence.
810 */
811 namespaceId = RelationGetNamespace(heapRelation);
812 shared_relation = heapRelation->rd_rel->relisshared;
813 mapped_relation = RelationIsMapped(heapRelation);
814 relpersistence = heapRelation->rd_rel->relpersistence;
815
816 /*
817 * check parameters
818 */
819 if (indexInfo->ii_NumIndexAttrs < 1)
820 elog(ERROR, "must index at least one column");
821
822 if (!allow_system_table_mods &&
823 IsSystemRelation(heapRelation) &&
824 IsNormalProcessingMode())
825 ereport(ERROR,
826 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
827 errmsg("user-defined indexes on system catalog tables are not supported")));
828
829 /*
830 * concurrent index build on a system catalog is unsafe because we tend to
831 * release locks before committing in catalogs
832 */
833 if (concurrent &&
834 IsSystemRelation(heapRelation))
835 ereport(ERROR,
836 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
837 errmsg("concurrent index creation on system catalog tables is not supported")));
838
839 /*
840 * This case is currently not supported, but there's no way to ask for it
841 * in the grammar anyway, so it can't happen.
842 */
843 if (concurrent && is_exclusion)
844 ereport(ERROR,
845 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
846 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
847
848 /*
849 * We cannot allow indexing a shared relation after initdb (because
850 * there's no way to make the entry in other databases' pg_class).
851 */
852 if (shared_relation && !IsBootstrapProcessingMode())
853 ereport(ERROR,
854 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
855 errmsg("shared indexes cannot be created after initdb")));
856
857 /*
858 * Shared relations must be in pg_global, too (last-ditch check)
859 */
860 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
861 elog(ERROR, "shared relations must be placed in pg_global tablespace");
862
863 /*
864 * Check for duplicate name (both as to the index, and as to the
865 * associated constraint if any). Such cases would fail on the relevant
866 * catalogs' unique indexes anyway, but we prefer to give a friendlier
867 * error message.
868 */
869 if (get_relname_relid(indexRelationName, namespaceId))
870 {
871 if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
872 {
873 ereport(NOTICE,
874 (errcode(ERRCODE_DUPLICATE_TABLE),
875 errmsg("relation \"%s\" already exists, skipping",
876 indexRelationName)));
877 heap_close(pg_class, RowExclusiveLock);
878 return InvalidOid;
879 }
880
881 ereport(ERROR,
882 (errcode(ERRCODE_DUPLICATE_TABLE),
883 errmsg("relation \"%s\" already exists",
884 indexRelationName)));
885 }
886
887 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
888 ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
889 indexRelationName))
890 {
891 /*
892 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
893 * conflicting constraint is not an index.
894 */
895 ereport(ERROR,
896 (errcode(ERRCODE_DUPLICATE_OBJECT),
897 errmsg("constraint \"%s\" for relation \"%s\" already exists",
898 indexRelationName, RelationGetRelationName(heapRelation))));
899 }
900
901 /*
902 * construct tuple descriptor for index tuples
903 */
904 indexTupDesc = ConstructTupleDescriptor(heapRelation,
905 indexInfo,
906 indexColNames,
907 accessMethodObjectId,
908 collationObjectId,
909 classObjectId);
910
911 /*
912 * Allocate an OID for the index, unless we were told what to use.
913 *
914 * The OID will be the relfilenode as well, so make sure it doesn't
915 * collide with either pg_class OIDs or existing physical files.
916 */
917 if (!OidIsValid(indexRelationId))
918 {
919 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
920 if (IsBinaryUpgrade)
921 {
922 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
923 ereport(ERROR,
924 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
925 errmsg("pg_class index OID value not set when in binary upgrade mode")));
926
927 indexRelationId = binary_upgrade_next_index_pg_class_oid;
928 binary_upgrade_next_index_pg_class_oid = InvalidOid;
929 }
930 else
931 {
932 indexRelationId =
933 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
934 }
935 }
936
937 /*
938 * create the index relation's relcache entry and, if necessary, the
939 * physical disk file. (If we fail further down, it's the smgr's
940 * responsibility to remove the disk file again, if any.)
941 */
942 indexRelation = heap_create(indexRelationName,
943 namespaceId,
944 tableSpaceId,
945 indexRelationId,
946 relFileNode,
947 indexTupDesc,
948 relkind,
949 relpersistence,
950 shared_relation,
951 mapped_relation,
952 allow_system_table_mods);
953
954 Assert(indexRelationId == RelationGetRelid(indexRelation));
955
956 /*
957 * Obtain exclusive lock on it. Although no other transactions can see it
958 * until we commit, this prevents deadlock-risk complaints from lock
959 * manager in cases such as CLUSTER.
960 */
961 LockRelation(indexRelation, AccessExclusiveLock);
962
963 /*
964 * Fill in fields of the index's pg_class entry that are not set correctly
965 * by heap_create.
966 *
967 * XXX should have a cleaner way to create cataloged indexes
968 */
969 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
970 indexRelation->rd_rel->relam = accessMethodObjectId;
971 indexRelation->rd_rel->relhasoids = false;
972 indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
973
974 /*
975 * store index's pg_class entry
976 */
977 InsertPgClassTuple(pg_class, indexRelation,
978 RelationGetRelid(indexRelation),
979 (Datum) 0,
980 reloptions);
981
982 /* done with pg_class */
983 heap_close(pg_class, RowExclusiveLock);
984
985 /*
986 * now update the object id's of all the attribute tuple forms in the
987 * index relation's tuple descriptor
988 */
989 InitializeAttributeOids(indexRelation,
990 indexInfo->ii_NumIndexAttrs,
991 indexRelationId);
992
993 /*
994 * append ATTRIBUTE tuples for the index
995 */
996 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
997
998 /* ----------------
999 * update pg_index
1000 * (append INDEX tuple)
1001 *
1002 * Note that this stows away a representation of "predicate".
1003 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
1004 * ----------------
1005 */
1006 UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
1007 indexInfo,
1008 collationObjectId, classObjectId, coloptions,
1009 isprimary, is_exclusion,
1010 (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
1011 !concurrent && !invalid,
1012 !concurrent);
1013
1014 /*
1015 * Register relcache invalidation on the indexes' heap relation, to
1016 * maintain consistency of its index list
1017 */
1018 CacheInvalidateRelcache(heapRelation);
1019
1020 /* update pg_inherits, if needed */
1021 if (OidIsValid(parentIndexRelid))
1022 StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1023
1024 /*
1025 * Register constraint and dependencies for the index.
1026 *
1027 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1028 * entry. The index will be linked to the constraint, which in turn is
1029 * linked to the table. If it's not a CONSTRAINT, we need to make a
1030 * dependency directly on the table.
1031 *
1032 * We don't need a dependency on the namespace, because there'll be an
1033 * indirect dependency via our parent table.
1034 *
1035 * During bootstrap we can't register any dependencies, and we don't try
1036 * to make a constraint either.
1037 */
1038 if (!IsBootstrapProcessingMode())
1039 {
1040 ObjectAddress myself,
1041 referenced;
1042
1043 myself.classId = RelationRelationId;
1044 myself.objectId = indexRelationId;
1045 myself.objectSubId = 0;
1046
1047 if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1048 {
1049 char constraintType;
1050 ObjectAddress localaddr;
1051
1052 if (isprimary)
1053 constraintType = CONSTRAINT_PRIMARY;
1054 else if (indexInfo->ii_Unique)
1055 constraintType = CONSTRAINT_UNIQUE;
1056 else if (is_exclusion)
1057 constraintType = CONSTRAINT_EXCLUSION;
1058 else
1059 {
1060 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1061 constraintType = 0; /* keep compiler quiet */
1062 }
1063
1064 localaddr = index_constraint_create(heapRelation,
1065 indexRelationId,
1066 parentConstraintId,
1067 indexInfo,
1068 indexRelationName,
1069 constraintType,
1070 constr_flags,
1071 allow_system_table_mods,
1072 is_internal);
1073 if (constraintId)
1074 *constraintId = localaddr.objectId;
1075 }
1076 else
1077 {
1078 bool have_simple_col = false;
1079 DependencyType deptype;
1080
1081 deptype = OidIsValid(parentIndexRelid) ? DEPENDENCY_INTERNAL_AUTO : DEPENDENCY_AUTO;
1082
1083 /* Create auto dependencies on simply-referenced columns */
1084 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1085 {
1086 if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1087 {
1088 referenced.classId = RelationRelationId;
1089 referenced.objectId = heapRelationId;
1090 referenced.objectSubId = indexInfo->ii_IndexAttrNumbers[i];
1091
1092 recordDependencyOn(&myself, &referenced, deptype);
1093
1094 have_simple_col = true;
1095 }
1096 }
1097
1098 /*
1099 * If there are no simply-referenced columns, give the index an
1100 * auto dependency on the whole table. In most cases, this will
1101 * be redundant, but it might not be if the index expressions and
1102 * predicate contain no Vars or only whole-row Vars.
1103 */
1104 if (!have_simple_col)
1105 {
1106 referenced.classId = RelationRelationId;
1107 referenced.objectId = heapRelationId;
1108 referenced.objectSubId = 0;
1109
1110 recordDependencyOn(&myself, &referenced, deptype);
1111 }
1112 }
1113
1114 /* Store dependency on parent index, if any */
1115 if (OidIsValid(parentIndexRelid))
1116 {
1117 referenced.classId = RelationRelationId;
1118 referenced.objectId = parentIndexRelid;
1119 referenced.objectSubId = 0;
1120
1121 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL_AUTO);
1122 }
1123
1124 /* Store dependency on collations */
1125 /* The default collation is pinned, so don't bother recording it */
1126 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1127 {
1128 if (OidIsValid(collationObjectId[i]) &&
1129 collationObjectId[i] != DEFAULT_COLLATION_OID)
1130 {
1131 referenced.classId = CollationRelationId;
1132 referenced.objectId = collationObjectId[i];
1133 referenced.objectSubId = 0;
1134
1135 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1136 }
1137 }
1138
1139 /* Store dependency on operator classes */
1140 for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1141 {
1142 referenced.classId = OperatorClassRelationId;
1143 referenced.objectId = classObjectId[i];
1144 referenced.objectSubId = 0;
1145
1146 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1147 }
1148
1149 /* Store dependencies on anything mentioned in index expressions */
1150 if (indexInfo->ii_Expressions)
1151 {
1152 recordDependencyOnSingleRelExpr(&myself,
1153 (Node *) indexInfo->ii_Expressions,
1154 heapRelationId,
1155 DEPENDENCY_NORMAL,
1156 DEPENDENCY_AUTO, false);
1157 }
1158
1159 /* Store dependencies on anything mentioned in predicate */
1160 if (indexInfo->ii_Predicate)
1161 {
1162 recordDependencyOnSingleRelExpr(&myself,
1163 (Node *) indexInfo->ii_Predicate,
1164 heapRelationId,
1165 DEPENDENCY_NORMAL,
1166 DEPENDENCY_AUTO, false);
1167 }
1168 }
1169 else
1170 {
1171 /* Bootstrap mode - assert we weren't asked for constraint support */
1172 Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1173 }
1174
1175 /* Post creation hook for new index */
1176 InvokeObjectPostCreateHookArg(RelationRelationId,
1177 indexRelationId, 0, is_internal);
1178
1179 /*
1180 * Advance the command counter so that we can see the newly-entered
1181 * catalog tuples for the index.
1182 */
1183 CommandCounterIncrement();
1184
1185 /*
1186 * In bootstrap mode, we have to fill in the index strategy structure with
1187 * information from the catalogs. If we aren't bootstrapping, then the
1188 * relcache entry has already been rebuilt thanks to sinval update during
1189 * CommandCounterIncrement.
1190 */
1191 if (IsBootstrapProcessingMode())
1192 RelationInitIndexAccessInfo(indexRelation);
1193 else
1194 Assert(indexRelation->rd_indexcxt != NULL);
1195
1196 indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1197
1198 /*
1199 * If this is bootstrap (initdb) time, then we don't actually fill in the
1200 * index yet. We'll be creating more indexes and classes later, so we
1201 * delay filling them in until just before we're done with bootstrapping.
1202 * Similarly, if the caller specified to skip the build then filling the
1203 * index is delayed till later (ALTER TABLE can save work in some cases
1204 * with this). Otherwise, we call the AM routine that constructs the
1205 * index.
1206 */
1207 if (IsBootstrapProcessingMode())
1208 {
1209 index_register(heapRelationId, indexRelationId, indexInfo);
1210 }
1211 else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1212 {
1213 /*
1214 * Caller is responsible for filling the index later on. However,
1215 * we'd better make sure that the heap relation is correctly marked as
1216 * having an index.
1217 */
1218 index_update_stats(heapRelation,
1219 true,
1220 -1.0);
1221 /* Make the above update visible */
1222 CommandCounterIncrement();
1223 }
1224 else
1225 {
1226 index_build(heapRelation, indexRelation, indexInfo, isprimary, false,
1227 true);
1228 }
1229
1230 /*
1231 * Close the index; but we keep the lock that we acquired above until end
1232 * of transaction. Closing the heap is caller's responsibility.
1233 */
1234 index_close(indexRelation, NoLock);
1235
1236 return indexRelationId;
1237 }
1238
1239 /*
1240 * index_constraint_create
1241 *
1242 * Set up a constraint associated with an index. Return the new constraint's
1243 * address.
1244 *
1245 * heapRelation: table owning the index (must be suitably locked by caller)
1246 * indexRelationId: OID of the index
1247 * parentConstraintId: if constraint is on a partition, the OID of the
1248 * constraint in the parent.
1249 * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
1251 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1252 * CONSTRAINT_EXCLUSION
1253 * flags: bitmask that can include any combination of these bits:
1254 * INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
1255 * INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
1256 * INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
1257 * INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
1258 * INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
1259 * of index on table's columns
1260 * allow_system_table_mods: allow table to be a system catalog
1261 * is_internal: index is constructed due to internal process
1262 */
ObjectAddress
index_constraint_create(Relation heapRelation,
						Oid indexRelationId,
						Oid parentConstraintId,
						IndexInfo *indexInfo,
						const char *constraintName,
						char constraintType,
						bits16 constr_flags,
						bool allow_system_table_mods,
						bool is_internal)
{
	Oid			namespaceId = RelationGetNamespace(heapRelation);
	ObjectAddress myself,
				referenced;
	Oid			conOid;
	bool		deferrable;
	bool		initdeferred;
	bool		mark_as_primary;
	bool		islocal;
	bool		noinherit;
	int			inhcount;

	/* Decode the constr_flags bitmask into individual booleans */
	deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
	initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
	mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;

	/* constraint creation support doesn't work while bootstrapping */
	Assert(!IsBootstrapProcessingMode());

	/* enforce system-table restriction */
	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/* primary/unique constraints shouldn't have any expressions */
	if (indexInfo->ii_Expressions &&
		constraintType != CONSTRAINT_EXCLUSION)
		elog(ERROR, "constraints cannot have index expressions");

	/*
	 * If we're manufacturing a constraint for a pre-existing index, we need
	 * to get rid of the existing auto dependencies for the index (the ones
	 * that index_create() would have made instead of calling this function).
	 *
	 * Note: this code would not necessarily do the right thing if the index
	 * has any expressions or predicate, but we'd never be turning such an
	 * index into a UNIQUE or PRIMARY KEY constraint.
	 */
	if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
										RelationRelationId, DEPENDENCY_AUTO);

	/*
	 * A constraint that has a parent constraint (i.e. one being created on a
	 * partition) is recorded as inherited rather than locally defined.
	 */
	if (OidIsValid(parentConstraintId))
	{
		islocal = false;
		inhcount = 1;
		noinherit = false;
	}
	else
	{
		islocal = true;
		inhcount = 0;
		noinherit = true;
	}

	/*
	 * Construct a pg_constraint entry.
	 */
	conOid = CreateConstraintEntry(constraintName,
								   namespaceId,
								   constraintType,
								   deferrable,
								   initdeferred,
								   true,
								   parentConstraintId,
								   RelationGetRelid(heapRelation),
								   indexInfo->ii_IndexAttrNumbers,
								   indexInfo->ii_NumIndexKeyAttrs,
								   indexInfo->ii_NumIndexAttrs,
								   InvalidOid,	/* no domain */
								   indexRelationId, /* index OID */
								   InvalidOid,	/* no foreign key */
								   NULL,
								   NULL,
								   NULL,
								   NULL,
								   0,
								   ' ',
								   ' ',
								   ' ',
								   indexInfo->ii_ExclusionOps,
								   NULL,	/* no check constraint */
								   NULL,
								   NULL,
								   islocal,
								   inhcount,
								   noinherit,
								   is_internal);

	/*
	 * Register the index as internally dependent on the constraint.
	 *
	 * Note that the constraint has a dependency on the table, so we don't
	 * need (or want) any direct dependency from the index to the table.
	 */
	myself.classId = RelationRelationId;
	myself.objectId = indexRelationId;
	myself.objectSubId = 0;

	referenced.classId = ConstraintRelationId;
	referenced.objectId = conOid;
	referenced.objectSubId = 0;

	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * Also, if this is a constraint on a partition, mark it as depending on
	 * the constraint in the parent.
	 */
	if (OidIsValid(parentConstraintId))
	{
		ObjectAddress parentConstr;

		ObjectAddressSet(parentConstr, ConstraintRelationId, parentConstraintId);
		recordDependencyOn(&referenced, &parentConstr, DEPENDENCY_INTERNAL_AUTO);
	}

	/*
	 * If the constraint is deferrable, create the deferred uniqueness
	 * checking trigger.  (The trigger will be given an internal dependency on
	 * the constraint by CreateTrigger.)
	 */
	if (deferrable)
	{
		CreateTrigStmt *trigger;

		trigger = makeNode(CreateTrigStmt);
		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
			"PK_ConstraintTrigger" :
			"Unique_ConstraintTrigger";
		trigger->relation = NULL;
		trigger->funcname = SystemFuncName("unique_key_recheck");
		trigger->args = NIL;
		trigger->row = true;
		trigger->timing = TRIGGER_TYPE_AFTER;
		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
		trigger->columns = NIL;
		trigger->whenClause = NULL;
		trigger->isconstraint = true;
		trigger->deferrable = true;
		trigger->initdeferred = initdeferred;
		trigger->constrrel = NULL;

		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
							 InvalidOid, conOid, indexRelationId, InvalidOid,
							 InvalidOid, NULL, true, false);
	}

	/*
	 * If needed, mark the index as primary and/or deferred in pg_index.
	 *
	 * Note: When making an existing index into a constraint, caller must have
	 * a table lock that prevents concurrent table updates; otherwise, there
	 * is a risk that concurrent readers of the table will miss seeing this
	 * index at all.
	 */
	if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
		(mark_as_primary || deferrable))
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		dirty = false;

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexRelationId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		if (mark_as_primary && !indexForm->indisprimary)
		{
			indexForm->indisprimary = true;
			dirty = true;
		}

		if (deferrable && indexForm->indimmediate)
		{
			indexForm->indimmediate = false;
			dirty = true;
		}

		/* Only write the tuple back (and fire the hook) if we changed it */
		if (dirty)
		{
			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
										 InvalidOid, is_internal);
		}

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/* Return the address of the new constraint, not of the index */
	return referenced;
}
1474
1475 /*
1476 * index_drop
1477 *
1478 * NOTE: this routine should now only be called through performDeletion(),
1479 * else associated dependencies won't be cleaned up.
1480 */
void
index_drop(Oid indexId, bool concurrent)
{
	Oid			heapId;
	Relation	userHeapRelation;
	Relation	userIndexRelation;
	Relation	indexRelation;
	HeapTuple	tuple;
	bool		hasexprs;
	LockRelId	heaprelid,
				indexrelid;
	LOCKTAG		heaplocktag;
	LOCKMODE	lockmode;

	/*
	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
	 * access a temporary relation, so there's no harm in grabbing a stronger
	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
	 * more efficient.
	 */
	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
		   !concurrent);

	/*
	 * To drop an index safely, we must grab exclusive lock on its parent
	 * table.  Exclusive lock on the index alone is insufficient because
	 * another backend might be about to execute a query on the parent table.
	 * If it relies on a previously cached list of index OIDs, then it could
	 * attempt to access the just-dropped index.  We must therefore take a
	 * table lock strong enough to prevent all queries on the table from
	 * proceeding until we commit and send out a shared-cache-inval notice
	 * that will make them update their index lists.
	 *
	 * In the concurrent case we avoid this requirement by disabling index use
	 * in multiple steps and waiting out any transactions that might be using
	 * the index, so we don't need exclusive lock on the parent table. Instead
	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
	 * AccessExclusiveLock on the index below, once we're sure nobody else is
	 * using it.)
	 */
	heapId = IndexGetRelation(indexId, false);
	lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
	userHeapRelation = heap_open(heapId, lockmode);
	userIndexRelation = index_open(indexId, lockmode);

	/*
	 * We might still have open queries using it in our own session, which the
	 * above locking won't prevent, so test explicitly.
	 */
	CheckTableNotInUse(userIndexRelation, "DROP INDEX");

	/*
	 * Drop Index Concurrently is more or less the reverse process of Create
	 * Index Concurrently.
	 *
	 * First we unset indisvalid so queries starting afterwards don't use the
	 * index to answer queries anymore.  We have to keep indisready = true so
	 * transactions that are still scanning the index can continue to see
	 * valid index contents.  For instance, if they are using READ COMMITTED
	 * mode, and another transaction makes changes and commits, they need to
	 * see those new tuples in the index.
	 *
	 * After all transactions that could possibly have used the index for
	 * queries end, we can unset indisready and indislive, then wait till
	 * nobody could be touching it anymore.  (Note: we need indislive because
	 * this state must be distinct from the initial state during CREATE INDEX
	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
	 * are false.  That's because in that state, transactions must examine the
	 * index for HOT-safety decisions, while in this state we don't want them
	 * to open it at all.)
	 *
	 * Since all predicate locks on the index are about to be made invalid, we
	 * must promote them to predicate locks on the heap.  In the
	 * non-concurrent case we can just do that now.  In the concurrent case
	 * it's a bit trickier.  The predicate locks must be moved when there are
	 * no index scans in progress on the index and no more can subsequently
	 * start, so that no new predicate locks can be made on the index.  Also,
	 * they must be moved before heap inserts stop maintaining the index, else
	 * the conflict with the predicate lock on the index gap could be missed
	 * before the lock on the heap relation is in place to detect a conflict
	 * based on the heap tuple insert.
	 */
	if (concurrent)
	{
		/*
		 * We must commit our transaction in order to make the first pg_index
		 * state update visible to other sessions.  If the DROP machinery has
		 * already performed any other actions (removal of other objects,
		 * pg_depend entries, etc), the commit would make those actions
		 * permanent, which would leave us with inconsistent catalog state if
		 * we fail partway through the following sequence.  Since DROP INDEX
		 * CONCURRENTLY is restricted to dropping just one index that has no
		 * dependencies, we should get here before anything's been done ---
		 * but let's check that to be sure.  We can verify that the current
		 * transaction has not executed any transactional updates by checking
		 * that no XID has been assigned.
		 */
		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));

		/*
		 * Mark index invalid by updating its pg_index entry
		 */
		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh any cached plans that might reference the
		 * index.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/* save lockrelid and locktag for below, then close but keep locks */
		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;

		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * We must commit our current transaction so that the indisvalid
		 * update becomes visible to other transactions; then start another.
		 * Note that any previously-built data structures are lost in the
		 * commit.  The only data we keep past here are the relation IDs.
		 *
		 * Before committing, get a session-level lock on the table, to ensure
		 * that neither it nor the index can be dropped before we finish. This
		 * cannot block, even if someone else is waiting for access, because
		 * we already have the same lock within our transaction.
		 */
		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);

		PopActiveSnapshot();
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Now we must wait until no running transaction could be using the
		 * index for a query.  Use AccessExclusiveLock here to check for
		 * running transactions that hold locks of any kind on the table. Note
		 * we do not need to worry about xacts that open the table for reading
		 * after this point; they will see the index as invalid when they open
		 * the relation.
		 *
		 * Note: the reason we use actual lock acquisition here, rather than
		 * just checking the ProcArray and sleeping, is that deadlock is
		 * possible if one of the transactions in question is blocked trying
		 * to acquire an exclusive lock on our table.  The lock code will
		 * detect deadlock and error out properly.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * No more predicate locks will be acquired on this index, and we're
		 * about to stop doing inserts into the index which could show
		 * conflicts with existing predicate locks, so now is the time to move
		 * them to the heap relation.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
		TransferPredicateLocksToHeapRelation(userIndexRelation);

		/*
		 * Now we are sure that nobody uses the index for queries; they just
		 * might have it open for updating it.  So now we can unset indisready
		 * and indislive, then wait till nobody could be using it at all
		 * anymore.
		 */
		index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh the table's index list.  Forgetting just
		 * the index's relcache entry is not enough.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/*
		 * Close the relations again, though still holding session lock.
		 */
		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * Again, commit the transaction to make the pg_index update visible
		 * to other sessions.
		 */
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Wait till every transaction that saw the old index state has
		 * finished.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * Re-open relations to allow us to complete our actions.
		 *
		 * At this point, nothing should be accessing the index, but let's
		 * leave nothing to chance and grab AccessExclusiveLock on the index
		 * before the physical deletion.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, AccessExclusiveLock);
	}
	else
	{
		/* Not concurrent, so just transfer predicate locks and we're good */
		TransferPredicateLocksToHeapRelation(userIndexRelation);
	}

	/*
	 * Schedule physical removal of the files (if any)
	 */
	if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		RelationDropStorage(userIndexRelation);

	/*
	 * Close and flush the index's relcache entry, to ensure relcache doesn't
	 * try to rebuild it while we're deleting catalog entries.  We keep the
	 * lock though.
	 */
	index_close(userIndexRelation, NoLock);

	RelationForgetRelation(indexId);

	/*
	 * fix INDEX relation, and check for expressional index
	 */
	indexRelation = heap_open(IndexRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);

	/*
	 * Note whether the index has expression columns while we still have its
	 * pg_index tuple in hand; we need this after the tuple is deleted.
	 */
	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
							   RelationGetDescr(indexRelation));

	CatalogTupleDelete(indexRelation, &tuple->t_self);

	ReleaseSysCache(tuple);
	heap_close(indexRelation, RowExclusiveLock);

	/*
	 * if it has any expression columns, we might have stored statistics about
	 * them.
	 */
	if (hasexprs)
		RemoveStatistics(indexId, 0);

	/*
	 * fix ATTRIBUTE relation
	 */
	DeleteAttributeTuples(indexId);

	/*
	 * fix RELATION relation
	 */
	DeleteRelationTuple(indexId);

	/*
	 * fix INHERITS relation
	 */
	DeleteInheritsTuple(indexId, InvalidOid);

	/*
	 * We are presently too lazy to attempt to compute the new correct value
	 * of relhasindex (the next VACUUM will fix it if necessary).  So there is
	 * no need to update the pg_class tuple for the owning relation.  But we
	 * must send out a shared-cache-inval notice on the owning relation to
	 * ensure other backends update their relcache lists of indexes.  (In the
	 * concurrent case, this is redundant but harmless.)
	 */
	CacheInvalidateRelcache(userHeapRelation);

	/*
	 * Close owning rel, but keep lock
	 */
	heap_close(userHeapRelation, NoLock);

	/*
	 * Release the session locks before we go.
	 */
	if (concurrent)
	{
		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
	}
}
1776
1777 /* ----------------------------------------------------------------
1778 * index_build support
1779 * ----------------------------------------------------------------
1780 */
1781
1782 /* ----------------
1783 * BuildIndexInfo
1784 * Construct an IndexInfo record for an open index
1785 *
1786 * IndexInfo stores the information about the index that's needed by
1787 * FormIndexDatum, which is used for both index_build() and later insertion
1788 * of individual index tuples. Normally we build an IndexInfo for an index
1789 * just once per command, and then use it for (potentially) many tuples.
1790 * ----------------
1791 */
1792 IndexInfo *
BuildIndexInfo(Relation index)1793 BuildIndexInfo(Relation index)
1794 {
1795 IndexInfo *ii = makeNode(IndexInfo);
1796 Form_pg_index indexStruct = index->rd_index;
1797 int i;
1798 int numAtts;
1799
1800 /* check the number of keys, and copy attr numbers into the IndexInfo */
1801 numAtts = indexStruct->indnatts;
1802 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
1803 elog(ERROR, "invalid indnatts %d for index %u",
1804 numAtts, RelationGetRelid(index));
1805 ii->ii_NumIndexAttrs = numAtts;
1806 ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts;
1807 Assert(ii->ii_NumIndexKeyAttrs != 0);
1808 Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs);
1809
1810 for (i = 0; i < numAtts; i++)
1811 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
1812
1813 /* fetch any expressions needed for expressional indexes */
1814 ii->ii_Expressions = RelationGetIndexExpressions(index);
1815 ii->ii_ExpressionsState = NIL;
1816
1817 /* fetch index predicate if any */
1818 ii->ii_Predicate = RelationGetIndexPredicate(index);
1819 ii->ii_PredicateState = NULL;
1820
1821 /* fetch exclusion constraint info if any */
1822 if (indexStruct->indisexclusion)
1823 {
1824 RelationGetExclusionInfo(index,
1825 &ii->ii_ExclusionOps,
1826 &ii->ii_ExclusionProcs,
1827 &ii->ii_ExclusionStrats);
1828 }
1829 else
1830 {
1831 ii->ii_ExclusionOps = NULL;
1832 ii->ii_ExclusionProcs = NULL;
1833 ii->ii_ExclusionStrats = NULL;
1834 }
1835
1836 /* other info */
1837 ii->ii_Unique = indexStruct->indisunique;
1838 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1839 /* assume not doing speculative insertion for now */
1840 ii->ii_UniqueOps = NULL;
1841 ii->ii_UniqueProcs = NULL;
1842 ii->ii_UniqueStrats = NULL;
1843
1844 /* initialize index-build state to default */
1845 ii->ii_Concurrent = false;
1846 ii->ii_BrokenHotChain = false;
1847 ii->ii_ParallelWorkers = 0;
1848
1849 /* set up for possible use by index AM */
1850 ii->ii_Am = index->rd_rel->relam;
1851 ii->ii_AmCache = NULL;
1852 ii->ii_Context = CurrentMemoryContext;
1853
1854 return ii;
1855 }
1856
1857 /* ----------------
1858 * BuildDummyIndexInfo
1859 * Construct a dummy IndexInfo record for an open index
1860 *
1861 * This differs from the real BuildIndexInfo in that it will never run any
1862 * user-defined code that might exist in index expressions or predicates.
1863 * Instead of the real index expressions, we return null constants that have
1864 * the right types/typmods/collations. Predicates and exclusion clauses are
1865 * just ignored. This is sufficient for the purpose of truncating an index,
1866 * since we will not need to actually evaluate the expressions or predicates;
1867 * the only thing that's likely to be done with the data is construction of
1868 * a tupdesc describing the index's rowtype.
1869 * ----------------
1870 */
1871 IndexInfo *
BuildDummyIndexInfo(Relation index)1872 BuildDummyIndexInfo(Relation index)
1873 {
1874 IndexInfo *ii = makeNode(IndexInfo);
1875 Form_pg_index indexStruct = index->rd_index;
1876 int i;
1877 int numAtts;
1878
1879 /* check the number of keys, and copy attr numbers into the IndexInfo */
1880 numAtts = indexStruct->indnatts;
1881 if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
1882 elog(ERROR, "invalid indnatts %d for index %u",
1883 numAtts, RelationGetRelid(index));
1884 ii->ii_NumIndexAttrs = numAtts;
1885 ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts;
1886 Assert(ii->ii_NumIndexKeyAttrs != 0);
1887 Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs);
1888
1889 for (i = 0; i < numAtts; i++)
1890 ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
1891
1892 /* fetch dummy expressions for expressional indexes */
1893 ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1894 ii->ii_ExpressionsState = NIL;
1895
1896 /* pretend there is no predicate */
1897 ii->ii_Predicate = NIL;
1898 ii->ii_PredicateState = NULL;
1899
1900 /* We ignore the exclusion constraint if any */
1901 ii->ii_ExclusionOps = NULL;
1902 ii->ii_ExclusionProcs = NULL;
1903 ii->ii_ExclusionStrats = NULL;
1904
1905 /* other info */
1906 ii->ii_Unique = indexStruct->indisunique;
1907 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1908 /* assume not doing speculative insertion for now */
1909 ii->ii_UniqueOps = NULL;
1910 ii->ii_UniqueProcs = NULL;
1911 ii->ii_UniqueStrats = NULL;
1912
1913 /* initialize index-build state to default */
1914 ii->ii_Concurrent = false;
1915 ii->ii_BrokenHotChain = false;
1916 ii->ii_ParallelWorkers = 0;
1917
1918 /* set up for possible use by index AM */
1919 ii->ii_Am = index->rd_rel->relam;
1920 ii->ii_AmCache = NULL;
1921 ii->ii_Context = CurrentMemoryContext;
1922
1923 return ii;
1924 }
1925
1926 /*
1927 * CompareIndexInfo
1928 * Return whether the properties of two indexes (in different tables)
1929 * indicate that they have the "same" definitions.
1930 *
1931 * Note: passing collations and opfamilies separately is a kludge. Adding
1932 * them to IndexInfo may result in better coding here and elsewhere.
1933 *
1934 * Use convert_tuples_by_name_map(index2, index1) to build the attmap.
1935 */
1936 bool
CompareIndexInfo(IndexInfo * info1,IndexInfo * info2,Oid * collations1,Oid * collations2,Oid * opfamilies1,Oid * opfamilies2,AttrNumber * attmap,int maplen)1937 CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
1938 Oid *collations1, Oid *collations2,
1939 Oid *opfamilies1, Oid *opfamilies2,
1940 AttrNumber *attmap, int maplen)
1941 {
1942 int i;
1943
1944 if (info1->ii_Unique != info2->ii_Unique)
1945 return false;
1946
1947 /* indexes are only equivalent if they have the same access method */
1948 if (info1->ii_Am != info2->ii_Am)
1949 return false;
1950
1951 /* and same number of attributes */
1952 if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
1953 return false;
1954
1955 /* and same number of key attributes */
1956 if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
1957 return false;
1958
1959 /*
1960 * and columns match through the attribute map (actual attribute numbers
1961 * might differ!) Note that this implies that index columns that are
1962 * expressions appear in the same positions. We will next compare the
1963 * expressions themselves.
1964 */
1965 for (i = 0; i < info1->ii_NumIndexAttrs; i++)
1966 {
1967 if (maplen < info2->ii_IndexAttrNumbers[i])
1968 elog(ERROR, "incorrect attribute map");
1969
1970 /* ignore expressions at this stage */
1971 if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
1972 (attmap[info2->ii_IndexAttrNumbers[i] - 1] !=
1973 info1->ii_IndexAttrNumbers[i]))
1974 return false;
1975
1976 /* collation and opfamily is not valid for including columns */
1977 if (i >= info1->ii_NumIndexKeyAttrs)
1978 continue;
1979
1980 if (collations1[i] != collations2[i])
1981 return false;
1982 if (opfamilies1[i] != opfamilies2[i])
1983 return false;
1984 }
1985
1986 /*
1987 * For expression indexes: either both are expression indexes, or neither
1988 * is; if they are, make sure the expressions match.
1989 */
1990 if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
1991 return false;
1992 if (info1->ii_Expressions != NIL)
1993 {
1994 bool found_whole_row;
1995 Node *mapped;
1996
1997 mapped = map_variable_attnos((Node *) info2->ii_Expressions,
1998 1, 0, attmap, maplen,
1999 InvalidOid, &found_whole_row);
2000 if (found_whole_row)
2001 {
2002 /*
2003 * we could throw an error here, but seems out of scope for this
2004 * routine.
2005 */
2006 return false;
2007 }
2008
2009 if (!equal(info1->ii_Expressions, mapped))
2010 return false;
2011 }
2012
2013 /* Partial index predicates must be identical, if they exist */
2014 if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
2015 return false;
2016 if (info1->ii_Predicate != NULL)
2017 {
2018 bool found_whole_row;
2019 Node *mapped;
2020
2021 mapped = map_variable_attnos((Node *) info2->ii_Predicate,
2022 1, 0, attmap, maplen,
2023 InvalidOid, &found_whole_row);
2024 if (found_whole_row)
2025 {
2026 /*
2027 * we could throw an error here, but seems out of scope for this
2028 * routine.
2029 */
2030 return false;
2031 }
2032 if (!equal(info1->ii_Predicate, mapped))
2033 return false;
2034 }
2035
2036 /* No support currently for comparing exclusion indexes. */
2037 if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
2038 return false;
2039
2040 return true;
2041 }
2042
2043 /* ----------------
2044 * BuildSpeculativeIndexInfo
2045 * Add extra state to IndexInfo record
2046 *
2047 * For unique indexes, we usually don't want to add info to the IndexInfo for
2048 * checking uniqueness, since the B-Tree AM handles that directly. However,
2049 * in the case of speculative insertion, additional support is required.
2050 *
2051 * Do this processing here rather than in BuildIndexInfo() to not incur the
2052 * overhead in the common non-speculative cases.
2053 * ----------------
2054 */
2055 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)2056 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2057 {
2058 int indnkeyatts;
2059 int i;
2060
2061 indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2062
2063 /*
2064 * fetch info for checking unique indexes
2065 */
2066 Assert(ii->ii_Unique);
2067
2068 if (index->rd_rel->relam != BTREE_AM_OID)
2069 elog(ERROR, "unexpected non-btree speculative unique index");
2070
2071 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2072 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2073 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2074
2075 /*
2076 * We have to look up the operator's strategy number. This provides a
2077 * cross-check that the operator does match the index.
2078 */
2079 /* We need the func OIDs and strategy numbers too */
2080 for (i = 0; i < indnkeyatts; i++)
2081 {
2082 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2083 ii->ii_UniqueOps[i] =
2084 get_opfamily_member(index->rd_opfamily[i],
2085 index->rd_opcintype[i],
2086 index->rd_opcintype[i],
2087 ii->ii_UniqueStrats[i]);
2088 if (!OidIsValid(ii->ii_UniqueOps[i]))
2089 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2090 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2091 index->rd_opcintype[i], index->rd_opfamily[i]);
2092 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2093 }
2094 }
2095
2096 /* ----------------
2097 * FormIndexDatum
2098 * Construct values[] and isnull[] arrays for a new index tuple.
2099 *
2100 * indexInfo Info about the index
2101 * slot Heap tuple for which we must prepare an index entry
2102 * estate executor state for evaluating any index expressions
2103 * values Array of index Datums (output area)
2104 * isnull Array of is-null indicators (output area)
2105 *
2106 * When there are no index expressions, estate may be NULL. Otherwise it
2107 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
2108 * context must point to the heap tuple passed in.
2109 *
2110 * Notice we don't actually call index_form_tuple() here; we just prepare
2111 * its input arrays values[] and isnull[]. This is because the index AM
2112 * may wish to alter the data before storage.
2113 * ----------------
2114 */
2115 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)2116 FormIndexDatum(IndexInfo *indexInfo,
2117 TupleTableSlot *slot,
2118 EState *estate,
2119 Datum *values,
2120 bool *isnull)
2121 {
2122 ListCell *indexpr_item;
2123 int i;
2124
2125 if (indexInfo->ii_Expressions != NIL &&
2126 indexInfo->ii_ExpressionsState == NIL)
2127 {
2128 /* First time through, set up expression evaluation state */
2129 indexInfo->ii_ExpressionsState =
2130 ExecPrepareExprList(indexInfo->ii_Expressions, estate);
2131 /* Check caller has set up context correctly */
2132 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
2133 }
2134 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
2135
2136 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
2137 {
2138 int keycol = indexInfo->ii_IndexAttrNumbers[i];
2139 Datum iDatum;
2140 bool isNull;
2141
2142 if (keycol != 0)
2143 {
2144 /*
2145 * Plain index column; get the value we need directly from the
2146 * heap tuple.
2147 */
2148 iDatum = slot_getattr(slot, keycol, &isNull);
2149 }
2150 else
2151 {
2152 /*
2153 * Index expression --- need to evaluate it.
2154 */
2155 if (indexpr_item == NULL)
2156 elog(ERROR, "wrong number of index expressions");
2157 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
2158 GetPerTupleExprContext(estate),
2159 &isNull);
2160 indexpr_item = lnext(indexpr_item);
2161 }
2162 values[i] = iDatum;
2163 isnull[i] = isNull;
2164 }
2165
2166 if (indexpr_item != NULL)
2167 elog(ERROR, "wrong number of index expressions");
2168 }
2169
2170
/*
 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
 *
 * This routine updates the pg_class row of either an index or its parent
 * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
 * to ensure we can do all the necessary work in just one update.
 *
 * hasindex: set relhasindex to this value
 * reltuples: if >= 0, set reltuples to this value; else no change
 *
 * If reltuples >= 0, relpages and relallvisible are also updated (using
 * RelationGetNumberOfBlocks() and visibilitymap_count()).
 *
 * NOTE: an important side-effect of this operation is that an SI invalidation
 * message is sent out to all backends --- including me --- causing relcache
 * entries to be flushed or updated with the new data.  This must happen even
 * if we find that no change is needed in the pg_class row.  When updating
 * a heap entry, this ensures that other backends find out about the new
 * index.  When updating an index, it's important because some index AMs
 * expect a relcache flush to occur after REINDEX.
 */
static void
index_update_stats(Relation rel,
				   bool hasindex,
				   double reltuples)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	pg_class;
	HeapTuple	tuple;			/* our private copy of the pg_class row */
	Form_pg_class rd_rel;
	bool		dirty;			/* did we modify the copied tuple? */

	/*
	 * We always update the pg_class row using a non-transactional,
	 * overwrite-in-place update.  There are several reasons for this:
	 *
	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
	 *
	 * 2. We could be reindexing pg_class itself, in which case we can't move
	 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
	 * not know about all the indexes yet (see reindex_relation).
	 *
	 * 3. Because we execute CREATE INDEX with just share lock on the parent
	 * rel (to allow concurrent index creations), an ordinary update could
	 * suffer a tuple-concurrently-updated failure against another CREATE
	 * INDEX committing at about the same time.  We can avoid that by having
	 * them both do nontransactional updates (we assume they will both be
	 * trying to change the pg_class row to the same thing, so it doesn't
	 * matter which goes first).
	 *
	 * It is safe to use a non-transactional update even though our
	 * transaction could still fail before committing.  Setting relhasindex
	 * true is safe even if there are no indexes (VACUUM will eventually fix
	 * it).  And of course the new relpages and reltuples counts are correct
	 * regardless.  However, we don't want to change relpages (or
	 * relallvisible) if the caller isn't providing an updated reltuples
	 * count, because that would bollix the reltuples/relpages ratio which is
	 * what's really important.
	 */

	pg_class = heap_open(RelationRelationId, RowExclusiveLock);

	/*
	 * Make a copy of the tuple to update.  Normally we use the syscache, but
	 * we can't rely on that during bootstrap or while reindexing pg_class
	 * itself.
	 */
	if (IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationRelationId))
	{
		/* don't assume syscache will work */
		HeapScanDesc pg_class_scan;
		ScanKeyData key[1];

		/* scan pg_class directly for the row whose OID matches relid */
		ScanKeyInit(&key[0],
					ObjectIdAttributeNumber,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
		/* copy the tuple so it remains valid after heap_endscan */
		tuple = heap_copytuple(tuple);
		heap_endscan(pg_class_scan);
	}
	else
	{
		/* normal case, use syscache */
		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/* Should this be a more comprehensive test? */
	Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);

	/* Apply required updates, if any, to copied tuple */

	dirty = false;
	if (rd_rel->relhasindex != hasindex)
	{
		rd_rel->relhasindex = hasindex;
		dirty = true;
	}

	/* only touch relpages/reltuples/relallvisible when reltuples supplied */
	if (reltuples >= 0)
	{
		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
		BlockNumber relallvisible;

		if (rd_rel->relkind != RELKIND_INDEX)
			visibilitymap_count(rel, &relallvisible, NULL);
		else					/* don't bother for indexes */
			relallvisible = 0;

		if (rd_rel->relpages != (int32) relpages)
		{
			rd_rel->relpages = (int32) relpages;
			dirty = true;
		}
		if (rd_rel->reltuples != (float4) reltuples)
		{
			rd_rel->reltuples = (float4) reltuples;
			dirty = true;
		}
		if (rd_rel->relallvisible != (int32) relallvisible)
		{
			rd_rel->relallvisible = (int32) relallvisible;
			dirty = true;
		}
	}

	/*
	 * If anything changed, write out the tuple
	 */
	if (dirty)
	{
		heap_inplace_update(pg_class, tuple);
		/* the above sends a cache inval message */
	}
	else
	{
		/* no need to change tuple, but force relcache inval anyway */
		CacheInvalidateRelcacheByTuple(tuple);
	}

	heap_freetuple(tuple);

	heap_close(pg_class, RowExclusiveLock);
}
2322
2323
2324 /*
2325 * index_build - invoke access-method-specific index build procedure
2326 *
2327 * On entry, the index's catalog entries are valid, and its physical disk
2328 * file has been created but is empty. We call the AM-specific build
2329 * procedure to fill in the index contents. We then update the pg_class
2330 * entries of the index and heap relation as needed, using statistics
2331 * returned by ambuild as well as data passed by the caller.
2332 *
2333 * isprimary tells whether to mark the index as a primary-key index.
2334 * isreindex indicates we are recreating a previously-existing index.
2335 * parallel indicates if parallelism may be useful.
2336 *
2337 * Note: when reindexing an existing index, isprimary can be false even if
2338 * the index is a PK; it's already properly marked and need not be re-marked.
2339 *
2340 * Note: before Postgres 8.2, the passed-in heap and index Relations
2341 * were automatically closed by this routine. This is no longer the case.
2342 * The caller opened 'em, and the caller should close 'em.
2343 */
2344 void
index_build(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool isprimary,bool isreindex,bool parallel)2345 index_build(Relation heapRelation,
2346 Relation indexRelation,
2347 IndexInfo *indexInfo,
2348 bool isprimary,
2349 bool isreindex,
2350 bool parallel)
2351 {
2352 IndexBuildResult *stats;
2353 Oid save_userid;
2354 int save_sec_context;
2355 int save_nestlevel;
2356
2357 /*
2358 * sanity checks
2359 */
2360 Assert(RelationIsValid(indexRelation));
2361 Assert(PointerIsValid(indexRelation->rd_amroutine));
2362 Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
2363 Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));
2364
2365 /*
2366 * Determine worker process details for parallel CREATE INDEX. Currently,
2367 * only btree has support for parallel builds.
2368 *
2369 * Note that planner considers parallel safety for us.
2370 */
2371 if (parallel && IsNormalProcessingMode() &&
2372 indexRelation->rd_rel->relam == BTREE_AM_OID)
2373 indexInfo->ii_ParallelWorkers =
2374 plan_create_index_workers(RelationGetRelid(heapRelation),
2375 RelationGetRelid(indexRelation));
2376
2377 if (indexInfo->ii_ParallelWorkers == 0)
2378 ereport(DEBUG1,
2379 (errmsg("building index \"%s\" on table \"%s\" serially",
2380 RelationGetRelationName(indexRelation),
2381 RelationGetRelationName(heapRelation))));
2382 else
2383 ereport(DEBUG1,
2384 (errmsg_plural("building index \"%s\" on table \"%s\" with request for %d parallel worker",
2385 "building index \"%s\" on table \"%s\" with request for %d parallel workers",
2386 indexInfo->ii_ParallelWorkers,
2387 RelationGetRelationName(indexRelation),
2388 RelationGetRelationName(heapRelation),
2389 indexInfo->ii_ParallelWorkers)));
2390
2391 /*
2392 * Switch to the table owner's userid, so that any index functions are run
2393 * as that user. Also lock down security-restricted operations and
2394 * arrange to make GUC variable changes local to this command.
2395 */
2396 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2397 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2398 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2399 save_nestlevel = NewGUCNestLevel();
2400
2401 /*
2402 * Call the access method's build procedure
2403 */
2404 stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
2405 indexInfo);
2406 Assert(PointerIsValid(stats));
2407
2408 /*
2409 * If this is an unlogged index, we may need to write out an init fork for
2410 * it -- but we must first check whether one already exists. If, for
2411 * example, an unlogged relation is truncated in the transaction that
2412 * created it, or truncated twice in a subsequent transaction, the
2413 * relfilenode won't change, and nothing needs to be done here.
2414 */
2415 if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2416 !smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2417 {
2418 RelationOpenSmgr(indexRelation);
2419 smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2420 indexRelation->rd_amroutine->ambuildempty(indexRelation);
2421 }
2422
2423 /*
2424 * If we found any potentially broken HOT chains, mark the index as not
2425 * being usable until the current transaction is below the event horizon.
2426 * See src/backend/access/heap/README.HOT for discussion. Also set this
2427 * if early pruning/vacuuming is enabled for the heap relation. While it
2428 * might become safe to use the index earlier based on actual cleanup
2429 * activity and other active transactions, the test for that would be much
2430 * more complex and would require some form of blocking, so keep it simple
2431 * and fast by just using the current transaction.
2432 *
2433 * However, when reindexing an existing index, we should do nothing here.
2434 * Any HOT chains that are broken with respect to the index must predate
2435 * the index's original creation, so there is no need to change the
2436 * index's usability horizon. Moreover, we *must not* try to change the
2437 * index's pg_index entry while reindexing pg_index itself, and this
2438 * optimization nicely prevents that. The more complex rules needed for a
2439 * reindex are handled separately after this function returns.
2440 *
2441 * We also need not set indcheckxmin during a concurrent index build,
2442 * because we won't set indisvalid true until all transactions that care
2443 * about the broken HOT chains or early pruning/vacuuming are gone.
2444 *
2445 * Therefore, this code path can only be taken during non-concurrent
2446 * CREATE INDEX. Thus the fact that heap_update will set the pg_index
2447 * tuple's xmin doesn't matter, because that tuple was created in the
2448 * current transaction anyway. That also means we don't need to worry
2449 * about any concurrent readers of the tuple; no other transaction can see
2450 * it yet.
2451 */
2452 if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
2453 !isreindex &&
2454 !indexInfo->ii_Concurrent)
2455 {
2456 Oid indexId = RelationGetRelid(indexRelation);
2457 Relation pg_index;
2458 HeapTuple indexTuple;
2459 Form_pg_index indexForm;
2460
2461 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2462
2463 indexTuple = SearchSysCacheCopy1(INDEXRELID,
2464 ObjectIdGetDatum(indexId));
2465 if (!HeapTupleIsValid(indexTuple))
2466 elog(ERROR, "cache lookup failed for index %u", indexId);
2467 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2468
2469 /* If it's a new index, indcheckxmin shouldn't be set ... */
2470 Assert(!indexForm->indcheckxmin);
2471
2472 indexForm->indcheckxmin = true;
2473 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2474
2475 heap_freetuple(indexTuple);
2476 heap_close(pg_index, RowExclusiveLock);
2477 }
2478
2479 /*
2480 * Update heap and index pg_class rows
2481 */
2482 index_update_stats(heapRelation,
2483 true,
2484 stats->heap_tuples);
2485
2486 index_update_stats(indexRelation,
2487 false,
2488 stats->index_tuples);
2489
2490 /* Make the updated catalog row versions visible */
2491 CommandCounterIncrement();
2492
2493 /*
2494 * If it's for an exclusion constraint, make a second pass over the heap
2495 * to verify that the constraint is satisfied. We must not do this until
2496 * the index is fully valid. (Broken HOT chains shouldn't matter, though;
2497 * see comments for IndexCheckExclusion.)
2498 */
2499 if (indexInfo->ii_ExclusionOps != NULL)
2500 IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2501
2502 /* Roll back any GUC changes executed by index functions */
2503 AtEOXact_GUC(false, save_nestlevel);
2504
2505 /* Restore userid and security context */
2506 SetUserIdAndSecContext(save_userid, save_sec_context);
2507 }
2508
2509
/*
 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
 *
 * This is called back from an access-method-specific index build procedure
 * after the AM has done whatever setup it needs.  The parent heap relation
 * is scanned to find tuples that should be entered into the index.  Each
 * such tuple is passed to the AM's callback routine, which does the right
 * things to add it to the new index.  After we return, the AM's index
 * build procedure does whatever cleanup it needs.
 *
 * The total count of live heap tuples is returned.  This is for updating
 * pg_class statistics.  (It's annoying not to be able to do that here, but we
 * want to merge that update with others; see index_update_stats.)  Note that
 * the index AM itself must keep track of the number of index tuples; we don't
 * do so here because the AM might reject some of the tuples for its own
 * reasons, such as being unable to store NULLs.
 *
 * "scan" may be NULL, in which case a serial heap scan over the whole
 * relation is started (and later ended) internally; a parallel build
 * instead passes in its already-initialized scan descriptor.
 *
 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
 * any potentially broken HOT chains.  Currently, we set this if there are
 * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
 * trying very hard to detect whether they're really incompatible with the
 * chain tip.
 */
double
IndexBuildHeapScan(Relation heapRelation,
				   Relation indexRelation,
				   IndexInfo *indexInfo,
				   bool allow_sync,
				   IndexBuildCallback callback,
				   void *callback_state,
				   HeapScanDesc scan)
{
	/* Delegate to the range-scan version, covering the entire relation */
	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
								   indexInfo, allow_sync,
								   false,	/* anyvisible */
								   0, InvalidBlockNumber,	/* whole relation */
								   callback, callback_state, scan);
}
2548
2549 /*
2550 * As above, except that instead of scanning the complete heap, only the given
2551 * number of blocks are scanned. Scan to end-of-rel can be signalled by
2552 * passing InvalidBlockNumber as numblocks. Note that restricting the range
2553 * to scan cannot be done when requesting syncscan.
2554 *
2555 * When "anyvisible" mode is requested, all tuples visible to any transaction
2556 * are indexed and counted as live, including those inserted or deleted by
2557 * transactions that are still in progress.
2558 */
2559 double
IndexBuildHeapRangeScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,bool anyvisible,BlockNumber start_blockno,BlockNumber numblocks,IndexBuildCallback callback,void * callback_state,HeapScanDesc scan)2560 IndexBuildHeapRangeScan(Relation heapRelation,
2561 Relation indexRelation,
2562 IndexInfo *indexInfo,
2563 bool allow_sync,
2564 bool anyvisible,
2565 BlockNumber start_blockno,
2566 BlockNumber numblocks,
2567 IndexBuildCallback callback,
2568 void *callback_state,
2569 HeapScanDesc scan)
2570 {
2571 bool is_system_catalog;
2572 bool checking_uniqueness;
2573 HeapTuple heapTuple;
2574 Datum values[INDEX_MAX_KEYS];
2575 bool isnull[INDEX_MAX_KEYS];
2576 double reltuples;
2577 ExprState *predicate;
2578 TupleTableSlot *slot;
2579 EState *estate;
2580 ExprContext *econtext;
2581 Snapshot snapshot;
2582 bool need_unregister_snapshot = false;
2583 TransactionId OldestXmin;
2584 BlockNumber root_blkno = InvalidBlockNumber;
2585 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2586
2587 /*
2588 * sanity checks
2589 */
2590 Assert(OidIsValid(indexRelation->rd_rel->relam));
2591
2592 /* Remember if it's a system catalog */
2593 is_system_catalog = IsSystemRelation(heapRelation);
2594
2595 /* See whether we're verifying uniqueness/exclusion properties */
2596 checking_uniqueness = (indexInfo->ii_Unique ||
2597 indexInfo->ii_ExclusionOps != NULL);
2598
2599 /*
2600 * "Any visible" mode is not compatible with uniqueness checks; make sure
2601 * only one of those is requested.
2602 */
2603 Assert(!(anyvisible && checking_uniqueness));
2604
2605 /*
2606 * Need an EState for evaluation of index expressions and partial-index
2607 * predicates. Also a slot to hold the current tuple.
2608 */
2609 estate = CreateExecutorState();
2610 econtext = GetPerTupleExprContext(estate);
2611 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2612
2613 /* Arrange for econtext's scan tuple to be the tuple under test */
2614 econtext->ecxt_scantuple = slot;
2615
2616 /* Set up execution state for predicate, if any. */
2617 predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
2618
2619 /*
2620 * Prepare for scan of the base relation. In a normal index build, we use
2621 * SnapshotAny because we must retrieve all tuples and do our own time
2622 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2623 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
2624 * and index whatever's live according to that.
2625 */
2626 OldestXmin = InvalidTransactionId;
2627
2628 /* okay to ignore lazy VACUUMs here */
2629 if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
2630 OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
2631
2632 if (!scan)
2633 {
2634 /*
2635 * Serial index build.
2636 *
2637 * Must begin our own heap scan in this case. We may also need to
2638 * register a snapshot whose lifetime is under our direct control.
2639 */
2640 if (!TransactionIdIsValid(OldestXmin))
2641 {
2642 snapshot = RegisterSnapshot(GetTransactionSnapshot());
2643 need_unregister_snapshot = true;
2644 }
2645 else
2646 snapshot = SnapshotAny;
2647
2648 scan = heap_beginscan_strat(heapRelation, /* relation */
2649 snapshot, /* snapshot */
2650 0, /* number of keys */
2651 NULL, /* scan key */
2652 true, /* buffer access strategy OK */
2653 allow_sync); /* syncscan OK? */
2654 }
2655 else
2656 {
2657 /*
2658 * Parallel index build.
2659 *
2660 * Parallel case never registers/unregisters own snapshot. Snapshot
2661 * is taken from parallel heap scan, and is SnapshotAny or an MVCC
2662 * snapshot, based on same criteria as serial case.
2663 */
2664 Assert(!IsBootstrapProcessingMode());
2665 Assert(allow_sync);
2666 snapshot = scan->rs_snapshot;
2667 }
2668
2669 /*
2670 * Must call GetOldestXmin() with SnapshotAny. Should never call
2671 * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
2672 * this for parallel builds, since ambuild routines that support parallel
2673 * builds must work these details out for themselves.)
2674 */
2675 Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
2676 Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
2677 !TransactionIdIsValid(OldestXmin));
2678 Assert(snapshot == SnapshotAny || !anyvisible);
2679
2680 /* set our scan endpoints */
2681 if (!allow_sync)
2682 heap_setscanlimits(scan, start_blockno, numblocks);
2683 else
2684 {
2685 /* syncscan can only be requested on whole relation */
2686 Assert(start_blockno == 0);
2687 Assert(numblocks == InvalidBlockNumber);
2688 }
2689
2690 reltuples = 0;
2691
2692 /*
2693 * Scan all tuples in the base relation.
2694 */
2695 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2696 {
2697 bool tupleIsAlive;
2698
2699 CHECK_FOR_INTERRUPTS();
2700
2701 /*
2702 * When dealing with a HOT-chain of updated tuples, we want to index
2703 * the values of the live tuple (if any), but index it under the TID
2704 * of the chain's root tuple. This approach is necessary to preserve
2705 * the HOT-chain structure in the heap. So we need to be able to find
2706 * the root item offset for every tuple that's in a HOT-chain. When
2707 * first reaching a new page of the relation, call
2708 * heap_get_root_tuples() to build a map of root item offsets on the
2709 * page.
2710 *
2711 * It might look unsafe to use this information across buffer
2712 * lock/unlock. However, we hold ShareLock on the table so no
2713 * ordinary insert/update/delete should occur; and we hold pin on the
2714 * buffer continuously while visiting the page, so no pruning
2715 * operation can occur either.
2716 *
2717 * In cases with only ShareUpdateExclusiveLock on the table, it's
2718 * possible for some HOT tuples to appear that we didn't know about
2719 * when we first read the page. To handle that case, we re-obtain the
2720 * list of root offsets when a HOT tuple points to a root item that we
2721 * don't know about.
2722 *
2723 * Also, although our opinions about tuple liveness could change while
2724 * we scan the page (due to concurrent transaction commits/aborts),
2725 * the chain root locations won't, so this info doesn't need to be
2726 * rebuilt after waiting for another transaction.
2727 *
2728 * Note the implied assumption that there is no more than one live
2729 * tuple per HOT-chain --- else we could create more than one index
2730 * entry pointing to the same root tuple.
2731 */
2732 if (scan->rs_cblock != root_blkno)
2733 {
2734 Page page = BufferGetPage(scan->rs_cbuf);
2735
2736 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2737 heap_get_root_tuples(page, root_offsets);
2738 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2739
2740 root_blkno = scan->rs_cblock;
2741 }
2742
2743 if (snapshot == SnapshotAny)
2744 {
2745 /* do our own time qual check */
2746 bool indexIt;
2747 TransactionId xwait;
2748
2749 recheck:
2750
2751 /*
2752 * We could possibly get away with not locking the buffer here,
2753 * since caller should hold ShareLock on the relation, but let's
2754 * be conservative about it. (This remark is still correct even
2755 * with HOT-pruning: our pin on the buffer prevents pruning.)
2756 */
2757 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2758
2759 /*
2760 * The criteria for counting a tuple as live in this block need to
2761 * match what analyze.c's acquire_sample_rows() does, otherwise
2762 * CREATE INDEX and ANALYZE may produce wildly different reltuples
2763 * values, e.g. when there are many recently-dead tuples.
2764 */
2765 switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
2766 scan->rs_cbuf))
2767 {
2768 case HEAPTUPLE_DEAD:
2769 /* Definitely dead, we can ignore it */
2770 indexIt = false;
2771 tupleIsAlive = false;
2772 break;
2773 case HEAPTUPLE_LIVE:
2774 /* Normal case, index and unique-check it */
2775 indexIt = true;
2776 tupleIsAlive = true;
2777 /* Count it as live, too */
2778 reltuples += 1;
2779 break;
2780 case HEAPTUPLE_RECENTLY_DEAD:
2781
2782 /*
2783 * If tuple is recently deleted then we must index it
2784 * anyway to preserve MVCC semantics. (Pre-existing
2785 * transactions could try to use the index after we finish
2786 * building it, and may need to see such tuples.)
2787 *
2788 * However, if it was HOT-updated then we must only index
2789 * the live tuple at the end of the HOT-chain. Since this
2790 * breaks semantics for pre-existing snapshots, mark the
2791 * index as unusable for them.
2792 *
2793 * We don't count recently-dead tuples in reltuples, even
2794 * if we index them; see acquire_sample_rows().
2795 */
2796 if (HeapTupleIsHotUpdated(heapTuple))
2797 {
2798 indexIt = false;
2799 /* mark the index as unsafe for old snapshots */
2800 indexInfo->ii_BrokenHotChain = true;
2801 }
2802 else
2803 indexIt = true;
2804 /* In any case, exclude the tuple from unique-checking */
2805 tupleIsAlive = false;
2806 break;
2807 case HEAPTUPLE_INSERT_IN_PROGRESS:
2808
2809 /*
2810 * In "anyvisible" mode, this tuple is visible and we
2811 * don't need any further checks.
2812 */
2813 if (anyvisible)
2814 {
2815 indexIt = true;
2816 tupleIsAlive = true;
2817 reltuples += 1;
2818 break;
2819 }
2820
2821 /*
2822 * Since caller should hold ShareLock or better, normally
2823 * the only way to see this is if it was inserted earlier
2824 * in our own transaction. However, it can happen in
2825 * system catalogs, since we tend to release write lock
2826 * before commit there. Give a warning if neither case
2827 * applies.
2828 */
2829 xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
2830 if (!TransactionIdIsCurrentTransactionId(xwait))
2831 {
2832 if (!is_system_catalog)
2833 elog(WARNING, "concurrent insert in progress within table \"%s\"",
2834 RelationGetRelationName(heapRelation));
2835
2836 /*
2837 * If we are performing uniqueness checks, indexing
2838 * such a tuple could lead to a bogus uniqueness
2839 * failure. In that case we wait for the inserting
2840 * transaction to finish and check again.
2841 */
2842 if (checking_uniqueness)
2843 {
2844 /*
2845 * Must drop the lock on the buffer before we wait
2846 */
2847 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2848 XactLockTableWait(xwait, heapRelation,
2849 &heapTuple->t_self,
2850 XLTW_InsertIndexUnique);
2851 CHECK_FOR_INTERRUPTS();
2852 goto recheck;
2853 }
2854 }
2855 else
2856 {
2857 /*
2858 * For consistency with acquire_sample_rows(), count
2859 * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
2860 * when inserted by our own transaction.
2861 */
2862 reltuples += 1;
2863 }
2864
2865 /*
2866 * We must index such tuples, since if the index build
2867 * commits then they're good.
2868 */
2869 indexIt = true;
2870 tupleIsAlive = true;
2871 break;
2872 case HEAPTUPLE_DELETE_IN_PROGRESS:
2873
2874 /*
2875 * As with INSERT_IN_PROGRESS case, this is unexpected
2876 * unless it's our own deletion or a system catalog; but
2877 * in anyvisible mode, this tuple is visible.
2878 */
2879 if (anyvisible)
2880 {
2881 indexIt = true;
2882 tupleIsAlive = false;
2883 reltuples += 1;
2884 break;
2885 }
2886
2887 xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
2888 if (!TransactionIdIsCurrentTransactionId(xwait))
2889 {
2890 if (!is_system_catalog)
2891 elog(WARNING, "concurrent delete in progress within table \"%s\"",
2892 RelationGetRelationName(heapRelation));
2893
2894 /*
2895 * If we are performing uniqueness checks, assuming
2896 * the tuple is dead could lead to missing a
2897 * uniqueness violation. In that case we wait for the
2898 * deleting transaction to finish and check again.
2899 *
2900 * Also, if it's a HOT-updated tuple, we should not
2901 * index it but rather the live tuple at the end of
2902 * the HOT-chain. However, the deleting transaction
2903 * could abort, possibly leaving this tuple as live
2904 * after all, in which case it has to be indexed. The
2905 * only way to know what to do is to wait for the
2906 * deleting transaction to finish and check again.
2907 */
2908 if (checking_uniqueness ||
2909 HeapTupleIsHotUpdated(heapTuple))
2910 {
2911 /*
2912 * Must drop the lock on the buffer before we wait
2913 */
2914 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2915 XactLockTableWait(xwait, heapRelation,
2916 &heapTuple->t_self,
2917 XLTW_InsertIndexUnique);
2918 CHECK_FOR_INTERRUPTS();
2919 goto recheck;
2920 }
2921
2922 /*
2923 * Otherwise index it but don't check for uniqueness,
2924 * the same as a RECENTLY_DEAD tuple.
2925 */
2926 indexIt = true;
2927
2928 /*
2929 * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
2930 * if they were not deleted by the current
2931 * transaction. That's what acquire_sample_rows()
2932 * does, and we want the behavior to be consistent.
2933 */
2934 reltuples += 1;
2935 }
2936 else if (HeapTupleIsHotUpdated(heapTuple))
2937 {
2938 /*
2939 * It's a HOT-updated tuple deleted by our own xact.
2940 * We can assume the deletion will commit (else the
2941 * index contents don't matter), so treat the same as
2942 * RECENTLY_DEAD HOT-updated tuples.
2943 */
2944 indexIt = false;
2945 /* mark the index as unsafe for old snapshots */
2946 indexInfo->ii_BrokenHotChain = true;
2947 }
2948 else
2949 {
2950 /*
2951 * It's a regular tuple deleted by our own xact. Index
2952 * it, but don't check for uniqueness nor count in
2953 * reltuples, the same as a RECENTLY_DEAD tuple.
2954 */
2955 indexIt = true;
2956 }
2957 /* In any case, exclude the tuple from unique-checking */
2958 tupleIsAlive = false;
2959 break;
2960 default:
2961 elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2962 indexIt = tupleIsAlive = false; /* keep compiler quiet */
2963 break;
2964 }
2965
2966 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2967
2968 if (!indexIt)
2969 continue;
2970 }
2971 else
2972 {
2973 /* heap_getnext did the time qual check */
2974 tupleIsAlive = true;
2975 reltuples += 1;
2976 }
2977
2978 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2979
2980 /* Set up for predicate or expression evaluation */
2981 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2982
2983 /*
2984 * In a partial index, discard tuples that don't satisfy the
2985 * predicate.
2986 */
2987 if (predicate != NULL)
2988 {
2989 if (!ExecQual(predicate, econtext))
2990 continue;
2991 }
2992
2993 /*
2994 * For the current heap tuple, extract all the attributes we use in
2995 * this index, and note which are null. This also performs evaluation
2996 * of any expressions needed.
2997 */
2998 FormIndexDatum(indexInfo,
2999 slot,
3000 estate,
3001 values,
3002 isnull);
3003
3004 /*
3005 * You'd think we should go ahead and build the index tuple here, but
3006 * some index AMs want to do further processing on the data first. So
3007 * pass the values[] and isnull[] arrays, instead.
3008 */
3009
3010 if (HeapTupleIsHeapOnly(heapTuple))
3011 {
3012 /*
3013 * For a heap-only tuple, pretend its TID is that of the root. See
3014 * src/backend/access/heap/README.HOT for discussion.
3015 */
3016 HeapTupleData rootTuple;
3017 OffsetNumber offnum;
3018
3019 rootTuple = *heapTuple;
3020 offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
3021
3022 /*
3023 * If a HOT tuple points to a root that we don't know
3024 * about, obtain root items afresh. If that still fails,
3025 * report it as corruption.
3026 */
3027 if (root_offsets[offnum - 1] == InvalidOffsetNumber)
3028 {
3029 Page page = BufferGetPage(scan->rs_cbuf);
3030
3031 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3032 heap_get_root_tuples(page, root_offsets);
3033 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
3034 }
3035
3036 if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
3037 ereport(ERROR,
3038 (errcode(ERRCODE_DATA_CORRUPTED),
3039 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3040 ItemPointerGetBlockNumber(&heapTuple->t_self),
3041 offnum,
3042 RelationGetRelationName(heapRelation))));
3043
3044 ItemPointerSetOffsetNumber(&rootTuple.t_self,
3045 root_offsets[offnum - 1]);
3046
3047 /* Call the AM's callback routine to process the tuple */
3048 callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
3049 callback_state);
3050 }
3051 else
3052 {
3053 /* Call the AM's callback routine to process the tuple */
3054 callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
3055 callback_state);
3056 }
3057 }
3058
3059 heap_endscan(scan);
3060
3061 /* we can now forget our snapshot, if set and registered by us */
3062 if (need_unregister_snapshot)
3063 UnregisterSnapshot(snapshot);
3064
3065 ExecDropSingleTupleTableSlot(slot);
3066
3067 FreeExecutorState(estate);
3068
3069 /* These may have been pointing to the now-gone estate */
3070 indexInfo->ii_ExpressionsState = NIL;
3071 indexInfo->ii_PredicateState = NULL;
3072
3073 return reltuples;
3074 }
3075
3076
3077 /*
3078 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
3079 *
3080 * When creating an exclusion constraint, we first build the index normally
3081 * and then rescan the heap to check for conflicts. We assume that we only
3082 * need to validate tuples that are live according to an up-to-date snapshot,
3083 * and that these were correctly indexed even in the presence of broken HOT
3084 * chains. This should be OK since we are holding at least ShareLock on the
3085 * table, meaning there can be no uncommitted updates from other transactions.
3086 * (Note: that wouldn't necessarily work for system catalogs, since many
3087 * operations release write lock early on the system catalogs.)
3088 */
static void
IndexCheckExclusion(Relation heapRelation,
					Relation indexRelation,
					IndexInfo *indexInfo)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];	/* per-column index values */
	bool		isnull[INDEX_MAX_KEYS]; /* per-column null flags */
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;

	/*
	 * If we are reindexing the target index, mark it as no longer being
	 * reindexed, to forestall an Assert in index_beginscan when we try to use
	 * the index for probes.  This is OK because the index is now fully valid.
	 */
	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
		ResetReindexProcessing();

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Scan all live tuples in the base relation.  We use the latest snapshot
	 * (registered so it stays valid across the scan); per the header comment,
	 * validating only currently-live tuples is sufficient here.
	 */
	snapshot = RegisterSnapshot(GetLatestSnapshot());
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,	/* snapshot */
								0,	/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								true);	/* syncscan OK */

	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		CHECK_FOR_INTERRUPTS();

		/* Free expression-evaluation detritus from the previous tuple */
		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, ignore tuples that don't satisfy the predicate.
		 */
		if (predicate != NULL)
		{
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * Extract index column values, including computing expressions.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * Check that this tuple has no conflicts.  Errors out on a conflict.
		 */
		check_exclusion_constraint(heapRelation,
								   indexRelation, indexInfo,
								   &(heapTuple->t_self), values, isnull,
								   estate, true);
	}

	heap_endscan(scan);
	UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}
3184
3185
3186 /*
3187 * validate_index - support code for concurrent index builds
3188 *
3189 * We do a concurrent index build by first inserting the catalog entry for the
3190 * index via index_create(), marking it not indisready and not indisvalid.
3191 * Then we commit our transaction and start a new one, then we wait for all
3192 * transactions that could have been modifying the table to terminate. Now
3193 * we know that any subsequently-started transactions will see the index and
3194 * honor its constraints on HOT updates; so while existing HOT-chains might
3195 * be broken with respect to the index, no currently live tuple will have an
3196 * incompatible HOT update done to it. We now build the index normally via
3197 * index_build(), while holding a weak lock that allows concurrent
3198 * insert/update/delete. Also, we index only tuples that are valid
3199 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
3200 * build takes care to include recently-dead tuples. This is OK because
3201 * we won't mark the index valid until all transactions that might be able
3202 * to see those tuples are gone. The reason for doing that is to avoid
3203 * bogus unique-index failures due to concurrent UPDATEs (we might see
3204 * different versions of the same row as being valid when we pass over them,
3205 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
3206 * does not contain any tuples added to the table while we built the index.
3207 *
3208 * Next, we mark the index "indisready" (but still not "indisvalid") and
3209 * commit the second transaction and start a third. Again we wait for all
3210 * transactions that could have been modifying the table to terminate. Now
3211 * we know that any subsequently-started transactions will see the index and
3212 * insert their new tuples into it. We then take a new reference snapshot
3213 * which is passed to validate_index(). Any tuples that are valid according
3214 * to this snap, but are not in the index, must be added to the index.
3215 * (Any tuples committed live after the snap will be inserted into the
3216 * index by their originating transaction. Any tuples committed dead before
3217 * the snap need not be indexed, because we will wait out all transactions
3218 * that might care about them before we mark the index valid.)
3219 *
3220 * validate_index() works by first gathering all the TIDs currently in the
3221 * index, using a bulkdelete callback that just stores the TIDs and doesn't
3222 * ever say "delete it". (This should be faster than a plain indexscan;
3223 * also, not all index AMs support full-index indexscan.) Then we sort the
3224 * TIDs, and finally scan the table doing a "merge join" against the TID list
3225 * to see which tuples are missing from the index. Thus we will ensure that
3226 * all tuples valid according to the reference snapshot are in the index.
3227 *
3228 * Building a unique index this way is tricky: we might try to insert a
3229 * tuple that is already dead or is in process of being deleted, and we
3230 * mustn't have a uniqueness failure against an updated version of the same
3231 * row. We could try to check the tuple to see if it's already dead and tell
3232 * index_insert() not to do the uniqueness check, but that still leaves us
3233 * with a race condition against an in-progress update. To handle that,
3234 * we expect the index AM to recheck liveness of the to-be-inserted tuple
3235 * before it declares a uniqueness error.
3236 *
3237 * After completing validate_index(), we wait until all transactions that
3238 * were alive at the time of the reference snapshot are gone; this is
3239 * necessary to be sure there are none left with a transaction snapshot
3240 * older than the reference (and hence possibly able to see tuples we did
3241 * not index). Then we mark the index "indisvalid" and commit. Subsequent
3242 * transactions will be able to use it for queries.
3243 *
3244 * Doing two full table scans is a brute-force strategy. We could try to be
3245 * cleverer, eg storing new tuples in a special area of the table (perhaps
3246 * making the table append-only by setting use_fsm). However that would
3247 * add yet more locking issues.
3248 */
void
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
{
	Relation	heapRelation,
				indexRelation;
	IndexInfo  *indexInfo;
	IndexVacuumInfo ivinfo;
	v_i_state	state;			/* tuplesort handle plus merge counters */
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/* Open and lock the parent heap relation */
	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
	/* And the target index relation */
	indexRelation = index_open(indexId, RowExclusiveLock);

	/*
	 * Fetch info needed for index_insert.  (You might think this should be
	 * passed in from DefineIndex, but its copy is long gone due to having
	 * been built in a previous transaction.)
	 */
	indexInfo = BuildIndexInfo(indexRelation);

	/* mark build is concurrent just for consistency */
	indexInfo->ii_Concurrent = true;

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Scan the index and gather up all the TIDs into a tuplesort object.
	 */
	ivinfo.index = indexRelation;
	ivinfo.analyze_only = false;
	ivinfo.estimated_count = true;
	ivinfo.message_level = DEBUG2;
	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
	ivinfo.strategy = NULL;

	/*
	 * Encode TIDs as int8 values for the sort, rather than directly sorting
	 * item pointers.  This can be significantly faster, primarily because TID
	 * is a pass-by-reference type on all platforms, whereas int8 is
	 * pass-by-value on most platforms.
	 */
	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
											InvalidOid, false,
											maintenance_work_mem,
											NULL, false);
	state.htups = state.itups = state.tups_inserted = 0;

	/* Collect TIDs via a bulkdelete callback that never deletes anything */
	(void) index_bulk_delete(&ivinfo, NULL,
							 validate_index_callback, (void *) &state);

	/* Execute the sort */
	tuplesort_performsort(state.tuplesort);

	/*
	 * Now scan the heap and "merge" it with the index
	 */
	validate_index_heapscan(heapRelation,
							indexRelation,
							indexInfo,
							snapshot,
							&state);

	/* Done with tuplesort object */
	tuplesort_end(state.tuplesort);

	elog(DEBUG2,
		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
		 state.htups, state.itups, state.tups_inserted);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Close rels, but keep locks */
	index_close(indexRelation, NoLock);
	heap_close(heapRelation, NoLock);
}
3340
3341 /*
3342 * itemptr_encode - Encode ItemPointer as int64/int8
3343 *
3344 * This representation must produce values encoded as int64 that sort in the
3345 * same order as their corresponding original TID values would (using the
3346 * default int8 opclass to produce a result equivalent to the default TID
3347 * opclass).
3348 *
3349 * As noted in validate_index(), this can be significantly faster.
3350 */
3351 static inline int64
itemptr_encode(ItemPointer itemptr)3352 itemptr_encode(ItemPointer itemptr)
3353 {
3354 BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3355 OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3356 int64 encoded;
3357
3358 /*
3359 * Use the 16 least significant bits for the offset. 32 adjacent bits are
3360 * used for the block number. Since remaining bits are unused, there
3361 * cannot be negative encoded values (We assume a two's complement
3362 * representation).
3363 */
3364 encoded = ((uint64) block << 16) | (uint16) offset;
3365
3366 return encoded;
3367 }
3368
3369 /*
3370 * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3371 */
3372 static inline void
itemptr_decode(ItemPointer itemptr,int64 encoded)3373 itemptr_decode(ItemPointer itemptr, int64 encoded)
3374 {
3375 BlockNumber block = (BlockNumber) (encoded >> 16);
3376 OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3377
3378 ItemPointerSet(itemptr, block, offset);
3379 }
3380
3381 /*
3382 * validate_index_callback - bulkdelete callback to collect the index TIDs
3383 */
3384 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3385 validate_index_callback(ItemPointer itemptr, void *opaque)
3386 {
3387 v_i_state *state = (v_i_state *) opaque;
3388 int64 encoded = itemptr_encode(itemptr);
3389
3390 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3391 state->itups += 1;
3392 return false; /* never actually delete anything */
3393 }
3394
3395 /*
3396 * validate_index_heapscan - second table scan for concurrent index build
3397 *
3398 * This has much code in common with IndexBuildHeapScan, but it's enough
3399 * different that it seems cleaner to have two routines not one.
3400 */
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;	/* page whose root map is cached */
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];	/* HOT-chain root map for that page */
	bool		in_index[MaxHeapTuplesPerPage]; /* index TIDs already passed on this page */

	/* state variables for the merge */
	ItemPointer indexcursor = NULL; /* current TID drawn from the tuplesort */
	ItemPointerData decoded;	/* storage that indexcursor points into */
	bool		tuplesort_empty = false;	/* true once the sort is exhausted */

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,	/* snapshot */
								0,	/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								false); /* syncscan not OK */

	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * As commented in IndexBuildHeapScan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page.  We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = scan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			/* heap-only tuple: substitute its HOT-chain root's offset */
			root_offnum = root_offsets[root_offnum - 1];
			if (!OffsetNumberIsValid(root_offnum))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(heapcursor),
										 ItemPointerGetOffsetNumber(heapcursor),
										 RelationGetRelationName(heapRelation))));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current root tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, &rootTuple) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			if (indexcursor)
			{
				/*
				 * Remember index items seen earlier on the current heap page
				 */
				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
			}

			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
												  &ts_val, &ts_isnull, NULL);
			Assert(tuplesort_empty || !ts_isnull);
			if (!tuplesort_empty)
			{
				itemptr_decode(&decoded, DatumGetInt64(ts_val));
				indexcursor = &decoded;

				/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
#ifndef USE_FLOAT8_BYVAL
				pfree(DatumGetPointer(ts_val));
#endif
			}
			else
			{
				/* Be tidy */
				indexcursor = NULL;
			}
		}

		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			/* Free expression-evaluation detritus from the previous tuple */
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first.  So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */

			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique ?
						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
						 indexInfo);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}
3629
3630
/*
 * index_set_state_flags - adjust pg_index state flags
 *
 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
 * flags that denote the index's state.
 *
 * indexId: OID of the index whose pg_index row is to be updated.
 * action: which state transition to perform (see IndexStateFlagsAction).
 *
 * Note that CatalogTupleUpdate() sends a cache invalidation message for the
 * tuple, so other sessions will hear about the update as soon as we commit.
 */
void
index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
{
    Relation    pg_index;
    HeapTuple   indexTuple;
    Form_pg_index indexForm;

    /* Open pg_index and fetch a writable copy of the index's tuple */
    pg_index = heap_open(IndexRelationId, RowExclusiveLock);

    indexTuple = SearchSysCacheCopy1(INDEXRELID,
                                     ObjectIdGetDatum(indexId));
    if (!HeapTupleIsValid(indexTuple))
        elog(ERROR, "cache lookup failed for index %u", indexId);
    indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

    /*
     * Perform the requested state change on the copy.  The assertions verify
     * that each transition starts from the state the concurrent-build/drop
     * protocol expects.  (No default case: each enum value is handled.)
     */
    switch (action)
    {
        case INDEX_CREATE_SET_READY:
            /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
            Assert(indexForm->indislive);
            Assert(!indexForm->indisready);
            Assert(!indexForm->indisvalid);
            indexForm->indisready = true;
            break;
        case INDEX_CREATE_SET_VALID:
            /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
            Assert(indexForm->indislive);
            Assert(indexForm->indisready);
            Assert(!indexForm->indisvalid);
            indexForm->indisvalid = true;
            break;
        case INDEX_DROP_CLEAR_VALID:

            /*
             * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
             *
             * If indisready == true we leave it set so the index still gets
             * maintained by active transactions.  We only need to ensure that
             * indisvalid is false.  (We don't assert that either is initially
             * true, though, since we want to be able to retry a DROP INDEX
             * CONCURRENTLY that failed partway through.)
             *
             * Note: the CLUSTER logic assumes that indisclustered cannot be
             * set on any invalid index, so clear that flag too.
             */
            indexForm->indisvalid = false;
            indexForm->indisclustered = false;
            break;
        case INDEX_DROP_SET_DEAD:

            /*
             * Clear indisready/indislive during DROP INDEX CONCURRENTLY
             *
             * We clear both indisready and indislive, because we not only
             * want to stop updates, we want to prevent sessions from touching
             * the index at all.
             */
            Assert(!indexForm->indisvalid);
            indexForm->indisready = false;
            indexForm->indislive = false;
            break;
    }

    /* ... and update it */
    CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

    /* Keep no lock on pg_index beyond this function */
    heap_close(pg_index, RowExclusiveLock);
}
3710
3711
3712 /*
3713 * IndexGetRelation: given an index's relation OID, get the OID of the
3714 * relation it is an index on. Uses the system cache.
3715 */
3716 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3717 IndexGetRelation(Oid indexId, bool missing_ok)
3718 {
3719 HeapTuple tuple;
3720 Form_pg_index index;
3721 Oid result;
3722
3723 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3724 if (!HeapTupleIsValid(tuple))
3725 {
3726 if (missing_ok)
3727 return InvalidOid;
3728 elog(ERROR, "cache lookup failed for index %u", indexId);
3729 }
3730 index = (Form_pg_index) GETSTRUCT(tuple);
3731 Assert(index->indexrelid == indexId);
3732
3733 result = index->indrelid;
3734 ReleaseSysCache(tuple);
3735 return result;
3736 }
3737
/*
 * reindex_index - This routine is used to recreate a single index
 *
 * indexId: OID of the index to rebuild.
 * skip_constraint_checks: if true, suppress unique/exclusion constraint
 *		enforcement during the rebuild (the pg_index flags are then left
 *		unchanged afterwards, since the rebuild proved nothing).
 * persistence: relpersistence value to use for the new physical relation.
 * options: bitmask of REINDEXOPT_* flags; only REINDEXOPT_VERBOSE is
 *		consulted here.
 */
void
reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
              int options)
{
    Relation    iRel,
                heapRelation;
    Oid         heapId;
    IndexInfo  *indexInfo;
    /* NOTE(review): volatile looks like a leftover from a removed PG_TRY
     * block in an earlier version -- confirm before relying on it. */
    volatile bool skipped_constraint = false;
    PGRUsage    ru0;

    pg_rusage_init(&ru0);

    /*
     * Open and lock the parent heap relation.  ShareLock is sufficient since
     * we only need to be sure no schema or data changes are going on.
     */
    heapId = IndexGetRelation(indexId, false);
    heapRelation = heap_open(heapId, ShareLock);

    /*
     * Open the target index relation and get an exclusive lock on it, to
     * ensure that no one else is touching this particular index.
     */
    iRel = index_open(indexId, AccessExclusiveLock);

    /*
     * The case of reindexing partitioned tables and indexes is handled
     * differently by upper layers, so this case shouldn't arise.
     */
    if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
        elog(ERROR, "unsupported relation kind for index \"%s\"",
             RelationGetRelationName(iRel));

    /*
     * Don't allow reindex on temp tables of other backends ... their local
     * buffer manager is not going to cope.
     */
    if (RELATION_IS_OTHER_TEMP(iRel))
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("cannot reindex temporary tables of other sessions")));

    /*
     * Also check for active uses of the index in the current transaction; we
     * don't want to reindex underneath an open indexscan.
     */
    CheckTableNotInUse(iRel, "REINDEX INDEX");

    /*
     * All predicate locks on the index are about to be made invalid. Promote
     * them to relation locks on the heap.
     */
    TransferPredicateLocksToHeapRelation(iRel);

    /* Fetch info needed for index_build */
    indexInfo = BuildIndexInfo(iRel);

    /* If requested, skip checking uniqueness/exclusion constraints */
    if (skip_constraint_checks)
    {
        /* Remember whether we actually suppressed anything, for use below */
        if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
            skipped_constraint = true;
        indexInfo->ii_Unique = false;
        indexInfo->ii_ExclusionOps = NULL;
        indexInfo->ii_ExclusionProcs = NULL;
        indexInfo->ii_ExclusionStrats = NULL;
    }

    /* Suppress use of the target index while rebuilding it */
    SetReindexProcessing(heapId, indexId);

    /* Create a new physical relation for the index */
    RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
                              InvalidMultiXactId);

    /* Initialize the index and rebuild */
    /* Note: we do not need to re-establish pkey setting */
    index_build(heapRelation, iRel, indexInfo, false, true, true);

    /* Re-allow use of target index */
    ResetReindexProcessing();

    /*
     * If the index is marked invalid/not-ready/dead (ie, it's from a failed
     * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
     * and we didn't skip a uniqueness check, we can now mark it valid.  This
     * allows REINDEX to be used to clean up in such cases.
     *
     * We can also reset indcheckxmin, because we have now done a
     * non-concurrent index build, *except* in the case where index_build
     * found some still-broken HOT chains.  If it did, and we don't have to
     * change any of the other flags, we just leave indcheckxmin alone (note
     * that index_build won't have changed it, because this is a reindex).
     * This is okay and desirable because not updating the tuple leaves the
     * index's usability horizon (recorded as the tuple's xmin value) the same
     * as it was.
     *
     * But, if the index was invalid/not-ready/dead and there were broken HOT
     * chains, we had better force indcheckxmin true, because the normal
     * argument that the HOT chains couldn't conflict with the index is
     * suspect for an invalid index.  (A conflict is definitely possible if
     * the index was dead.  It probably shouldn't happen otherwise, but let's
     * be conservative.)  In this case advancing the usability horizon is
     * appropriate.
     *
     * Another reason for avoiding unnecessary updates here is that while
     * reindexing pg_index itself, we must not try to update tuples in it.
     * pg_index's indexes should always have these flags in their clean state,
     * so that won't happen.
     *
     * If early pruning/vacuuming is enabled for the heap relation, the
     * usability horizon must be advanced to the current transaction on every
     * build or rebuild.  pg_index is OK in this regard because catalog tables
     * are not subject to early cleanup.
     */
    if (!skipped_constraint)
    {
        Relation    pg_index;
        HeapTuple   indexTuple;
        Form_pg_index indexForm;
        bool        index_bad;
        bool        early_pruning_enabled = EarlyPruningEnabled(heapRelation);

        pg_index = heap_open(IndexRelationId, RowExclusiveLock);

        indexTuple = SearchSysCacheCopy1(INDEXRELID,
                                         ObjectIdGetDatum(indexId));
        if (!HeapTupleIsValid(indexTuple))
            elog(ERROR, "cache lookup failed for index %u", indexId);
        indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

        /* "Bad" means any of the concurrent-operation flags is unclean */
        index_bad = (!indexForm->indisvalid ||
                     !indexForm->indisready ||
                     !indexForm->indislive);
        if (index_bad ||
            (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
            early_pruning_enabled)
        {
            if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
                indexForm->indcheckxmin = false;
            else if (index_bad || early_pruning_enabled)
                indexForm->indcheckxmin = true;
            indexForm->indisvalid = true;
            indexForm->indisready = true;
            indexForm->indislive = true;
            CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

            /*
             * Invalidate the relcache for the table, so that after we commit
             * all sessions will refresh the table's index list.  This ensures
             * that if anyone misses seeing the pg_index row during this
             * update, they'll refresh their list before attempting any update
             * on the table.
             */
            CacheInvalidateRelcache(heapRelation);
        }

        heap_close(pg_index, RowExclusiveLock);
    }

    /* Log what we did */
    if (options & REINDEXOPT_VERBOSE)
        ereport(INFO,
                (errmsg("index \"%s\" was reindexed",
                        get_rel_name(indexId)),
                 errdetail_internal("%s",
                                    pg_rusage_show(&ru0))));

    /* Close rels, but keep locks */
    index_close(iRel, NoLock);
    heap_close(heapRelation, NoLock);
}
3914
/*
 * reindex_relation - This routine is used to recreate all indexes
 * of a relation (and optionally its toast relation too, if any).
 *
 * "flags" is a bitmask that can include any combination of these bits:
 *
 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
 *
 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
 * indexes are inconsistent with it.  This makes things tricky if the relation
 * is a system catalog that we might consult during the reindexing.  To deal
 * with that case, we mark all of the indexes as pending rebuild so that they
 * won't be trusted until rebuilt.  The caller is required to call us *without*
 * having made the rebuilt table visible by doing CommandCounterIncrement;
 * we'll do CCI after having collected the index list.  (This way we can still
 * use catalog indexes while collecting the list.)
 *
 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
 * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
 * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
 * rebuild an index if constraint inconsistency is suspected.  For optimal
 * performance, other callers should include the flag only after transforming
 * the data in a manner that risks a change in constraint validity.
 *
 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
 * rebuilt indexes to unlogged.
 *
 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
 * rebuilt indexes to permanent.
 *
 * "options" is passed through unchanged to reindex_index() (REINDEXOPT_*).
 *
 * Returns true if any indexes were rebuilt (including toast table's index
 * when relevant).  Note that a CommandCounterIncrement will occur after each
 * index rebuild.
 */
bool
reindex_relation(Oid relid, int flags, int options)
{
    Relation    rel;
    Oid         toast_relid;
    List       *indexIds;
    char        persistence;
    bool        result;
    ListCell   *indexId;

    /*
     * Open and lock the relation.  ShareLock is sufficient since we only need
     * to prevent schema and data changes in it.  The lock level used here
     * should match ReindexTable().
     */
    rel = heap_open(relid, ShareLock);

    /*
     * This may be useful when implemented someday; but that day is not today.
     * For now, avoid erroring out when called in a multi-table context
     * (REINDEX SCHEMA) and happen to come across a partitioned table.  The
     * partitions may be reindexed on their own anyway.
     */
    if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
    {
        ereport(WARNING,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
                        RelationGetRelationName(rel))));
        heap_close(rel, ShareLock);
        return false;
    }

    /* Remember the toast relation OID before we close rel below */
    toast_relid = rel->rd_rel->reltoastrelid;

    /*
     * Get the list of index OIDs for this relation.  (We trust to the
     * relcache to get this with a sequential scan if ignoring system
     * indexes.)
     */
    indexIds = RelationGetIndexList(rel);

    if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
    {
        /* Suppress use of all the indexes until they are rebuilt */
        SetReindexPending(indexIds);

        /*
         * Make the new heap contents visible --- now things might be
         * inconsistent!
         */
        CommandCounterIncrement();
    }

    /*
     * Compute persistence of indexes: same as that of owning rel, unless
     * caller specified otherwise.
     */
    if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
        persistence = RELPERSISTENCE_UNLOGGED;
    else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
        persistence = RELPERSISTENCE_PERMANENT;
    else
        persistence = rel->rd_rel->relpersistence;

    /* Reindex all the indexes. */
    foreach(indexId, indexIds)
    {
        Oid         indexOid = lfirst_oid(indexId);

        reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
                      persistence, options);

        /* Make the rebuilt index visible before rebuilding the next one */
        CommandCounterIncrement();

        /* Index should no longer be in the pending list */
        Assert(!ReindexIsProcessingIndex(indexOid));
    }

    /*
     * Close rel, but continue to hold the lock.
     */
    heap_close(rel, NoLock);

    result = (indexIds != NIL);

    /*
     * If the relation has a secondary toast rel, reindex that too while we
     * still hold the lock on the master table.
     */
    if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
        result |= reindex_relation(toast_relid, flags, options);

    return result;
}
4045
4046
/* ----------------------------------------------------------------
 *		System index reindexing support
 *
 * When we are busy reindexing a system index, this code provides support
 * for preventing catalog lookups from using that index.  We also make use
 * of this to catch attempted uses of user indexes during reindexing of
 * those indexes.  This information is propagated to parallel workers;
 * attempting to change it during a parallel operation is not permitted.
 * ----------------------------------------------------------------
 */

/* Heap currently being reindexed, or InvalidOid if none */
static Oid	currentlyReindexedHeap = InvalidOid;
/* Index currently being reindexed, or InvalidOid if none */
static Oid	currentlyReindexedIndex = InvalidOid;
/* List of index OIDs awaiting rebuild (see SetReindexPending) */
static List *pendingReindexedIndexes = NIL;
/* Transaction nest level at which the above state was established */
static int	reindexingNestLevel = 0;
4062
4063 /*
4064 * ReindexIsProcessingHeap
4065 * True if heap specified by OID is currently being reindexed.
4066 */
4067 bool
ReindexIsProcessingHeap(Oid heapOid)4068 ReindexIsProcessingHeap(Oid heapOid)
4069 {
4070 return heapOid == currentlyReindexedHeap;
4071 }
4072
4073 /*
4074 * ReindexIsCurrentlyProcessingIndex
4075 * True if index specified by OID is currently being reindexed.
4076 */
4077 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)4078 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
4079 {
4080 return indexOid == currentlyReindexedIndex;
4081 }
4082
4083 /*
4084 * ReindexIsProcessingIndex
4085 * True if index specified by OID is currently being reindexed,
4086 * or should be treated as invalid because it is awaiting reindex.
4087 */
4088 bool
ReindexIsProcessingIndex(Oid indexOid)4089 ReindexIsProcessingIndex(Oid indexOid)
4090 {
4091 return indexOid == currentlyReindexedIndex ||
4092 list_member_oid(pendingReindexedIndexes, indexOid);
4093 }
4094
/*
 * SetReindexProcessing
 *		Set flag that specified heap/index are being reindexed.
 *
 * Both OIDs must be valid.  Errors out if a reindex is already in progress,
 * since reindexing is not re-entrant.  Also removes the index from the
 * pending-rebuild list, since it is now actively being rebuilt.
 */
static void
SetReindexProcessing(Oid heapOid, Oid indexOid)
{
    Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
    /* Reindexing is not re-entrant. */
    if (OidIsValid(currentlyReindexedHeap))
        elog(ERROR, "cannot reindex while reindexing");
    currentlyReindexedHeap = heapOid;
    currentlyReindexedIndex = indexOid;
    /* Index is no longer "pending" reindex. */
    RemoveReindexPending(indexOid);
    /* This may have been set already, but in case it isn't, do so now. */
    reindexingNestLevel = GetCurrentTransactionNestLevel();
}
4113
4114 /*
4115 * ResetReindexProcessing
4116 * Unset reindexing status.
4117 */
4118 static void
ResetReindexProcessing(void)4119 ResetReindexProcessing(void)
4120 {
4121 currentlyReindexedHeap = InvalidOid;
4122 currentlyReindexedIndex = InvalidOid;
4123 /* reindexingNestLevel remains set till end of (sub)transaction */
4124 }
4125
4126 /*
4127 * SetReindexPending
4128 * Mark the given indexes as pending reindex.
4129 *
4130 * NB: we assume that the current memory context stays valid throughout.
4131 */
4132 static void
SetReindexPending(List * indexes)4133 SetReindexPending(List *indexes)
4134 {
4135 /* Reindexing is not re-entrant. */
4136 if (pendingReindexedIndexes)
4137 elog(ERROR, "cannot reindex while reindexing");
4138 if (IsInParallelMode())
4139 elog(ERROR, "cannot modify reindex state during a parallel operation");
4140 pendingReindexedIndexes = list_copy(indexes);
4141 reindexingNestLevel = GetCurrentTransactionNestLevel();
4142 }
4143
4144 /*
4145 * RemoveReindexPending
4146 * Remove the given index from the pending list.
4147 */
4148 static void
RemoveReindexPending(Oid indexOid)4149 RemoveReindexPending(Oid indexOid)
4150 {
4151 if (IsInParallelMode())
4152 elog(ERROR, "cannot modify reindex state during a parallel operation");
4153 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
4154 indexOid);
4155 }
4156
4157 /*
4158 * ResetReindexState
4159 * Clear all reindexing state during (sub)transaction abort.
4160 */
4161 void
ResetReindexState(int nestLevel)4162 ResetReindexState(int nestLevel)
4163 {
4164 /*
4165 * Because reindexing is not re-entrant, we don't need to cope with nested
4166 * reindexing states. We just need to avoid messing up the outer-level
4167 * state in case a subtransaction fails within a REINDEX. So checking the
4168 * current nest level against that of the reindex operation is sufficient.
4169 */
4170 if (reindexingNestLevel >= nestLevel)
4171 {
4172 currentlyReindexedHeap = InvalidOid;
4173 currentlyReindexedIndex = InvalidOid;
4174
4175 /*
4176 * We needn't try to release the contents of pendingReindexedIndexes;
4177 * that list should be in a transaction-lifespan context, so it will
4178 * go away automatically.
4179 */
4180 pendingReindexedIndexes = NIL;
4181
4182 reindexingNestLevel = 0;
4183 }
4184 }
4185
4186 /*
4187 * EstimateReindexStateSpace
4188 * Estimate space needed to pass reindex state to parallel workers.
4189 */
4190 Size
EstimateReindexStateSpace(void)4191 EstimateReindexStateSpace(void)
4192 {
4193 return offsetof(SerializedReindexState, pendingReindexedIndexes)
4194 + mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
4195 }
4196
4197 /*
4198 * SerializeReindexState
4199 * Serialize reindex state for parallel workers.
4200 */
4201 void
SerializeReindexState(Size maxsize,char * start_address)4202 SerializeReindexState(Size maxsize, char *start_address)
4203 {
4204 SerializedReindexState *sistate = (SerializedReindexState *) start_address;
4205 int c = 0;
4206 ListCell *lc;
4207
4208 sistate->currentlyReindexedHeap = currentlyReindexedHeap;
4209 sistate->currentlyReindexedIndex = currentlyReindexedIndex;
4210 sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
4211 foreach(lc, pendingReindexedIndexes)
4212 sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
4213 }
4214
4215 /*
4216 * RestoreReindexState
4217 * Restore reindex state in a parallel worker.
4218 */
4219 void
RestoreReindexState(void * reindexstate)4220 RestoreReindexState(void *reindexstate)
4221 {
4222 SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
4223 int c = 0;
4224 MemoryContext oldcontext;
4225
4226 currentlyReindexedHeap = sistate->currentlyReindexedHeap;
4227 currentlyReindexedIndex = sistate->currentlyReindexedIndex;
4228
4229 Assert(pendingReindexedIndexes == NIL);
4230 oldcontext = MemoryContextSwitchTo(TopMemoryContext);
4231 for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
4232 pendingReindexedIndexes =
4233 lappend_oid(pendingReindexedIndexes,
4234 sistate->pendingReindexedIndexes[c]);
4235 MemoryContextSwitchTo(oldcontext);
4236
4237 /* Note the worker has its own transaction nesting level */
4238 reindexingNestLevel = GetCurrentTransactionNestLevel();
4239 }
4240