1 /*-------------------------------------------------------------------------
2 *
3 * index.c
4 * code to create and destroy POSTGRES index relations
5 *
6 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
19 *
20 *-------------------------------------------------------------------------
21 */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/event_trigger.h"
52 #include "commands/trigger.h"
53 #include "executor/executor.h"
54 #include "miscadmin.h"
55 #include "nodes/makefuncs.h"
56 #include "nodes/nodeFuncs.h"
57 #include "optimizer/clauses.h"
58 #include "parser/parser.h"
59 #include "storage/bufmgr.h"
60 #include "storage/lmgr.h"
61 #include "storage/predicate.h"
62 #include "storage/procarray.h"
63 #include "storage/smgr.h"
64 #include "utils/builtins.h"
65 #include "utils/fmgroids.h"
66 #include "utils/guc.h"
67 #include "utils/inval.h"
68 #include "utils/lsyscache.h"
69 #include "utils/memutils.h"
70 #include "utils/pg_rusage.h"
71 #include "utils/syscache.h"
72 #include "utils/tuplesort.h"
73 #include "utils/snapmgr.h"
74 #include "utils/tqual.h"
75
76
77 /* Potentially set by pg_upgrade_support functions */
78 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
79
80 /* state info for validate_index bulkdelete callback */
81 typedef struct
82 {
83 Tuplesortstate *tuplesort; /* for sorting the index TIDs */
84 /* statistics (for debug purposes only): */
85 double htups,
86 itups,
87 tups_inserted;
88 } v_i_state;
89
90 /* non-export function prototypes */
91 static bool relationHasPrimaryKey(Relation rel);
92 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
93 IndexInfo *indexInfo,
94 List *indexColNames,
95 Oid accessMethodObjectId,
96 Oid *collationObjectId,
97 Oid *classObjectId);
98 static void InitializeAttributeOids(Relation indexRelation,
99 int numatts, Oid indexoid);
100 static void AppendAttributeTuples(Relation indexRelation, int numatts);
101 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
102 IndexInfo *indexInfo,
103 Oid *collationOids,
104 Oid *classOids,
105 int16 *coloptions,
106 bool primary,
107 bool isexclusion,
108 bool immediate,
109 bool isvalid);
110 static void index_update_stats(Relation rel,
111 bool hasindex, bool isprimary,
112 double reltuples);
113 static void IndexCheckExclusion(Relation heapRelation,
114 Relation indexRelation,
115 IndexInfo *indexInfo);
116 static inline int64 itemptr_encode(ItemPointer itemptr);
117 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
118 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
119 static void validate_index_heapscan(Relation heapRelation,
120 Relation indexRelation,
121 IndexInfo *indexInfo,
122 Snapshot snapshot,
123 v_i_state *state);
124 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
125 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
126 static void ResetReindexProcessing(void);
127 static void SetReindexPending(List *indexes);
128 static void RemoveReindexPending(Oid indexOid);
129
130
131 /*
132 * relationHasPrimaryKey
133 * See whether an existing relation has a primary key.
134 *
135 * Caller must have suitable lock on the relation.
136 *
137 * Note: we intentionally do not check IndexIsValid here; that's because this
138 * is used to enforce the rule that there can be only one indisprimary index,
139 * and we want that to be true even if said index is invalid.
140 */
141 static bool
relationHasPrimaryKey(Relation rel)142 relationHasPrimaryKey(Relation rel)
143 {
144 bool result = false;
145 List *indexoidlist;
146 ListCell *indexoidscan;
147
148 /*
149 * Get the list of index OIDs for the table from the relcache, and look up
150 * each one in the pg_index syscache until we find one marked primary key
151 * (hopefully there isn't more than one such).
152 */
153 indexoidlist = RelationGetIndexList(rel);
154
155 foreach(indexoidscan, indexoidlist)
156 {
157 Oid indexoid = lfirst_oid(indexoidscan);
158 HeapTuple indexTuple;
159
160 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
162 elog(ERROR, "cache lookup failed for index %u", indexoid);
163 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164 ReleaseSysCache(indexTuple);
165 if (result)
166 break;
167 }
168
169 list_free(indexoidlist);
170
171 return result;
172 }
173
174 /*
175 * index_check_primary_key
176 * Apply special checks needed before creating a PRIMARY KEY index
177 *
178 * This processing used to be in DefineIndex(), but has been split out
179 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
180 *
181 * We check for a pre-existing primary key, and that all columns of the index
182 * are simple column references (not expressions), and that all those
183 * columns are marked NOT NULL. If they aren't (which can only happen during
184 * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185 * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186 * them so --- or fail if they are not in fact nonnull.
187 *
188 * Caller had better have at least ShareLock on the table, else the not-null
189 * checking isn't trustworthy.
190 */
191 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)192 index_check_primary_key(Relation heapRel,
193 IndexInfo *indexInfo,
194 bool is_alter_table,
195 IndexStmt *stmt)
196 {
197 List *cmds;
198 int i;
199
200 /*
201 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
202 * TABLE, we have faith that the parser rejected multiple pkey clauses;
203 * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
204 * problem either.
205 */
206 if (is_alter_table &&
207 relationHasPrimaryKey(heapRel))
208 {
209 ereport(ERROR,
210 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
211 errmsg("multiple primary keys for table \"%s\" are not allowed",
212 RelationGetRelationName(heapRel))));
213 }
214
215 /*
216 * Check that all of the attributes in a primary key are marked as not
217 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
218 */
219 cmds = NIL;
220 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
221 {
222 AttrNumber attnum = indexInfo->ii_KeyAttrNumbers[i];
223 HeapTuple atttuple;
224 Form_pg_attribute attform;
225
226 if (attnum == 0)
227 ereport(ERROR,
228 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
229 errmsg("primary keys cannot be expressions")));
230
231 /* System attributes are never null, so no need to check */
232 if (attnum < 0)
233 continue;
234
235 atttuple = SearchSysCache2(ATTNUM,
236 ObjectIdGetDatum(RelationGetRelid(heapRel)),
237 Int16GetDatum(attnum));
238 if (!HeapTupleIsValid(atttuple))
239 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
240 attnum, RelationGetRelid(heapRel));
241 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
242
243 if (!attform->attnotnull)
244 {
245 /* Add a subcommand to make this one NOT NULL */
246 AlterTableCmd *cmd = makeNode(AlterTableCmd);
247
248 cmd->subtype = AT_SetNotNull;
249 cmd->name = pstrdup(NameStr(attform->attname));
250 cmds = lappend(cmds, cmd);
251 }
252
253 ReleaseSysCache(atttuple);
254 }
255
256 /*
257 * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
258 * Currently, since the PRIMARY KEY itself doesn't cascade, we don't
259 * cascade the notnull constraint(s) either; but this is pretty debatable.
260 *
261 * XXX: possible future improvement: when being called from ALTER TABLE,
262 * it would be more efficient to merge this with the outer ALTER TABLE, so
263 * as to avoid two scans. But that seems to complicate DefineIndex's API
264 * unduly.
265 */
266 if (cmds)
267 {
268 EventTriggerAlterTableStart((Node *) stmt);
269 AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
270 EventTriggerAlterTableEnd();
271 }
272 }
273
274 /*
275 * ConstructTupleDescriptor
276 *
277 * Build an index tuple descriptor for a new index
278 */
279 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid * collationObjectId,Oid * classObjectId)280 ConstructTupleDescriptor(Relation heapRelation,
281 IndexInfo *indexInfo,
282 List *indexColNames,
283 Oid accessMethodObjectId,
284 Oid *collationObjectId,
285 Oid *classObjectId)
286 {
287 int numatts = indexInfo->ii_NumIndexAttrs;
288 ListCell *colnames_item = list_head(indexColNames);
289 ListCell *indexpr_item = list_head(indexInfo->ii_Expressions);
290 IndexAmRoutine *amroutine;
291 TupleDesc heapTupDesc;
292 TupleDesc indexTupDesc;
293 int natts; /* #atts in heap rel --- for error checks */
294 int i;
295
296 /* We need access to the index AM's API struct */
297 amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
298
299 /* ... and to the table's tuple descriptor */
300 heapTupDesc = RelationGetDescr(heapRelation);
301 natts = RelationGetForm(heapRelation)->relnatts;
302
303 /*
304 * allocate the new tuple descriptor
305 */
306 indexTupDesc = CreateTemplateTupleDesc(numatts, false);
307
308 /*
309 * For simple index columns, we copy the pg_attribute row from the parent
310 * relation and modify it as necessary. For expressions we have to cons
311 * up a pg_attribute row the hard way.
312 */
313 for (i = 0; i < numatts; i++)
314 {
315 AttrNumber atnum = indexInfo->ii_KeyAttrNumbers[i];
316 Form_pg_attribute to = indexTupDesc->attrs[i];
317 HeapTuple tuple;
318 Form_pg_type typeTup;
319 Form_pg_opclass opclassTup;
320 Oid keyType;
321
322 if (atnum != 0)
323 {
324 /* Simple index column */
325 Form_pg_attribute from;
326
327 if (atnum < 0)
328 {
329 /*
330 * here we are indexing on a system attribute (-1...-n)
331 */
332 from = SystemAttributeDefinition(atnum,
333 heapRelation->rd_rel->relhasoids);
334 }
335 else
336 {
337 /*
338 * here we are indexing on a normal attribute (1...n)
339 */
340 if (atnum > natts) /* safety check */
341 elog(ERROR, "invalid column number %d", atnum);
342 from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
343 }
344
345 /*
346 * now that we've determined the "from", let's copy the tuple desc
347 * data...
348 */
349 memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
350
351 /*
352 * Set the attribute name as specified by caller.
353 */
354 if (colnames_item == NULL) /* shouldn't happen */
355 elog(ERROR, "too few entries in colnames list");
356 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
357 colnames_item = lnext(colnames_item);
358
359 /*
360 * Fix the stuff that should not be the same as the underlying
361 * attr
362 */
363 to->attnum = i + 1;
364
365 to->attstattarget = -1;
366 to->attcacheoff = -1;
367 to->attnotnull = false;
368 to->atthasdef = false;
369 to->attislocal = true;
370 to->attinhcount = 0;
371 to->attcollation = collationObjectId[i];
372 }
373 else
374 {
375 /* Expressional index */
376 Node *indexkey;
377
378 MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
379
380 /*
381 * Set the attribute name as specified by caller.
382 */
383 if (colnames_item == NULL) /* shouldn't happen */
384 elog(ERROR, "too few entries in colnames list");
385 namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
386 colnames_item = lnext(colnames_item);
387
388 if (indexpr_item == NULL) /* shouldn't happen */
389 elog(ERROR, "too few entries in indexprs list");
390 indexkey = (Node *) lfirst(indexpr_item);
391 indexpr_item = lnext(indexpr_item);
392
393 /*
394 * Lookup the expression type in pg_type for the type length etc.
395 */
396 keyType = exprType(indexkey);
397 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
398 if (!HeapTupleIsValid(tuple))
399 elog(ERROR, "cache lookup failed for type %u", keyType);
400 typeTup = (Form_pg_type) GETSTRUCT(tuple);
401
402 /*
403 * Assign some of the attributes values. Leave the rest as 0.
404 */
405 to->attnum = i + 1;
406 to->atttypid = keyType;
407 to->attlen = typeTup->typlen;
408 to->attbyval = typeTup->typbyval;
409 to->attstorage = typeTup->typstorage;
410 to->attalign = typeTup->typalign;
411 to->attstattarget = -1;
412 to->attcacheoff = -1;
413 to->atttypmod = exprTypmod(indexkey);
414 to->attislocal = true;
415 to->attcollation = collationObjectId[i];
416
417 ReleaseSysCache(tuple);
418
419 /*
420 * Make sure the expression yields a type that's safe to store in
421 * an index. We need this defense because we have index opclasses
422 * for pseudo-types such as "record", and the actually stored type
423 * had better be safe; eg, a named composite type is okay, an
424 * anonymous record type is not. The test is the same as for
425 * whether a table column is of a safe type (which is why we
426 * needn't check for the non-expression case).
427 */
428 CheckAttributeType(NameStr(to->attname),
429 to->atttypid, to->attcollation,
430 NIL, false);
431 }
432
433 /*
434 * We do not yet have the correct relation OID for the index, so just
435 * set it invalid for now. InitializeAttributeOids() will fix it
436 * later.
437 */
438 to->attrelid = InvalidOid;
439
440 /*
441 * Check the opclass and index AM to see if either provides a keytype
442 * (overriding the attribute type). Opclass takes precedence.
443 */
444 tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
445 if (!HeapTupleIsValid(tuple))
446 elog(ERROR, "cache lookup failed for opclass %u",
447 classObjectId[i]);
448 opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
449 if (OidIsValid(opclassTup->opckeytype))
450 keyType = opclassTup->opckeytype;
451 else
452 keyType = amroutine->amkeytype;
453 ReleaseSysCache(tuple);
454
455 if (OidIsValid(keyType) && keyType != to->atttypid)
456 {
457 /* index value and heap value have different types */
458 tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
459 if (!HeapTupleIsValid(tuple))
460 elog(ERROR, "cache lookup failed for type %u", keyType);
461 typeTup = (Form_pg_type) GETSTRUCT(tuple);
462
463 to->atttypid = keyType;
464 to->atttypmod = -1;
465 to->attlen = typeTup->typlen;
466 to->attbyval = typeTup->typbyval;
467 to->attalign = typeTup->typalign;
468 to->attstorage = typeTup->typstorage;
469
470 ReleaseSysCache(tuple);
471 }
472 }
473
474 pfree(amroutine);
475
476 return indexTupDesc;
477 }
478
479 /* ----------------------------------------------------------------
480 * InitializeAttributeOids
481 * ----------------------------------------------------------------
482 */
483 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)484 InitializeAttributeOids(Relation indexRelation,
485 int numatts,
486 Oid indexoid)
487 {
488 TupleDesc tupleDescriptor;
489 int i;
490
491 tupleDescriptor = RelationGetDescr(indexRelation);
492
493 for (i = 0; i < numatts; i += 1)
494 tupleDescriptor->attrs[i]->attrelid = indexoid;
495 }
496
497 /* ----------------------------------------------------------------
498 * AppendAttributeTuples
499 * ----------------------------------------------------------------
500 */
501 static void
AppendAttributeTuples(Relation indexRelation,int numatts)502 AppendAttributeTuples(Relation indexRelation, int numatts)
503 {
504 Relation pg_attribute;
505 CatalogIndexState indstate;
506 TupleDesc indexTupDesc;
507 int i;
508
509 /*
510 * open the attribute relation and its indexes
511 */
512 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
513
514 indstate = CatalogOpenIndexes(pg_attribute);
515
516 /*
517 * insert data from new index's tupdesc into pg_attribute
518 */
519 indexTupDesc = RelationGetDescr(indexRelation);
520
521 for (i = 0; i < numatts; i++)
522 {
523 /*
524 * There used to be very grotty code here to set these fields, but I
525 * think it's unnecessary. They should be set already.
526 */
527 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
528 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
529
530 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
531 }
532
533 CatalogCloseIndexes(indstate);
534
535 heap_close(pg_attribute, RowExclusiveLock);
536 }
537
538 /* ----------------------------------------------------------------
539 * UpdateIndexRelation
540 *
541 * Construct and insert a new entry in the pg_index catalog
542 * ----------------------------------------------------------------
543 */
544 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid)545 UpdateIndexRelation(Oid indexoid,
546 Oid heapoid,
547 IndexInfo *indexInfo,
548 Oid *collationOids,
549 Oid *classOids,
550 int16 *coloptions,
551 bool primary,
552 bool isexclusion,
553 bool immediate,
554 bool isvalid)
555 {
556 int2vector *indkey;
557 oidvector *indcollation;
558 oidvector *indclass;
559 int2vector *indoption;
560 Datum exprsDatum;
561 Datum predDatum;
562 Datum values[Natts_pg_index];
563 bool nulls[Natts_pg_index];
564 Relation pg_index;
565 HeapTuple tuple;
566 int i;
567
568 /*
569 * Copy the index key, opclass, and indoption info into arrays (should we
570 * make the caller pass them like this to start with?)
571 */
572 indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
573 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
574 indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
575 indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
576 indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
577 indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
578
579 /*
580 * Convert the index expressions (if any) to a text datum
581 */
582 if (indexInfo->ii_Expressions != NIL)
583 {
584 char *exprsString;
585
586 exprsString = nodeToString(indexInfo->ii_Expressions);
587 exprsDatum = CStringGetTextDatum(exprsString);
588 pfree(exprsString);
589 }
590 else
591 exprsDatum = (Datum) 0;
592
593 /*
594 * Convert the index predicate (if any) to a text datum. Note we convert
595 * implicit-AND format to normal explicit-AND for storage.
596 */
597 if (indexInfo->ii_Predicate != NIL)
598 {
599 char *predString;
600
601 predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
602 predDatum = CStringGetTextDatum(predString);
603 pfree(predString);
604 }
605 else
606 predDatum = (Datum) 0;
607
608 /*
609 * open the system catalog index relation
610 */
611 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
612
613 /*
614 * Build a pg_index tuple
615 */
616 MemSet(nulls, false, sizeof(nulls));
617
618 values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
619 values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
620 values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
621 values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
622 values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
623 values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
624 values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
625 values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
626 values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
627 values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
628 /* we set isvalid and isready the same way */
629 values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
630 values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
631 values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
632 values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
633 values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
634 values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
635 values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
636 values[Anum_pg_index_indexprs - 1] = exprsDatum;
637 if (exprsDatum == (Datum) 0)
638 nulls[Anum_pg_index_indexprs - 1] = true;
639 values[Anum_pg_index_indpred - 1] = predDatum;
640 if (predDatum == (Datum) 0)
641 nulls[Anum_pg_index_indpred - 1] = true;
642
643 tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
644
645 /*
646 * insert the tuple into the pg_index catalog
647 */
648 simple_heap_insert(pg_index, tuple);
649
650 /* update the indexes on pg_index */
651 CatalogUpdateIndexes(pg_index, tuple);
652
653 /*
654 * close the relation and free the tuple
655 */
656 heap_close(pg_index, RowExclusiveLock);
657 heap_freetuple(tuple);
658 }
659
660
661 /*
662 * index_create
663 *
664 * heapRelation: table to build index on (suitably locked by caller)
665 * indexRelationName: what it say
666 * indexRelationId: normally, pass InvalidOid to let this routine
667 * generate an OID for the index. During bootstrap this may be
668 * nonzero to specify a preselected OID.
669 * relFileNode: normally, pass InvalidOid to get new storage. May be
670 * nonzero to attach an existing valid build.
671 * indexInfo: same info executor uses to insert into the index
672 * indexColNames: column names to use for index (List of char *)
673 * accessMethodObjectId: OID of index AM to use
674 * tableSpaceId: OID of tablespace to use
675 * collationObjectId: array of collation OIDs, one per index column
676 * classObjectId: array of index opclass OIDs, one per index column
677 * coloptions: array of per-index-column indoption settings
678 * reloptions: AM-specific options
679 * isprimary: index is a PRIMARY KEY
680 * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
681 * deferrable: constraint is DEFERRABLE
682 * initdeferred: constraint is INITIALLY DEFERRED
683 * allow_system_table_mods: allow table to be a system catalog
684 * skip_build: true to skip the index_build() step for the moment; caller
685 * must do it later (typically via reindex_index())
686 * concurrent: if true, do not lock the table against writers. The index
687 * will be marked "invalid" and the caller must take additional steps
688 * to fix it up.
689 * is_internal: if true, post creation hook for new index
690 * if_not_exists: if true, do not throw an error if a relation with
691 * the same name already exists.
692 *
693 * Returns the OID of the created index.
694 */
695 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bool isprimary,bool isconstraint,bool deferrable,bool initdeferred,bool allow_system_table_mods,bool skip_build,bool concurrent,bool is_internal,bool if_not_exists)696 index_create(Relation heapRelation,
697 const char *indexRelationName,
698 Oid indexRelationId,
699 Oid relFileNode,
700 IndexInfo *indexInfo,
701 List *indexColNames,
702 Oid accessMethodObjectId,
703 Oid tableSpaceId,
704 Oid *collationObjectId,
705 Oid *classObjectId,
706 int16 *coloptions,
707 Datum reloptions,
708 bool isprimary,
709 bool isconstraint,
710 bool deferrable,
711 bool initdeferred,
712 bool allow_system_table_mods,
713 bool skip_build,
714 bool concurrent,
715 bool is_internal,
716 bool if_not_exists)
717 {
718 Oid heapRelationId = RelationGetRelid(heapRelation);
719 Relation pg_class;
720 Relation indexRelation;
721 TupleDesc indexTupDesc;
722 bool shared_relation;
723 bool mapped_relation;
724 bool is_exclusion;
725 Oid namespaceId;
726 int i;
727 char relpersistence;
728
729 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
730
731 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
732
733 /*
734 * The index will be in the same namespace as its parent table, and is
735 * shared across databases if and only if the parent is. Likewise, it
736 * will use the relfilenode map if and only if the parent does; and it
737 * inherits the parent's relpersistence.
738 */
739 namespaceId = RelationGetNamespace(heapRelation);
740 shared_relation = heapRelation->rd_rel->relisshared;
741 mapped_relation = RelationIsMapped(heapRelation);
742 relpersistence = heapRelation->rd_rel->relpersistence;
743
744 /*
745 * check parameters
746 */
747 if (indexInfo->ii_NumIndexAttrs < 1)
748 elog(ERROR, "must index at least one column");
749
750 if (!allow_system_table_mods &&
751 IsSystemRelation(heapRelation) &&
752 IsNormalProcessingMode())
753 ereport(ERROR,
754 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
755 errmsg("user-defined indexes on system catalog tables are not supported")));
756
757 /*
758 * concurrent index build on a system catalog is unsafe because we tend to
759 * release locks before committing in catalogs
760 */
761 if (concurrent &&
762 IsSystemRelation(heapRelation))
763 ereport(ERROR,
764 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
765 errmsg("concurrent index creation on system catalog tables is not supported")));
766
767 /*
768 * This case is currently not supported, but there's no way to ask for it
769 * in the grammar anyway, so it can't happen.
770 */
771 if (concurrent && is_exclusion)
772 ereport(ERROR,
773 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
774 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
775
776 /*
777 * We cannot allow indexing a shared relation after initdb (because
778 * there's no way to make the entry in other databases' pg_class).
779 */
780 if (shared_relation && !IsBootstrapProcessingMode())
781 ereport(ERROR,
782 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
783 errmsg("shared indexes cannot be created after initdb")));
784
785 /*
786 * Shared relations must be in pg_global, too (last-ditch check)
787 */
788 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
789 elog(ERROR, "shared relations must be placed in pg_global tablespace");
790
791 if (get_relname_relid(indexRelationName, namespaceId))
792 {
793 if (if_not_exists)
794 {
795 ereport(NOTICE,
796 (errcode(ERRCODE_DUPLICATE_TABLE),
797 errmsg("relation \"%s\" already exists, skipping",
798 indexRelationName)));
799 heap_close(pg_class, RowExclusiveLock);
800 return InvalidOid;
801 }
802
803 ereport(ERROR,
804 (errcode(ERRCODE_DUPLICATE_TABLE),
805 errmsg("relation \"%s\" already exists",
806 indexRelationName)));
807 }
808
809 /*
810 * construct tuple descriptor for index tuples
811 */
812 indexTupDesc = ConstructTupleDescriptor(heapRelation,
813 indexInfo,
814 indexColNames,
815 accessMethodObjectId,
816 collationObjectId,
817 classObjectId);
818
819 /*
820 * Allocate an OID for the index, unless we were told what to use.
821 *
822 * The OID will be the relfilenode as well, so make sure it doesn't
823 * collide with either pg_class OIDs or existing physical files.
824 */
825 if (!OidIsValid(indexRelationId))
826 {
827 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
828 if (IsBinaryUpgrade)
829 {
830 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
831 ereport(ERROR,
832 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
833 errmsg("pg_class index OID value not set when in binary upgrade mode")));
834
835 indexRelationId = binary_upgrade_next_index_pg_class_oid;
836 binary_upgrade_next_index_pg_class_oid = InvalidOid;
837 }
838 else
839 {
840 indexRelationId =
841 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
842 }
843 }
844
845 /*
846 * create the index relation's relcache entry and physical disk file. (If
847 * we fail further down, it's the smgr's responsibility to remove the disk
848 * file again.)
849 */
850 indexRelation = heap_create(indexRelationName,
851 namespaceId,
852 tableSpaceId,
853 indexRelationId,
854 relFileNode,
855 indexTupDesc,
856 RELKIND_INDEX,
857 relpersistence,
858 shared_relation,
859 mapped_relation,
860 allow_system_table_mods);
861
862 Assert(indexRelationId == RelationGetRelid(indexRelation));
863
864 /*
865 * Obtain exclusive lock on it. Although no other backends can see it
866 * until we commit, this prevents deadlock-risk complaints from lock
867 * manager in cases such as CLUSTER.
868 */
869 LockRelation(indexRelation, AccessExclusiveLock);
870
871 /*
872 * Fill in fields of the index's pg_class entry that are not set correctly
873 * by heap_create.
874 *
875 * XXX should have a cleaner way to create cataloged indexes
876 */
877 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
878 indexRelation->rd_rel->relam = accessMethodObjectId;
879 indexRelation->rd_rel->relhasoids = false;
880
881 /*
882 * store index's pg_class entry
883 */
884 InsertPgClassTuple(pg_class, indexRelation,
885 RelationGetRelid(indexRelation),
886 (Datum) 0,
887 reloptions);
888
889 /* done with pg_class */
890 heap_close(pg_class, RowExclusiveLock);
891
892 /*
893 * now update the object id's of all the attribute tuple forms in the
894 * index relation's tuple descriptor
895 */
896 InitializeAttributeOids(indexRelation,
897 indexInfo->ii_NumIndexAttrs,
898 indexRelationId);
899
900 /*
901 * append ATTRIBUTE tuples for the index
902 */
903 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
904
905 /* ----------------
906 * update pg_index
907 * (append INDEX tuple)
908 *
909 * Note that this stows away a representation of "predicate".
910 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
911 * ----------------
912 */
913 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
914 collationObjectId, classObjectId, coloptions,
915 isprimary, is_exclusion,
916 !deferrable,
917 !concurrent);
918
919 /*
920 * Register relcache invalidation on the indexes' heap relation, to
921 * maintain consistency of its index list
922 */
923 CacheInvalidateRelcache(heapRelation);
924
925 /*
926 * Register constraint and dependencies for the index.
927 *
928 * If the index is from a CONSTRAINT clause, construct a pg_constraint
929 * entry. The index will be linked to the constraint, which in turn is
930 * linked to the table. If it's not a CONSTRAINT, we need to make a
931 * dependency directly on the table.
932 *
933 * We don't need a dependency on the namespace, because there'll be an
934 * indirect dependency via our parent table.
935 *
936 * During bootstrap we can't register any dependencies, and we don't try
937 * to make a constraint either.
938 */
939 if (!IsBootstrapProcessingMode())
940 {
941 ObjectAddress myself,
942 referenced;
943
944 myself.classId = RelationRelationId;
945 myself.objectId = indexRelationId;
946 myself.objectSubId = 0;
947
948 if (isconstraint)
949 {
950 char constraintType;
951
952 if (isprimary)
953 constraintType = CONSTRAINT_PRIMARY;
954 else if (indexInfo->ii_Unique)
955 constraintType = CONSTRAINT_UNIQUE;
956 else if (is_exclusion)
957 constraintType = CONSTRAINT_EXCLUSION;
958 else
959 {
960 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
961 constraintType = 0; /* keep compiler quiet */
962 }
963
964 index_constraint_create(heapRelation,
965 indexRelationId,
966 indexInfo,
967 indexRelationName,
968 constraintType,
969 deferrable,
970 initdeferred,
971 false, /* already marked primary */
972 false, /* pg_index entry is OK */
973 false, /* no old dependencies */
974 allow_system_table_mods,
975 is_internal);
976 }
977 else
978 {
979 bool have_simple_col = false;
980
981 /* Create auto dependencies on simply-referenced columns */
982 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
983 {
984 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
985 {
986 referenced.classId = RelationRelationId;
987 referenced.objectId = heapRelationId;
988 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
989
990 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
991
992 have_simple_col = true;
993 }
994 }
995
996 /*
997 * If there are no simply-referenced columns, give the index an
998 * auto dependency on the whole table. In most cases, this will
999 * be redundant, but it might not be if the index expressions and
1000 * predicate contain no Vars or only whole-row Vars.
1001 */
1002 if (!have_simple_col)
1003 {
1004 referenced.classId = RelationRelationId;
1005 referenced.objectId = heapRelationId;
1006 referenced.objectSubId = 0;
1007
1008 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1009 }
1010
1011 /* Non-constraint indexes can't be deferrable */
1012 Assert(!deferrable);
1013 Assert(!initdeferred);
1014 }
1015
1016 /* Store dependency on collations */
1017 /* The default collation is pinned, so don't bother recording it */
1018 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1019 {
1020 if (OidIsValid(collationObjectId[i]) &&
1021 collationObjectId[i] != DEFAULT_COLLATION_OID)
1022 {
1023 referenced.classId = CollationRelationId;
1024 referenced.objectId = collationObjectId[i];
1025 referenced.objectSubId = 0;
1026
1027 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1028 }
1029 }
1030
1031 /* Store dependency on operator classes */
1032 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1033 {
1034 referenced.classId = OperatorClassRelationId;
1035 referenced.objectId = classObjectId[i];
1036 referenced.objectSubId = 0;
1037
1038 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1039 }
1040
1041 /* Store dependencies on anything mentioned in index expressions */
1042 if (indexInfo->ii_Expressions)
1043 {
1044 recordDependencyOnSingleRelExpr(&myself,
1045 (Node *) indexInfo->ii_Expressions,
1046 heapRelationId,
1047 DEPENDENCY_NORMAL,
1048 DEPENDENCY_AUTO);
1049 }
1050
1051 /* Store dependencies on anything mentioned in predicate */
1052 if (indexInfo->ii_Predicate)
1053 {
1054 recordDependencyOnSingleRelExpr(&myself,
1055 (Node *) indexInfo->ii_Predicate,
1056 heapRelationId,
1057 DEPENDENCY_NORMAL,
1058 DEPENDENCY_AUTO);
1059 }
1060 }
1061 else
1062 {
1063 /* Bootstrap mode - assert we weren't asked for constraint support */
1064 Assert(!isconstraint);
1065 Assert(!deferrable);
1066 Assert(!initdeferred);
1067 }
1068
1069 /* Post creation hook for new index */
1070 InvokeObjectPostCreateHookArg(RelationRelationId,
1071 indexRelationId, 0, is_internal);
1072
1073 /*
1074 * Advance the command counter so that we can see the newly-entered
1075 * catalog tuples for the index.
1076 */
1077 CommandCounterIncrement();
1078
1079 /*
1080 * In bootstrap mode, we have to fill in the index strategy structure with
1081 * information from the catalogs. If we aren't bootstrapping, then the
1082 * relcache entry has already been rebuilt thanks to sinval update during
1083 * CommandCounterIncrement.
1084 */
1085 if (IsBootstrapProcessingMode())
1086 RelationInitIndexAccessInfo(indexRelation);
1087 else
1088 Assert(indexRelation->rd_indexcxt != NULL);
1089
1090 /*
1091 * If this is bootstrap (initdb) time, then we don't actually fill in the
1092 * index yet. We'll be creating more indexes and classes later, so we
1093 * delay filling them in until just before we're done with bootstrapping.
1094 * Similarly, if the caller specified skip_build then filling the index is
1095 * delayed till later (ALTER TABLE can save work in some cases with this).
1096 * Otherwise, we call the AM routine that constructs the index.
1097 */
1098 if (IsBootstrapProcessingMode())
1099 {
1100 index_register(heapRelationId, indexRelationId, indexInfo);
1101 }
1102 else if (skip_build)
1103 {
1104 /*
1105 * Caller is responsible for filling the index later on. However,
1106 * we'd better make sure that the heap relation is correctly marked as
1107 * having an index.
1108 */
1109 index_update_stats(heapRelation,
1110 true,
1111 isprimary,
1112 -1.0);
1113 /* Make the above update visible */
1114 CommandCounterIncrement();
1115 }
1116 else
1117 {
1118 index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1119 }
1120
1121 /*
1122 * Close the index; but we keep the lock that we acquired above until end
1123 * of transaction. Closing the heap is caller's responsibility.
1124 */
1125 index_close(indexRelation, NoLock);
1126
1127 return indexRelationId;
1128 }
1129
/*
 * index_constraint_create
 *
 * Set up a constraint associated with an index.  Return the new constraint's
 * address (its pg_constraint ObjectAddress).
 *
 * heapRelation: table owning the index (must be suitably locked by caller)
 * indexRelationId: OID of the index
 * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
 *		CONSTRAINT_EXCLUSION
 * deferrable: constraint is DEFERRABLE
 * initdeferred: constraint is INITIALLY DEFERRED
 * mark_as_primary: if true, set flags to mark index as primary key
 * update_pgindex: if true, update pg_index row (else caller's done that)
 * remove_old_dependencies: if true, remove existing dependencies of index
 *		on table's columns
 * allow_system_table_mods: allow table to be a system catalog
 * is_internal: index is constructed due to internal process
 */
ObjectAddress
index_constraint_create(Relation heapRelation,
						Oid indexRelationId,
						IndexInfo *indexInfo,
						const char *constraintName,
						char constraintType,
						bool deferrable,
						bool initdeferred,
						bool mark_as_primary,
						bool update_pgindex,
						bool remove_old_dependencies,
						bool allow_system_table_mods,
						bool is_internal)
{
	Oid			namespaceId = RelationGetNamespace(heapRelation);
	ObjectAddress myself,
				referenced;
	Oid			conOid;

	/* constraint creation support doesn't work while bootstrapping */
	Assert(!IsBootstrapProcessingMode());

	/* enforce system-table restriction */
	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/* primary/unique constraints shouldn't have any expressions */
	if (indexInfo->ii_Expressions &&
		constraintType != CONSTRAINT_EXCLUSION)
		elog(ERROR, "constraints cannot have index expressions");

	/*
	 * If we're manufacturing a constraint for a pre-existing index, we need
	 * to get rid of the existing auto dependencies for the index (the ones
	 * that index_create() would have made instead of calling this function).
	 *
	 * Note: this code would not necessarily do the right thing if the index
	 * has any expressions or predicate, but we'd never be turning such an
	 * index into a UNIQUE or PRIMARY KEY constraint.
	 */
	if (remove_old_dependencies)
		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
										RelationRelationId, DEPENDENCY_AUTO);

	/*
	 * Construct a pg_constraint entry.
	 */
	conOid = CreateConstraintEntry(constraintName,
								   namespaceId,
								   constraintType,
								   deferrable,
								   initdeferred,
								   true,
								   RelationGetRelid(heapRelation),
								   indexInfo->ii_KeyAttrNumbers,
								   indexInfo->ii_NumIndexAttrs,
								   InvalidOid,	/* no domain */
								   indexRelationId,		/* index OID */
								   InvalidOid,	/* no foreign key */
								   NULL,
								   NULL,
								   NULL,
								   NULL,
								   0,
								   ' ',
								   ' ',
								   ' ',
								   indexInfo->ii_ExclusionOps,
								   NULL,	/* no check constraint */
								   NULL,
								   NULL,
								   true,	/* islocal */
								   0,	/* inhcount */
								   true,	/* noinherit */
								   is_internal);

	/*
	 * Register the index as internally dependent on the constraint.
	 *
	 * Note that the constraint has a dependency on the table, so we don't
	 * need (or want) any direct dependency from the index to the table.
	 */
	myself.classId = RelationRelationId;
	myself.objectId = indexRelationId;
	myself.objectSubId = 0;

	referenced.classId = ConstraintRelationId;
	referenced.objectId = conOid;
	referenced.objectSubId = 0;

	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * If the constraint is deferrable, create the deferred uniqueness
	 * checking trigger.  (The trigger will be given an internal dependency on
	 * the constraint by CreateTrigger.)
	 */
	if (deferrable)
	{
		CreateTrigStmt *trigger;

		trigger = makeNode(CreateTrigStmt);
		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
			"PK_ConstraintTrigger" :
			"Unique_ConstraintTrigger";
		trigger->relation = NULL;
		trigger->funcname = SystemFuncName("unique_key_recheck");
		trigger->args = NIL;
		trigger->row = true;
		trigger->timing = TRIGGER_TYPE_AFTER;
		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
		trigger->columns = NIL;
		trigger->whenClause = NULL;
		trigger->isconstraint = true;
		trigger->deferrable = true;
		trigger->initdeferred = initdeferred;
		trigger->constrrel = NULL;

		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
							 InvalidOid, conOid, indexRelationId, true);
	}

	/*
	 * If needed, mark the table as having a primary key.  We assume it can't
	 * have been so marked already, so no need to clear the flag in the other
	 * case.
	 *
	 * Note: this might better be done by callers.  We do it here to avoid
	 * exposing index_update_stats() globally, but that wouldn't be necessary
	 * if relhaspkey went away.
	 */
	if (mark_as_primary)
		index_update_stats(heapRelation,
						   true,
						   true,
						   -1.0);

	/*
	 * If needed, mark the index as primary and/or deferred in pg_index.
	 *
	 * Note: When making an existing index into a constraint, caller must have
	 * a table lock that prevents concurrent table updates; otherwise, there
	 * is a risk that concurrent readers of the table will miss seeing this
	 * index at all.
	 */
	if (update_pgindex && (mark_as_primary || deferrable))
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		dirty = false;

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		/* must work on a copy, since we modify it in place before updating */
		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexRelationId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		if (mark_as_primary && !indexForm->indisprimary)
		{
			indexForm->indisprimary = true;
			dirty = true;
		}

		if (deferrable && indexForm->indimmediate)
		{
			indexForm->indimmediate = false;
			dirty = true;
		}

		/* only write (and fire the post-alter hook) if something changed */
		if (dirty)
		{
			simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
			CatalogUpdateIndexes(pg_index, indexTuple);

			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
										 InvalidOid, is_internal);
		}

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/* address of the pg_constraint entry we created above */
	return referenced;
}
1342
/*
 * index_drop
 *
 * Drop an index, including its catalog entries and physical storage.
 * If "concurrent" is true, perform the multi-transaction protocol of
 * DROP INDEX CONCURRENTLY (see the large comment below).
 *
 * NOTE: this routine should now only be called through performDeletion(),
 * else associated dependencies won't be cleaned up.
 */
void
index_drop(Oid indexId, bool concurrent)
{
	Oid			heapId;
	Relation	userHeapRelation;
	Relation	userIndexRelation;
	Relation	indexRelation;
	HeapTuple	tuple;
	bool		hasexprs;
	LockRelId	heaprelid,
				indexrelid;
	LOCKTAG		heaplocktag;
	LOCKMODE	lockmode;

	/*
	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
	 * access a temporary relation, so there's no harm in grabbing a stronger
	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
	 * more efficient.
	 */
	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
		   !concurrent);

	/*
	 * To drop an index safely, we must grab exclusive lock on its parent
	 * table.  Exclusive lock on the index alone is insufficient because
	 * another backend might be about to execute a query on the parent table.
	 * If it relies on a previously cached list of index OIDs, then it could
	 * attempt to access the just-dropped index.  We must therefore take a
	 * table lock strong enough to prevent all queries on the table from
	 * proceeding until we commit and send out a shared-cache-inval notice
	 * that will make them update their index lists.
	 *
	 * In the concurrent case we avoid this requirement by disabling index use
	 * in multiple steps and waiting out any transactions that might be using
	 * the index, so we don't need exclusive lock on the parent table. Instead
	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
	 * AccessExclusiveLock on the index below, once we're sure nobody else is
	 * using it.)
	 */
	heapId = IndexGetRelation(indexId, false);
	lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
	userHeapRelation = heap_open(heapId, lockmode);
	userIndexRelation = index_open(indexId, lockmode);

	/*
	 * We might still have open queries using it in our own session, which the
	 * above locking won't prevent, so test explicitly.
	 */
	CheckTableNotInUse(userIndexRelation, "DROP INDEX");

	/*
	 * Drop Index Concurrently is more or less the reverse process of Create
	 * Index Concurrently.
	 *
	 * First we unset indisvalid so queries starting afterwards don't use the
	 * index to answer queries anymore.  We have to keep indisready = true so
	 * transactions that are still scanning the index can continue to see
	 * valid index contents.  For instance, if they are using READ COMMITTED
	 * mode, and another transaction makes changes and commits, they need to
	 * see those new tuples in the index.
	 *
	 * After all transactions that could possibly have used the index for
	 * queries end, we can unset indisready and indislive, then wait till
	 * nobody could be touching it anymore.  (Note: we need indislive because
	 * this state must be distinct from the initial state during CREATE INDEX
	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
	 * are false.  That's because in that state, transactions must examine the
	 * index for HOT-safety decisions, while in this state we don't want them
	 * to open it at all.)
	 *
	 * Since all predicate locks on the index are about to be made invalid, we
	 * must promote them to predicate locks on the heap.  In the
	 * non-concurrent case we can just do that now.  In the concurrent case
	 * it's a bit trickier.  The predicate locks must be moved when there are
	 * no index scans in progress on the index and no more can subsequently
	 * start, so that no new predicate locks can be made on the index.  Also,
	 * they must be moved before heap inserts stop maintaining the index, else
	 * the conflict with the predicate lock on the index gap could be missed
	 * before the lock on the heap relation is in place to detect a conflict
	 * based on the heap tuple insert.
	 */
	if (concurrent)
	{
		/*
		 * We must commit our transaction in order to make the first pg_index
		 * state update visible to other sessions.  If the DROP machinery has
		 * already performed any other actions (removal of other objects,
		 * pg_depend entries, etc), the commit would make those actions
		 * permanent, which would leave us with inconsistent catalog state if
		 * we fail partway through the following sequence.  Since DROP INDEX
		 * CONCURRENTLY is restricted to dropping just one index that has no
		 * dependencies, we should get here before anything's been done ---
		 * but let's check that to be sure.  We can verify that the current
		 * transaction has not executed any transactional updates by checking
		 * that no XID has been assigned.
		 */
		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));

		/*
		 * Mark index invalid by updating its pg_index entry
		 */
		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh any cached plans that might reference the
		 * index.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/* save lockrelid and locktag for below, then close but keep locks */
		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;

		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * We must commit our current transaction so that the indisvalid
		 * update becomes visible to other transactions; then start another.
		 * Note that any previously-built data structures are lost in the
		 * commit.  The only data we keep past here are the relation IDs.
		 *
		 * Before committing, get a session-level lock on the table, to ensure
		 * that neither it nor the index can be dropped before we finish. This
		 * cannot block, even if someone else is waiting for access, because
		 * we already have the same lock within our transaction.
		 */
		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);

		PopActiveSnapshot();
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Now we must wait until no running transaction could be using the
		 * index for a query.  Use AccessExclusiveLock here to check for
		 * running transactions that hold locks of any kind on the table. Note
		 * we do not need to worry about xacts that open the table for reading
		 * after this point; they will see the index as invalid when they open
		 * the relation.
		 *
		 * Note: the reason we use actual lock acquisition here, rather than
		 * just checking the ProcArray and sleeping, is that deadlock is
		 * possible if one of the transactions in question is blocked trying
		 * to acquire an exclusive lock on our table.  The lock code will
		 * detect deadlock and error out properly.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * No more predicate locks will be acquired on this index, and we're
		 * about to stop doing inserts into the index which could show
		 * conflicts with existing predicate locks, so now is the time to move
		 * them to the heap relation.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
		TransferPredicateLocksToHeapRelation(userIndexRelation);

		/*
		 * Now we are sure that nobody uses the index for queries; they just
		 * might have it open for updating it.  So now we can unset indisready
		 * and indislive, then wait till nobody could be using it at all
		 * anymore.
		 */
		index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh the table's index list.  Forgetting just
		 * the index's relcache entry is not enough.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/*
		 * Close the relations again, though still holding session lock.
		 */
		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * Again, commit the transaction to make the pg_index update visible
		 * to other sessions.
		 */
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Wait till every transaction that saw the old index state has
		 * finished.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * Re-open relations to allow us to complete our actions.
		 *
		 * At this point, nothing should be accessing the index, but lets
		 * leave nothing to chance and grab AccessExclusiveLock on the index
		 * before the physical deletion.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, AccessExclusiveLock);
	}
	else
	{
		/* Not concurrent, so just transfer predicate locks and we're good */
		TransferPredicateLocksToHeapRelation(userIndexRelation);
	}

	/*
	 * Schedule physical removal of the files
	 */
	RelationDropStorage(userIndexRelation);

	/*
	 * Close and flush the index's relcache entry, to ensure relcache doesn't
	 * try to rebuild it while we're deleting catalog entries.  We keep the
	 * lock though.
	 */
	index_close(userIndexRelation, NoLock);

	RelationForgetRelation(indexId);

	/*
	 * fix INDEX relation, and check for expressional index
	 */
	indexRelation = heap_open(IndexRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);

	/* remember whether it had expression columns, before deleting the row */
	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);

	simple_heap_delete(indexRelation, &tuple->t_self);

	ReleaseSysCache(tuple);
	heap_close(indexRelation, RowExclusiveLock);

	/*
	 * if it has any expression columns, we might have stored statistics about
	 * them.
	 */
	if (hasexprs)
		RemoveStatistics(indexId, 0);

	/*
	 * fix ATTRIBUTE relation
	 */
	DeleteAttributeTuples(indexId);

	/*
	 * fix RELATION relation
	 */
	DeleteRelationTuple(indexId);

	/*
	 * We are presently too lazy to attempt to compute the new correct value
	 * of relhasindex (the next VACUUM will fix it if necessary).  So there is
	 * no need to update the pg_class tuple for the owning relation. But we
	 * must send out a shared-cache-inval notice on the owning relation to
	 * ensure other backends update their relcache lists of indexes.  (In the
	 * concurrent case, this is redundant but harmless.)
	 */
	CacheInvalidateRelcache(userHeapRelation);

	/*
	 * Close owning rel, but keep lock
	 */
	heap_close(userHeapRelation, NoLock);

	/*
	 * Release the session locks before we go.
	 */
	if (concurrent)
	{
		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
	}
}
1637
1638 /* ----------------------------------------------------------------
1639 * index_build support
1640 * ----------------------------------------------------------------
1641 */
1642
1643 /* ----------------
1644 * BuildIndexInfo
1645 * Construct an IndexInfo record for an open index
1646 *
1647 * IndexInfo stores the information about the index that's needed by
1648 * FormIndexDatum, which is used for both index_build() and later insertion
1649 * of individual index tuples. Normally we build an IndexInfo for an index
1650 * just once per command, and then use it for (potentially) many tuples.
1651 * ----------------
1652 */
1653 IndexInfo *
BuildIndexInfo(Relation index)1654 BuildIndexInfo(Relation index)
1655 {
1656 IndexInfo *ii = makeNode(IndexInfo);
1657 Form_pg_index indexStruct = index->rd_index;
1658 int i;
1659 int numKeys;
1660
1661 /* check the number of keys, and copy attr numbers into the IndexInfo */
1662 numKeys = indexStruct->indnatts;
1663 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1664 elog(ERROR, "invalid indnatts %d for index %u",
1665 numKeys, RelationGetRelid(index));
1666 ii->ii_NumIndexAttrs = numKeys;
1667 for (i = 0; i < numKeys; i++)
1668 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1669
1670 /* fetch any expressions needed for expressional indexes */
1671 ii->ii_Expressions = RelationGetIndexExpressions(index);
1672 ii->ii_ExpressionsState = NIL;
1673
1674 /* fetch index predicate if any */
1675 ii->ii_Predicate = RelationGetIndexPredicate(index);
1676 ii->ii_PredicateState = NIL;
1677
1678 /* fetch exclusion constraint info if any */
1679 if (indexStruct->indisexclusion)
1680 {
1681 RelationGetExclusionInfo(index,
1682 &ii->ii_ExclusionOps,
1683 &ii->ii_ExclusionProcs,
1684 &ii->ii_ExclusionStrats);
1685 }
1686 else
1687 {
1688 ii->ii_ExclusionOps = NULL;
1689 ii->ii_ExclusionProcs = NULL;
1690 ii->ii_ExclusionStrats = NULL;
1691 }
1692
1693 /* other info */
1694 ii->ii_Unique = indexStruct->indisunique;
1695 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1696 /* assume not doing speculative insertion for now */
1697 ii->ii_UniqueOps = NULL;
1698 ii->ii_UniqueProcs = NULL;
1699 ii->ii_UniqueStrats = NULL;
1700
1701 /* initialize index-build state to default */
1702 ii->ii_Concurrent = false;
1703 ii->ii_BrokenHotChain = false;
1704
1705 return ii;
1706 }
1707
1708 /* ----------------
1709 * BuildDummyIndexInfo
1710 * Construct a dummy IndexInfo record for an open index
1711 *
1712 * This differs from the real BuildIndexInfo in that it will never run any
1713 * user-defined code that might exist in index expressions or predicates.
1714 * Instead of the real index expressions, we return null constants that have
1715 * the right types/typmods/collations. Predicates and exclusion clauses are
1716 * just ignored. This is sufficient for the purpose of truncating an index,
1717 * since we will not need to actually evaluate the expressions or predicates;
1718 * the only thing that's likely to be done with the data is construction of
1719 * a tupdesc describing the index's rowtype.
1720 * ----------------
1721 */
1722 IndexInfo *
BuildDummyIndexInfo(Relation index)1723 BuildDummyIndexInfo(Relation index)
1724 {
1725 IndexInfo *ii = makeNode(IndexInfo);
1726 Form_pg_index indexStruct = index->rd_index;
1727 int i;
1728 int numKeys;
1729
1730 /* check the number of keys, and copy attr numbers into the IndexInfo */
1731 numKeys = indexStruct->indnatts;
1732 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1733 elog(ERROR, "invalid indnatts %d for index %u",
1734 numKeys, RelationGetRelid(index));
1735 ii->ii_NumIndexAttrs = numKeys;
1736 for (i = 0; i < numKeys; i++)
1737 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1738
1739 /* fetch dummy expressions for expressional indexes */
1740 ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1741 ii->ii_ExpressionsState = NIL;
1742
1743 /* pretend there is no predicate */
1744 ii->ii_Predicate = NIL;
1745 ii->ii_PredicateState = NULL;
1746
1747 /* We ignore the exclusion constraint if any */
1748 ii->ii_ExclusionOps = NULL;
1749 ii->ii_ExclusionProcs = NULL;
1750 ii->ii_ExclusionStrats = NULL;
1751
1752 /* other info */
1753 ii->ii_Unique = indexStruct->indisunique;
1754 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1755 /* assume not doing speculative insertion for now */
1756 ii->ii_UniqueOps = NULL;
1757 ii->ii_UniqueProcs = NULL;
1758 ii->ii_UniqueStrats = NULL;
1759
1760 /* initialize index-build state to default */
1761 ii->ii_Concurrent = false;
1762 ii->ii_BrokenHotChain = false;
1763
1764 return ii;
1765 }
1766
1767 /* ----------------
1768 * BuildSpeculativeIndexInfo
1769 * Add extra state to IndexInfo record
1770 *
1771 * For unique indexes, we usually don't want to add info to the IndexInfo for
1772 * checking uniqueness, since the B-Tree AM handles that directly. However,
1773 * in the case of speculative insertion, additional support is required.
1774 *
1775 * Do this processing here rather than in BuildIndexInfo() to not incur the
1776 * overhead in the common non-speculative cases.
1777 * ----------------
1778 */
1779 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)1780 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
1781 {
1782 int ncols = index->rd_rel->relnatts;
1783 int i;
1784
1785 /*
1786 * fetch info for checking unique indexes
1787 */
1788 Assert(ii->ii_Unique);
1789
1790 if (index->rd_rel->relam != BTREE_AM_OID)
1791 elog(ERROR, "unexpected non-btree speculative unique index");
1792
1793 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1794 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1795 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1796
1797 /*
1798 * We have to look up the operator's strategy number. This provides a
1799 * cross-check that the operator does match the index.
1800 */
1801 /* We need the func OIDs and strategy numbers too */
1802 for (i = 0; i < ncols; i++)
1803 {
1804 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1805 ii->ii_UniqueOps[i] =
1806 get_opfamily_member(index->rd_opfamily[i],
1807 index->rd_opcintype[i],
1808 index->rd_opcintype[i],
1809 ii->ii_UniqueStrats[i]);
1810 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1811 }
1812 }
1813
1814 /* ----------------
1815 * FormIndexDatum
1816 * Construct values[] and isnull[] arrays for a new index tuple.
1817 *
1818 * indexInfo Info about the index
1819 * slot Heap tuple for which we must prepare an index entry
1820 * estate executor state for evaluating any index expressions
1821 * values Array of index Datums (output area)
1822 * isnull Array of is-null indicators (output area)
1823 *
1824 * When there are no index expressions, estate may be NULL. Otherwise it
1825 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1826 * context must point to the heap tuple passed in.
1827 *
1828 * Notice we don't actually call index_form_tuple() here; we just prepare
1829 * its input arrays values[] and isnull[]. This is because the index AM
1830 * may wish to alter the data before storage.
1831 * ----------------
1832 */
1833 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)1834 FormIndexDatum(IndexInfo *indexInfo,
1835 TupleTableSlot *slot,
1836 EState *estate,
1837 Datum *values,
1838 bool *isnull)
1839 {
1840 ListCell *indexpr_item;
1841 int i;
1842
1843 if (indexInfo->ii_Expressions != NIL &&
1844 indexInfo->ii_ExpressionsState == NIL)
1845 {
1846 /* First time through, set up expression evaluation state */
1847 indexInfo->ii_ExpressionsState = (List *)
1848 ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1849 estate);
1850 /* Check caller has set up context correctly */
1851 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1852 }
1853 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1854
1855 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1856 {
1857 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1858 Datum iDatum;
1859 bool isNull;
1860
1861 if (keycol != 0)
1862 {
1863 /*
1864 * Plain index column; get the value we need directly from the
1865 * heap tuple.
1866 */
1867 iDatum = slot_getattr(slot, keycol, &isNull);
1868 }
1869 else
1870 {
1871 /*
1872 * Index expression --- need to evaluate it.
1873 */
1874 if (indexpr_item == NULL)
1875 elog(ERROR, "wrong number of index expressions");
1876 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1877 GetPerTupleExprContext(estate),
1878 &isNull,
1879 NULL);
1880 indexpr_item = lnext(indexpr_item);
1881 }
1882 values[i] = iDatum;
1883 isnull[i] = isNull;
1884 }
1885
1886 if (indexpr_item != NULL)
1887 elog(ERROR, "wrong number of index expressions");
1888 }
1889
1890
/*
 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
 *
 * This routine updates the pg_class row of either an index or its parent
 * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
 * to ensure we can do all the necessary work in just one update.
 *
 * hasindex: set relhasindex to this value
 * isprimary: if true, set relhaspkey true; else no change
 * reltuples: if >= 0, set reltuples to this value; else no change
 *
 * If reltuples >= 0, relpages and relallvisible are also updated (using
 * RelationGetNumberOfBlocks() and visibilitymap_count()).
 *
 * NOTE: an important side-effect of this operation is that an SI invalidation
 * message is sent out to all backends --- including me --- causing relcache
 * entries to be flushed or updated with the new data.  This must happen even
 * if we find that no change is needed in the pg_class row.  When updating
 * a heap entry, this ensures that other backends find out about the new
 * index.  When updating an index, it's important because some index AMs
 * expect a relcache flush to occur after REINDEX.
 */
static void
index_update_stats(Relation rel,
				   bool hasindex,
				   bool isprimary,
				   double reltuples)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	pg_class;
	HeapTuple	tuple;			/* copied pg_class tuple we modify in place */
	Form_pg_class rd_rel;
	bool		dirty;			/* true if any field actually changed */

	/*
	 * We always update the pg_class row using a non-transactional,
	 * overwrite-in-place update.  There are several reasons for this:
	 *
	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
	 *
	 * 2. We could be reindexing pg_class itself, in which case we can't move
	 * its pg_class row because CatalogUpdateIndexes might not know about all
	 * the indexes yet (see reindex_relation).
	 *
	 * 3. Because we execute CREATE INDEX with just share lock on the parent
	 * rel (to allow concurrent index creations), an ordinary update could
	 * suffer a tuple-concurrently-updated failure against another CREATE
	 * INDEX committing at about the same time.  We can avoid that by having
	 * them both do nontransactional updates (we assume they will both be
	 * trying to change the pg_class row to the same thing, so it doesn't
	 * matter which goes first).
	 *
	 * It is safe to use a non-transactional update even though our
	 * transaction could still fail before committing.  Setting relhasindex
	 * true is safe even if there are no indexes (VACUUM will eventually fix
	 * it), likewise for relhaspkey.  And of course the new relpages and
	 * reltuples counts are correct regardless.  However, we don't want to
	 * change relpages (or relallvisible) if the caller isn't providing an
	 * updated reltuples count, because that would bollix the
	 * reltuples/relpages ratio which is what's really important.
	 */

	pg_class = heap_open(RelationRelationId, RowExclusiveLock);

	/*
	 * Make a copy of the tuple to update.  Normally we use the syscache, but
	 * we can't rely on that during bootstrap or while reindexing pg_class
	 * itself.
	 */
	if (IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationRelationId))
	{
		/* don't assume syscache will work */
		HeapScanDesc pg_class_scan;
		ScanKeyData key[1];

		/* scan pg_class directly for the row whose OID matches relid */
		ScanKeyInit(&key[0],
					ObjectIdAttributeNumber,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
		/* tuple may be NULL here if no row matched; validity checked below */
		tuple = heap_copytuple(tuple);
		heap_endscan(pg_class_scan);
	}
	else
	{
		/* normal case, use syscache */
		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/* Apply required updates, if any, to copied tuple */

	dirty = false;
	if (rd_rel->relhasindex != hasindex)
	{
		rd_rel->relhasindex = hasindex;
		dirty = true;
	}
	if (isprimary)
	{
		/* relhaspkey is only ever set true here, never cleared */
		if (!rd_rel->relhaspkey)
		{
			rd_rel->relhaspkey = true;
			dirty = true;
		}
	}

	if (reltuples >= 0)
	{
		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
		BlockNumber relallvisible;

		if (rd_rel->relkind != RELKIND_INDEX)
			visibilitymap_count(rel, &relallvisible, NULL);
		else	/* don't bother for indexes */
			relallvisible = 0;

		if (rd_rel->relpages != (int32) relpages)
		{
			rd_rel->relpages = (int32) relpages;
			dirty = true;
		}
		if (rd_rel->reltuples != (float4) reltuples)
		{
			rd_rel->reltuples = (float4) reltuples;
			dirty = true;
		}
		if (rd_rel->relallvisible != (int32) relallvisible)
		{
			rd_rel->relallvisible = (int32) relallvisible;
			dirty = true;
		}
	}

	/*
	 * If anything changed, write out the tuple
	 */
	if (dirty)
	{
		heap_inplace_update(pg_class, tuple);
		/* the above sends a cache inval message */
	}
	else
	{
		/* no need to change tuple, but force relcache inval anyway */
		CacheInvalidateRelcacheByTuple(tuple);
	}

	heap_freetuple(tuple);

	heap_close(pg_class, RowExclusiveLock);
}
2049
2050
/*
 * index_build - invoke access-method-specific index build procedure
 *
 * On entry, the index's catalog entries are valid, and its physical disk
 * file has been created but is empty.  We call the AM-specific build
 * procedure to fill in the index contents.  We then update the pg_class
 * entries of the index and heap relation as needed, using statistics
 * returned by ambuild as well as data passed by the caller.
 *
 * isprimary tells whether to mark the index as a primary-key index.
 * isreindex indicates we are recreating a previously-existing index.
 *
 * Note: when reindexing an existing index, isprimary can be false even if
 * the index is a PK; it's already properly marked and need not be re-marked.
 *
 * Note: before Postgres 8.2, the passed-in heap and index Relations
 * were automatically closed by this routine.  This is no longer the case.
 * The caller opened 'em, and the caller should close 'em.
 */
void
index_build(Relation heapRelation,
			Relation indexRelation,
			IndexInfo *indexInfo,
			bool isprimary,
			bool isreindex)
{
	IndexBuildResult *stats;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/*
	 * sanity checks
	 */
	Assert(RelationIsValid(indexRelation));
	Assert(PointerIsValid(indexRelation->rd_amroutine));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));

	ereport(DEBUG1,
			(errmsg("building index \"%s\" on table \"%s\"",
					RelationGetRelationName(indexRelation),
					RelationGetRelationName(heapRelation))));

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Call the access method's build procedure
	 */
	stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
												 indexInfo);
	Assert(PointerIsValid(stats));

	/*
	 * If this is an unlogged index, we may need to write out an init fork for
	 * it -- but we must first check whether one already exists.  If, for
	 * example, an unlogged relation is truncated in the transaction that
	 * created it, or truncated twice in a subsequent transaction, the
	 * relfilenode won't change, and nothing needs to be done here.
	 *
	 * NOTE(review): rd_smgr is dereferenced by smgrexists() before
	 * RelationOpenSmgr() is called inside the block; this presumably relies
	 * on ambuild having already opened the smgr relation -- confirm.
	 */
	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
	{
		RelationOpenSmgr(indexRelation);
		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
		indexRelation->rd_amroutine->ambuildempty(indexRelation);
	}

	/*
	 * If we found any potentially broken HOT chains, mark the index as not
	 * being usable until the current transaction is below the event horizon.
	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
	 * if early pruning/vacuuming is enabled for the heap relation.  While it
	 * might become safe to use the index earlier based on actual cleanup
	 * activity and other active transactions, the test for that would be much
	 * more complex and would require some form of blocking, so keep it simple
	 * and fast by just using the current transaction.
	 *
	 * However, when reindexing an existing index, we should do nothing here.
	 * Any HOT chains that are broken with respect to the index must predate
	 * the index's original creation, so there is no need to change the
	 * index's usability horizon.  Moreover, we *must not* try to change the
	 * index's pg_index entry while reindexing pg_index itself, and this
	 * optimization nicely prevents that.  The more complex rules needed for a
	 * reindex are handled separately after this function returns.
	 *
	 * We also need not set indcheckxmin during a concurrent index build,
	 * because we won't set indisvalid true until all transactions that care
	 * about the broken HOT chains or early pruning/vacuuming are gone.
	 *
	 * Therefore, this code path can only be taken during non-concurrent
	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
	 * tuple's xmin doesn't matter, because that tuple was created in the
	 * current transaction anyway.  That also means we don't need to worry
	 * about any concurrent readers of the tuple; no other transaction can see
	 * it yet.
	 */
	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
		!isreindex &&
		!indexInfo->ii_Concurrent)
	{
		Oid			indexId = RelationGetRelid(indexRelation);
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		/* If it's a new index, indcheckxmin shouldn't be set ... */
		Assert(!indexForm->indcheckxmin);

		indexForm->indcheckxmin = true;
		simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
		CatalogUpdateIndexes(pg_index, indexTuple);

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/*
	 * Update heap and index pg_class rows
	 */
	index_update_stats(heapRelation,
					   true,
					   isprimary,
					   stats->heap_tuples);

	index_update_stats(indexRelation,
					   false,
					   false,
					   stats->index_tuples);

	/* Make the updated catalog row versions visible */
	CommandCounterIncrement();

	/*
	 * If it's for an exclusion constraint, make a second pass over the heap
	 * to verify that the constraint is satisfied.  We must not do this until
	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
	 * see comments for IndexCheckExclusion.)
	 */
	if (indexInfo->ii_ExclusionOps != NULL)
		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);
}
2215
2216
2217 /*
2218 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2219 *
2220 * This is called back from an access-method-specific index build procedure
2221 * after the AM has done whatever setup it needs. The parent heap relation
2222 * is scanned to find tuples that should be entered into the index. Each
2223 * such tuple is passed to the AM's callback routine, which does the right
2224 * things to add it to the new index. After we return, the AM's index
2225 * build procedure does whatever cleanup it needs.
2226 *
2227 * The total count of heap tuples is returned. This is for updating pg_class
2228 * statistics. (It's annoying not to be able to do that here, but we want
2229 * to merge that update with others; see index_update_stats.) Note that the
2230 * index AM itself must keep track of the number of index tuples; we don't do
2231 * so here because the AM might reject some of the tuples for its own reasons,
2232 * such as being unable to store NULLs.
2233 *
2234 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2235 * any potentially broken HOT chains. Currently, we set this if there are
2236 * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2237 * trying very hard to detect whether they're really incompatible with the
2238 * chain tip.
2239 */
2240 double
IndexBuildHeapScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,IndexBuildCallback callback,void * callback_state)2241 IndexBuildHeapScan(Relation heapRelation,
2242 Relation indexRelation,
2243 IndexInfo *indexInfo,
2244 bool allow_sync,
2245 IndexBuildCallback callback,
2246 void *callback_state)
2247 {
2248 return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2249 indexInfo, allow_sync,
2250 false,
2251 0, InvalidBlockNumber,
2252 callback, callback_state);
2253 }
2254
/*
 * As above, except that instead of scanning the complete heap, only the given
 * number of blocks are scanned.  Scan to end-of-rel can be signalled by
 * passing InvalidBlockNumber as numblocks.  Note that restricting the range
 * to scan cannot be done when requesting syncscan.
 *
 * When "anyvisible" mode is requested, all tuples visible to any transaction
 * are considered, including those inserted or deleted by transactions that are
 * still in progress.
 */
double
IndexBuildHeapRangeScan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						bool allow_sync,
						bool anyvisible,
						BlockNumber start_blockno,
						BlockNumber numblocks,
						IndexBuildCallback callback,
						void *callback_state)
{
	bool		is_system_catalog;
	bool		checking_uniqueness;
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	double		reltuples;		/* running count of heap tuples indexed */
	List	   *predicate;		/* exec state for partial-index predicate */
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;
	TransactionId OldestXmin;
	/* block whose HOT-chain root offsets are currently cached, if any */
	BlockNumber root_blkno = InvalidBlockNumber;
	/* per-page map from line pointer offset to its HOT-chain root offset */
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/* Remember if it's a system catalog */
	is_system_catalog = IsSystemRelation(heapRelation);

	/* See whether we're verifying uniqueness/exclusion properties */
	checking_uniqueness = (indexInfo->ii_Unique ||
						   indexInfo->ii_ExclusionOps != NULL);

	/*
	 * "Any visible" mode is not compatible with uniqueness checks; make sure
	 * only one of those is requested.
	 */
	Assert(!(anyvisible && checking_uniqueness));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Prepare for scan of the base relation.  In a normal index build, we use
	 * SnapshotAny because we must retrieve all tuples and do our own time
	 * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
	 * and index whatever's live according to that.
	 */
	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
	{
		snapshot = RegisterSnapshot(GetTransactionSnapshot());
		OldestXmin = InvalidTransactionId;		/* not used */

		/* "any visible" mode is not compatible with this */
		Assert(!anyvisible);
	}
	else
	{
		snapshot = SnapshotAny;
		/* okay to ignore lazy VACUUMs here */
		OldestXmin = GetOldestXmin(heapRelation, true);
	}

	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,		/* snapshot */
								0,		/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								allow_sync);	/* syncscan OK? */

	/* set our scan endpoints */
	if (!allow_sync)
		heap_setscanlimits(scan, start_blockno, numblocks);
	else
	{
		/* syncscan can only be requested on whole relation */
		Assert(start_blockno == 0);
		Assert(numblocks == InvalidBlockNumber);
	}

	reltuples = 0;

	/*
	 * Scan all tuples in the base relation.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		bool		tupleIsAlive;

		CHECK_FOR_INTERRUPTS();

		/*
		 * When dealing with a HOT-chain of updated tuples, we want to index
		 * the values of the live tuple (if any), but index it under the TID
		 * of the chain's root tuple.  This approach is necessary to preserve
		 * the HOT-chain structure in the heap.  So we need to be able to find
		 * the root item offset for every tuple that's in a HOT-chain.  When
		 * first reaching a new page of the relation, call
		 * heap_get_root_tuples() to build a map of root item offsets on the
		 * page.
		 *
		 * It might look unsafe to use this information across buffer
		 * lock/unlock.  However, we hold ShareLock on the table so no
		 * ordinary insert/update/delete should occur; and we hold pin on the
		 * buffer continuously while visiting the page, so no pruning
		 * operation can occur either.
		 *
		 * In cases with only ShareUpdateExclusiveLock on the table, it's
		 * possible for some HOT tuples to appear that we didn't know about
		 * when we first read the page.  To handle that case, we re-obtain the
		 * list of root offsets when a HOT tuple points to a root item that we
		 * don't know about.
		 *
		 * Also, although our opinions about tuple liveness could change while
		 * we scan the page (due to concurrent transaction commits/aborts),
		 * the chain root locations won't, so this info doesn't need to be
		 * rebuilt after waiting for another transaction.
		 *
		 * Note the implied assumption that there is no more than one live
		 * tuple per HOT-chain --- else we could create more than one index
		 * entry pointing to the same root tuple.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			/* share-lock the buffer just while reading the line pointers */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			root_blkno = scan->rs_cblock;
		}

		if (snapshot == SnapshotAny)
		{
			/* do our own time qual check */
			bool		indexIt;
			TransactionId xwait;

	recheck:

			/*
			 * We could possibly get away with not locking the buffer here,
			 * since caller should hold ShareLock on the relation, but let's
			 * be conservative about it.  (This remark is still correct even
			 * with HOT-pruning: our pin on the buffer prevents pruning.)
			 */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
											 scan->rs_cbuf))
			{
				case HEAPTUPLE_DEAD:
					/* Definitely dead, we can ignore it */
					indexIt = false;
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Normal case, index and unique-check it */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must index it
					 * anyway to preserve MVCC semantics.  (Pre-existing
					 * transactions could try to use the index after we finish
					 * building it, and may need to see such tuples.)
					 *
					 * However, if it was HOT-updated then we must only index
					 * the live tuple at the end of the HOT-chain.  Since this
					 * breaks semantics for pre-existing snapshots, mark the
					 * index as unusable for them.
					 */
					if (HeapTupleIsHotUpdated(heapTuple))
					{
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
						indexIt = true;
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:

					/*
					 * In "anyvisible" mode, this tuple is visible and we
					 * don't need any further checks.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = true;
						break;
					}

					/*
					 * Since caller should hold ShareLock or better, normally
					 * the only way to see this is if it was inserted earlier
					 * in our own transaction.  However, it can happen in
					 * system catalogs, since we tend to release write lock
					 * before commit there.  Give a warning if neither case
					 * applies.
					 */
					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent insert in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, indexing
						 * such a tuple could lead to a bogus uniqueness
						 * failure.  In that case we wait for the inserting
						 * transaction to finish and check again.
						 */
						if (checking_uniqueness)
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}
					}

					/*
					 * We must index such tuples, since if the index build
					 * commits then they're good.
					 */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:

					/*
					 * As with INSERT_IN_PROGRESS case, this is unexpected
					 * unless it's our own deletion or a system catalog; but
					 * in anyvisible mode, this tuple is visible.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = false;
						break;
					}

					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent delete in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, assuming
						 * the tuple is dead could lead to missing a
						 * uniqueness violation.  In that case we wait for the
						 * deleting transaction to finish and check again.
						 *
						 * Also, if it's a HOT-updated tuple, we should not
						 * index it but rather the live tuple at the end of
						 * the HOT-chain.  However, the deleting transaction
						 * could abort, possibly leaving this tuple as live
						 * after all, in which case it has to be indexed. The
						 * only way to know what to do is to wait for the
						 * deleting transaction to finish and check again.
						 */
						if (checking_uniqueness ||
							HeapTupleIsHotUpdated(heapTuple))
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}

						/*
						 * Otherwise index it but don't check for uniqueness,
						 * the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					else if (HeapTupleIsHotUpdated(heapTuple))
					{
						/*
						 * It's a HOT-updated tuple deleted by our own xact.
						 * We can assume the deletion will commit (else the
						 * index contents don't matter), so treat the same as
						 * RECENTLY_DEAD HOT-updated tuples.
						 */
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
					{
						/*
						 * It's a regular tuple deleted by our own xact. Index
						 * it but don't check for uniqueness, the same as a
						 * RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					indexIt = tupleIsAlive = false;		/* keep compiler quiet */
					break;
			}

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			if (!indexIt)
				continue;
		}
		else
		{
			/* heap_getnext did the time qual check */
			tupleIsAlive = true;
		}

		reltuples += 1;

		/* Free per-tuple memory from the previous iteration */
		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, discard tuples that don't satisfy the
		 * predicate.
		 */
		if (predicate != NIL)
		{
			if (!ExecQual(predicate, econtext, false))
				continue;
		}

		/*
		 * For the current heap tuple, extract all the attributes we use in
		 * this index, and note which are null.  This also performs evaluation
		 * of any expressions needed.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * You'd think we should go ahead and build the index tuple here, but
		 * some index AMs want to do further processing on the data first.  So
		 * pass the values[] and isnull[] arrays, instead.
		 */

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			/*
			 * For a heap-only tuple, pretend its TID is that of the root. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			HeapTupleData rootTuple;
			OffsetNumber offnum;

			rootTuple = *heapTuple;
			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);

			/*
			 * If a HOT tuple points to a root that we don't know
			 * about, obtain root items afresh.  If that still fails,
			 * report it as corruption.
			 */
			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
			{
				Page		page = BufferGetPage(scan->rs_cbuf);

				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
				heap_get_root_tuples(page, root_offsets);
				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(&heapTuple->t_self),
										 offnum,
										 RelationGetRelationName(heapRelation))));

			/* Substitute the root line pointer's offset into the copied TID */
			ItemPointerSetOffsetNumber(&rootTuple.t_self,
									   root_offsets[offnum - 1]);

			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
		else
		{
			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
	}

	heap_endscan(scan);

	/* we can now forget our snapshot, if set */
	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
		UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;

	return reltuples;
}
2721
2722
2723 /*
2724 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2725 *
2726 * When creating an exclusion constraint, we first build the index normally
2727 * and then rescan the heap to check for conflicts. We assume that we only
2728 * need to validate tuples that are live according to an up-to-date snapshot,
2729 * and that these were correctly indexed even in the presence of broken HOT
2730 * chains. This should be OK since we are holding at least ShareLock on the
2731 * table, meaning there can be no uncommitted updates from other transactions.
2732 * (Note: that wouldn't necessarily work for system catalogs, since many
2733 * operations release write lock early on the system catalogs.)
2734 */
2735 static void
IndexCheckExclusion(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo)2736 IndexCheckExclusion(Relation heapRelation,
2737 Relation indexRelation,
2738 IndexInfo *indexInfo)
2739 {
2740 HeapScanDesc scan;
2741 HeapTuple heapTuple;
2742 Datum values[INDEX_MAX_KEYS];
2743 bool isnull[INDEX_MAX_KEYS];
2744 List *predicate;
2745 TupleTableSlot *slot;
2746 EState *estate;
2747 ExprContext *econtext;
2748 Snapshot snapshot;
2749
2750 /*
2751 * If we are reindexing the target index, mark it as no longer being
2752 * reindexed, to forestall an Assert in index_beginscan when we try to use
2753 * the index for probes. This is OK because the index is now fully valid.
2754 */
2755 if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
2756 ResetReindexProcessing();
2757
2758 /*
2759 * Need an EState for evaluation of index expressions and partial-index
2760 * predicates. Also a slot to hold the current tuple.
2761 */
2762 estate = CreateExecutorState();
2763 econtext = GetPerTupleExprContext(estate);
2764 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2765
2766 /* Arrange for econtext's scan tuple to be the tuple under test */
2767 econtext->ecxt_scantuple = slot;
2768
2769 /* Set up execution state for predicate, if any. */
2770 predicate = (List *)
2771 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2772 estate);
2773
2774 /*
2775 * Scan all live tuples in the base relation.
2776 */
2777 snapshot = RegisterSnapshot(GetLatestSnapshot());
2778 scan = heap_beginscan_strat(heapRelation, /* relation */
2779 snapshot, /* snapshot */
2780 0, /* number of keys */
2781 NULL, /* scan key */
2782 true, /* buffer access strategy OK */
2783 true); /* syncscan OK */
2784
2785 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2786 {
2787 CHECK_FOR_INTERRUPTS();
2788
2789 MemoryContextReset(econtext->ecxt_per_tuple_memory);
2790
2791 /* Set up for predicate or expression evaluation */
2792 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2793
2794 /*
2795 * In a partial index, ignore tuples that don't satisfy the predicate.
2796 */
2797 if (predicate != NIL)
2798 {
2799 if (!ExecQual(predicate, econtext, false))
2800 continue;
2801 }
2802
2803 /*
2804 * Extract index column values, including computing expressions.
2805 */
2806 FormIndexDatum(indexInfo,
2807 slot,
2808 estate,
2809 values,
2810 isnull);
2811
2812 /*
2813 * Check that this tuple has no conflicts.
2814 */
2815 check_exclusion_constraint(heapRelation,
2816 indexRelation, indexInfo,
2817 &(heapTuple->t_self), values, isnull,
2818 estate, true);
2819 }
2820
2821 heap_endscan(scan);
2822 UnregisterSnapshot(snapshot);
2823
2824 ExecDropSingleTupleTableSlot(slot);
2825
2826 FreeExecutorState(estate);
2827
2828 /* These may have been pointing to the now-gone estate */
2829 indexInfo->ii_ExpressionsState = NIL;
2830 indexInfo->ii_PredicateState = NIL;
2831 }
2832
2833
2834 /*
2835 * validate_index - support code for concurrent index builds
2836 *
2837 * We do a concurrent index build by first inserting the catalog entry for the
2838 * index via index_create(), marking it not indisready and not indisvalid.
2839 * Then we commit our transaction and start a new one, then we wait for all
2840 * transactions that could have been modifying the table to terminate. Now
2841 * we know that any subsequently-started transactions will see the index and
2842 * honor its constraints on HOT updates; so while existing HOT-chains might
2843 * be broken with respect to the index, no currently live tuple will have an
2844 * incompatible HOT update done to it. We now build the index normally via
2845 * index_build(), while holding a weak lock that allows concurrent
2846 * insert/update/delete. Also, we index only tuples that are valid
2847 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2848 * build takes care to include recently-dead tuples. This is OK because
2849 * we won't mark the index valid until all transactions that might be able
2850 * to see those tuples are gone. The reason for doing that is to avoid
2851 * bogus unique-index failures due to concurrent UPDATEs (we might see
2852 * different versions of the same row as being valid when we pass over them,
2853 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
2854 * does not contain any tuples added to the table while we built the index.
2855 *
2856 * Next, we mark the index "indisready" (but still not "indisvalid") and
2857 * commit the second transaction and start a third. Again we wait for all
2858 * transactions that could have been modifying the table to terminate. Now
2859 * we know that any subsequently-started transactions will see the index and
2860 * insert their new tuples into it. We then take a new reference snapshot
2861 * which is passed to validate_index(). Any tuples that are valid according
2862 * to this snap, but are not in the index, must be added to the index.
2863 * (Any tuples committed live after the snap will be inserted into the
2864 * index by their originating transaction. Any tuples committed dead before
2865 * the snap need not be indexed, because we will wait out all transactions
2866 * that might care about them before we mark the index valid.)
2867 *
2868 * validate_index() works by first gathering all the TIDs currently in the
2869 * index, using a bulkdelete callback that just stores the TIDs and doesn't
2870 * ever say "delete it". (This should be faster than a plain indexscan;
2871 * also, not all index AMs support full-index indexscan.) Then we sort the
2872 * TIDs, and finally scan the table doing a "merge join" against the TID list
2873 * to see which tuples are missing from the index. Thus we will ensure that
2874 * all tuples valid according to the reference snapshot are in the index.
2875 *
2876 * Building a unique index this way is tricky: we might try to insert a
2877 * tuple that is already dead or is in process of being deleted, and we
2878 * mustn't have a uniqueness failure against an updated version of the same
2879 * row. We could try to check the tuple to see if it's already dead and tell
2880 * index_insert() not to do the uniqueness check, but that still leaves us
2881 * with a race condition against an in-progress update. To handle that,
2882 * we expect the index AM to recheck liveness of the to-be-inserted tuple
2883 * before it declares a uniqueness error.
2884 *
2885 * After completing validate_index(), we wait until all transactions that
2886 * were alive at the time of the reference snapshot are gone; this is
2887 * necessary to be sure there are none left with a transaction snapshot
2888 * older than the reference (and hence possibly able to see tuples we did
2889 * not index). Then we mark the index "indisvalid" and commit. Subsequent
2890 * transactions will be able to use it for queries.
2891 *
2892 * Doing two full table scans is a brute-force strategy. We could try to be
2893 * cleverer, eg storing new tuples in a special area of the table (perhaps
2894 * making the table append-only by setting use_fsm). However that would
2895 * add yet more locking issues.
2896 */
2897 void
validate_index(Oid heapId,Oid indexId,Snapshot snapshot)2898 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
2899 {
2900 Relation heapRelation,
2901 indexRelation;
2902 IndexInfo *indexInfo;
2903 IndexVacuumInfo ivinfo;
2904 v_i_state state;
2905 Oid save_userid;
2906 int save_sec_context;
2907 int save_nestlevel;
2908
2909 /* Open and lock the parent heap relation */
2910 heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
2911 /* And the target index relation */
2912 indexRelation = index_open(indexId, RowExclusiveLock);
2913
2914 /*
2915 * Fetch info needed for index_insert. (You might think this should be
2916 * passed in from DefineIndex, but its copy is long gone due to having
2917 * been built in a previous transaction.)
2918 */
2919 indexInfo = BuildIndexInfo(indexRelation);
2920
2921 /* mark build is concurrent just for consistency */
2922 indexInfo->ii_Concurrent = true;
2923
2924 /*
2925 * Switch to the table owner's userid, so that any index functions are run
2926 * as that user. Also lock down security-restricted operations and
2927 * arrange to make GUC variable changes local to this command.
2928 */
2929 GetUserIdAndSecContext(&save_userid, &save_sec_context);
2930 SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2931 save_sec_context | SECURITY_RESTRICTED_OPERATION);
2932 save_nestlevel = NewGUCNestLevel();
2933
2934 /*
2935 * Scan the index and gather up all the TIDs into a tuplesort object.
2936 */
2937 ivinfo.index = indexRelation;
2938 ivinfo.analyze_only = false;
2939 ivinfo.estimated_count = true;
2940 ivinfo.message_level = DEBUG2;
2941 ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
2942 ivinfo.strategy = NULL;
2943
2944 /*
2945 * Encode TIDs as int8 values for the sort, rather than directly sorting
2946 * item pointers. This can be significantly faster, primarily because TID
2947 * is a pass-by-reference type on all platforms, whereas int8 is
2948 * pass-by-value on most platforms.
2949 */
2950 state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
2951 InvalidOid, false,
2952 maintenance_work_mem,
2953 false);
2954 state.htups = state.itups = state.tups_inserted = 0;
2955
2956 (void) index_bulk_delete(&ivinfo, NULL,
2957 validate_index_callback, (void *) &state);
2958
2959 /* Execute the sort */
2960 tuplesort_performsort(state.tuplesort);
2961
2962 /*
2963 * Now scan the heap and "merge" it with the index
2964 */
2965 validate_index_heapscan(heapRelation,
2966 indexRelation,
2967 indexInfo,
2968 snapshot,
2969 &state);
2970
2971 /* Done with tuplesort object */
2972 tuplesort_end(state.tuplesort);
2973
2974 elog(DEBUG2,
2975 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
2976 state.htups, state.itups, state.tups_inserted);
2977
2978 /* Roll back any GUC changes executed by index functions */
2979 AtEOXact_GUC(false, save_nestlevel);
2980
2981 /* Restore userid and security context */
2982 SetUserIdAndSecContext(save_userid, save_sec_context);
2983
2984 /* Close rels, but keep locks */
2985 index_close(indexRelation, NoLock);
2986 heap_close(heapRelation, NoLock);
2987 }
2988
2989 /*
2990 * itemptr_encode - Encode ItemPointer as int64/int8
2991 *
2992 * This representation must produce values encoded as int64 that sort in the
2993 * same order as their corresponding original TID values would (using the
2994 * default int8 opclass to produce a result equivalent to the default TID
2995 * opclass).
2996 *
2997 * As noted in validate_index(), this can be significantly faster.
2998 */
2999 static inline int64
itemptr_encode(ItemPointer itemptr)3000 itemptr_encode(ItemPointer itemptr)
3001 {
3002 BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3003 OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3004 int64 encoded;
3005
3006 /*
3007 * Use the 16 least significant bits for the offset. 32 adjacent bits are
3008 * used for the block number. Since remaining bits are unused, there
3009 * cannot be negative encoded values (We assume a two's complement
3010 * representation).
3011 */
3012 encoded = ((uint64) block << 16) | (uint16) offset;
3013
3014 return encoded;
3015 }
3016
3017 /*
3018 * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3019 */
3020 static inline void
itemptr_decode(ItemPointer itemptr,int64 encoded)3021 itemptr_decode(ItemPointer itemptr, int64 encoded)
3022 {
3023 BlockNumber block = (BlockNumber) (encoded >> 16);
3024 OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3025
3026 ItemPointerSet(itemptr, block, offset);
3027 }
3028
3029 /*
3030 * validate_index_callback - bulkdelete callback to collect the index TIDs
3031 */
3032 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3033 validate_index_callback(ItemPointer itemptr, void *opaque)
3034 {
3035 v_i_state *state = (v_i_state *) opaque;
3036 int64 encoded = itemptr_encode(itemptr);
3037
3038 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3039 state->itups += 1;
3040 return false; /* never actually delete anything */
3041 }
3042
3043 /*
3044 * validate_index_heapscan - second table scan for concurrent index build
3045 *
3046 * This has much code in common with IndexBuildHeapScan, but it's enough
3047 * different that it seems cleaner to have two routines not one.
3048 */
3049 static void
validate_index_heapscan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,Snapshot snapshot,v_i_state * state)3050 validate_index_heapscan(Relation heapRelation,
3051 Relation indexRelation,
3052 IndexInfo *indexInfo,
3053 Snapshot snapshot,
3054 v_i_state *state)
3055 {
3056 HeapScanDesc scan;
3057 HeapTuple heapTuple;
3058 Datum values[INDEX_MAX_KEYS];
3059 bool isnull[INDEX_MAX_KEYS];
3060 List *predicate;
3061 TupleTableSlot *slot;
3062 EState *estate;
3063 ExprContext *econtext;
3064 BlockNumber root_blkno = InvalidBlockNumber;
3065 OffsetNumber root_offsets[MaxHeapTuplesPerPage];
3066 bool in_index[MaxHeapTuplesPerPage];
3067
3068 /* state variables for the merge */
3069 ItemPointer indexcursor = NULL;
3070 ItemPointerData decoded;
3071 bool tuplesort_empty = false;
3072
3073 /*
3074 * sanity checks
3075 */
3076 Assert(OidIsValid(indexRelation->rd_rel->relam));
3077
3078 /*
3079 * Need an EState for evaluation of index expressions and partial-index
3080 * predicates. Also a slot to hold the current tuple.
3081 */
3082 estate = CreateExecutorState();
3083 econtext = GetPerTupleExprContext(estate);
3084 slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
3085
3086 /* Arrange for econtext's scan tuple to be the tuple under test */
3087 econtext->ecxt_scantuple = slot;
3088
3089 /* Set up execution state for predicate, if any. */
3090 predicate = (List *)
3091 ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
3092 estate);
3093
3094 /*
3095 * Prepare for scan of the base relation. We need just those tuples
3096 * satisfying the passed-in reference snapshot. We must disable syncscan
3097 * here, because it's critical that we read from block zero forward to
3098 * match the sorted TIDs.
3099 */
3100 scan = heap_beginscan_strat(heapRelation, /* relation */
3101 snapshot, /* snapshot */
3102 0, /* number of keys */
3103 NULL, /* scan key */
3104 true, /* buffer access strategy OK */
3105 false); /* syncscan not OK */
3106
3107 /*
3108 * Scan all tuples matching the snapshot.
3109 */
3110 while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
3111 {
3112 ItemPointer heapcursor = &heapTuple->t_self;
3113 ItemPointerData rootTuple;
3114 OffsetNumber root_offnum;
3115
3116 CHECK_FOR_INTERRUPTS();
3117
3118 state->htups += 1;
3119
3120 /*
3121 * As commented in IndexBuildHeapScan, we should index heap-only
3122 * tuples under the TIDs of their root tuples; so when we advance onto
3123 * a new heap page, build a map of root item offsets on the page.
3124 *
3125 * This complicates merging against the tuplesort output: we will
3126 * visit the live tuples in order by their offsets, but the root
3127 * offsets that we need to compare against the index contents might be
3128 * ordered differently. So we might have to "look back" within the
3129 * tuplesort output, but only within the current page. We handle that
3130 * by keeping a bool array in_index[] showing all the
3131 * already-passed-over tuplesort output TIDs of the current page. We
3132 * clear that array here, when advancing onto a new heap page.
3133 */
3134 if (scan->rs_cblock != root_blkno)
3135 {
3136 Page page = BufferGetPage(scan->rs_cbuf);
3137
3138 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3139 heap_get_root_tuples(page, root_offsets);
3140 LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
3141
3142 memset(in_index, 0, sizeof(in_index));
3143
3144 root_blkno = scan->rs_cblock;
3145 }
3146
3147 /* Convert actual tuple TID to root TID */
3148 rootTuple = *heapcursor;
3149 root_offnum = ItemPointerGetOffsetNumber(heapcursor);
3150
3151 if (HeapTupleIsHeapOnly(heapTuple))
3152 {
3153 root_offnum = root_offsets[root_offnum - 1];
3154 if (!OffsetNumberIsValid(root_offnum))
3155 ereport(ERROR,
3156 (errcode(ERRCODE_DATA_CORRUPTED),
3157 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3158 ItemPointerGetBlockNumber(heapcursor),
3159 ItemPointerGetOffsetNumber(heapcursor),
3160 RelationGetRelationName(heapRelation))));
3161 ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
3162 }
3163
3164 /*
3165 * "merge" by skipping through the index tuples until we find or pass
3166 * the current root tuple.
3167 */
3168 while (!tuplesort_empty &&
3169 (!indexcursor ||
3170 ItemPointerCompare(indexcursor, &rootTuple) < 0))
3171 {
3172 Datum ts_val;
3173 bool ts_isnull;
3174
3175 if (indexcursor)
3176 {
3177 /*
3178 * Remember index items seen earlier on the current heap page
3179 */
3180 if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
3181 in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
3182 }
3183
3184 tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
3185 &ts_val, &ts_isnull, NULL);
3186 Assert(tuplesort_empty || !ts_isnull);
3187 if (!tuplesort_empty)
3188 {
3189 itemptr_decode(&decoded, DatumGetInt64(ts_val));
3190 indexcursor = &decoded;
3191
3192 /* If int8 is pass-by-ref, free (encoded) TID Datum memory */
3193 #ifndef USE_FLOAT8_BYVAL
3194 pfree(DatumGetPointer(ts_val));
3195 #endif
3196 }
3197 else
3198 {
3199 /* Be tidy */
3200 indexcursor = NULL;
3201 }
3202 }
3203
3204 /*
3205 * If the tuplesort has overshot *and* we didn't see a match earlier,
3206 * then this tuple is missing from the index, so insert it.
3207 */
3208 if ((tuplesort_empty ||
3209 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
3210 !in_index[root_offnum - 1])
3211 {
3212 MemoryContextReset(econtext->ecxt_per_tuple_memory);
3213
3214 /* Set up for predicate or expression evaluation */
3215 ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3216
3217 /*
3218 * In a partial index, discard tuples that don't satisfy the
3219 * predicate.
3220 */
3221 if (predicate != NIL)
3222 {
3223 if (!ExecQual(predicate, econtext, false))
3224 continue;
3225 }
3226
3227 /*
3228 * For the current heap tuple, extract all the attributes we use
3229 * in this index, and note which are null. This also performs
3230 * evaluation of any expressions needed.
3231 */
3232 FormIndexDatum(indexInfo,
3233 slot,
3234 estate,
3235 values,
3236 isnull);
3237
3238 /*
3239 * You'd think we should go ahead and build the index tuple here,
3240 * but some index AMs want to do further processing on the data
3241 * first. So pass the values[] and isnull[] arrays, instead.
3242 */
3243
3244 /*
3245 * If the tuple is already committed dead, you might think we
3246 * could suppress uniqueness checking, but this is no longer true
3247 * in the presence of HOT, because the insert is actually a proxy
3248 * for a uniqueness check on the whole HOT-chain. That is, the
3249 * tuple we have here could be dead because it was already
3250 * HOT-updated, and if so the updating transaction will not have
3251 * thought it should insert index entries. The index AM will
3252 * check the whole HOT-chain and correctly detect a conflict if
3253 * there is one.
3254 */
3255
3256 index_insert(indexRelation,
3257 values,
3258 isnull,
3259 &rootTuple,
3260 heapRelation,
3261 indexInfo->ii_Unique ?
3262 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);
3263
3264 state->tups_inserted += 1;
3265 }
3266 }
3267
3268 heap_endscan(scan);
3269
3270 ExecDropSingleTupleTableSlot(slot);
3271
3272 FreeExecutorState(estate);
3273
3274 /* These may have been pointing to the now-gone estate */
3275 indexInfo->ii_ExpressionsState = NIL;
3276 indexInfo->ii_PredicateState = NIL;
3277 }
3278
3279
3280 /*
3281 * index_set_state_flags - adjust pg_index state flags
3282 *
3283 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3284 * flags that denote the index's state.
3285 *
3286 * Note that simple_heap_update() sends a cache invalidation message for the
3287 * tuple, so other sessions will hear about the update as soon as we commit.
3288 */
3289 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3290 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3291 {
3292 Relation pg_index;
3293 HeapTuple indexTuple;
3294 Form_pg_index indexForm;
3295
3296 /* Open pg_index and fetch a writable copy of the index's tuple */
3297 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3298
3299 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3300 ObjectIdGetDatum(indexId));
3301 if (!HeapTupleIsValid(indexTuple))
3302 elog(ERROR, "cache lookup failed for index %u", indexId);
3303 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3304
3305 /* Perform the requested state change on the copy */
3306 switch (action)
3307 {
3308 case INDEX_CREATE_SET_READY:
3309 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3310 Assert(indexForm->indislive);
3311 Assert(!indexForm->indisready);
3312 Assert(!indexForm->indisvalid);
3313 indexForm->indisready = true;
3314 break;
3315 case INDEX_CREATE_SET_VALID:
3316 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3317 Assert(indexForm->indislive);
3318 Assert(indexForm->indisready);
3319 Assert(!indexForm->indisvalid);
3320 indexForm->indisvalid = true;
3321 break;
3322 case INDEX_DROP_CLEAR_VALID:
3323
3324 /*
3325 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3326 *
3327 * If indisready == true we leave it set so the index still gets
3328 * maintained by active transactions. We only need to ensure that
3329 * indisvalid is false. (We don't assert that either is initially
3330 * true, though, since we want to be able to retry a DROP INDEX
3331 * CONCURRENTLY that failed partway through.)
3332 *
3333 * Note: the CLUSTER logic assumes that indisclustered cannot be
3334 * set on any invalid index, so clear that flag too.
3335 */
3336 indexForm->indisvalid = false;
3337 indexForm->indisclustered = false;
3338 break;
3339 case INDEX_DROP_SET_DEAD:
3340
3341 /*
3342 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3343 *
3344 * We clear both indisready and indislive, because we not only
3345 * want to stop updates, we want to prevent sessions from touching
3346 * the index at all.
3347 */
3348 Assert(!indexForm->indisvalid);
3349 indexForm->indisready = false;
3350 indexForm->indislive = false;
3351 break;
3352 }
3353
3354 /* ... and update it */
3355 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
3356 CatalogUpdateIndexes(pg_index, indexTuple);
3357
3358 heap_close(pg_index, RowExclusiveLock);
3359 }
3360
3361
3362 /*
3363 * IndexGetRelation: given an index's relation OID, get the OID of the
3364 * relation it is an index on. Uses the system cache.
3365 */
3366 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3367 IndexGetRelation(Oid indexId, bool missing_ok)
3368 {
3369 HeapTuple tuple;
3370 Form_pg_index index;
3371 Oid result;
3372
3373 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3374 if (!HeapTupleIsValid(tuple))
3375 {
3376 if (missing_ok)
3377 return InvalidOid;
3378 elog(ERROR, "cache lookup failed for index %u", indexId);
3379 }
3380 index = (Form_pg_index) GETSTRUCT(tuple);
3381 Assert(index->indexrelid == indexId);
3382
3383 result = index->indrelid;
3384 ReleaseSysCache(tuple);
3385 return result;
3386 }
3387
3388 /*
3389 * reindex_index - This routine is used to recreate a single index
3390 */
3391 void
reindex_index(Oid indexId,bool skip_constraint_checks,char persistence,int options)3392 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3393 int options)
3394 {
3395 Relation iRel,
3396 heapRelation;
3397 Oid heapId;
3398 IndexInfo *indexInfo;
3399 volatile bool skipped_constraint = false;
3400 PGRUsage ru0;
3401
3402 pg_rusage_init(&ru0);
3403
3404 /*
3405 * Open and lock the parent heap relation. ShareLock is sufficient since
3406 * we only need to be sure no schema or data changes are going on.
3407 */
3408 heapId = IndexGetRelation(indexId, false);
3409 heapRelation = heap_open(heapId, ShareLock);
3410
3411 /*
3412 * Open the target index relation and get an exclusive lock on it, to
3413 * ensure that no one else is touching this particular index.
3414 */
3415 iRel = index_open(indexId, AccessExclusiveLock);
3416
3417 /*
3418 * Don't allow reindex on temp tables of other backends ... their local
3419 * buffer manager is not going to cope.
3420 */
3421 if (RELATION_IS_OTHER_TEMP(iRel))
3422 ereport(ERROR,
3423 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3424 errmsg("cannot reindex temporary tables of other sessions")));
3425
3426 /*
3427 * Also check for active uses of the index in the current transaction; we
3428 * don't want to reindex underneath an open indexscan.
3429 */
3430 CheckTableNotInUse(iRel, "REINDEX INDEX");
3431
3432 /*
3433 * All predicate locks on the index are about to be made invalid. Promote
3434 * them to relation locks on the heap.
3435 */
3436 TransferPredicateLocksToHeapRelation(iRel);
3437
3438 /* Fetch info needed for index_build */
3439 indexInfo = BuildIndexInfo(iRel);
3440
3441 /* If requested, skip checking uniqueness/exclusion constraints */
3442 if (skip_constraint_checks)
3443 {
3444 if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3445 skipped_constraint = true;
3446 indexInfo->ii_Unique = false;
3447 indexInfo->ii_ExclusionOps = NULL;
3448 indexInfo->ii_ExclusionProcs = NULL;
3449 indexInfo->ii_ExclusionStrats = NULL;
3450 }
3451
3452 /* Suppress use of the target index while rebuilding it */
3453 SetReindexProcessing(heapId, indexId);
3454
3455 /* Create a new physical relation for the index */
3456 RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
3457 InvalidMultiXactId);
3458
3459 /* Initialize the index and rebuild */
3460 /* Note: we do not need to re-establish pkey setting */
3461 index_build(heapRelation, iRel, indexInfo, false, true);
3462
3463 /* Re-allow use of target index */
3464 ResetReindexProcessing();
3465
3466 /*
3467 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3468 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3469 * and we didn't skip a uniqueness check, we can now mark it valid. This
3470 * allows REINDEX to be used to clean up in such cases.
3471 *
3472 * We can also reset indcheckxmin, because we have now done a
3473 * non-concurrent index build, *except* in the case where index_build
3474 * found some still-broken HOT chains. If it did, and we don't have to
3475 * change any of the other flags, we just leave indcheckxmin alone (note
3476 * that index_build won't have changed it, because this is a reindex).
3477 * This is okay and desirable because not updating the tuple leaves the
3478 * index's usability horizon (recorded as the tuple's xmin value) the same
3479 * as it was.
3480 *
3481 * But, if the index was invalid/not-ready/dead and there were broken HOT
3482 * chains, we had better force indcheckxmin true, because the normal
3483 * argument that the HOT chains couldn't conflict with the index is
3484 * suspect for an invalid index. (A conflict is definitely possible if
3485 * the index was dead. It probably shouldn't happen otherwise, but let's
3486 * be conservative.) In this case advancing the usability horizon is
3487 * appropriate.
3488 *
3489 * Another reason for avoiding unnecessary updates here is that while
3490 * reindexing pg_index itself, we must not try to update tuples in it.
3491 * pg_index's indexes should always have these flags in their clean state,
3492 * so that won't happen.
3493 *
3494 * If early pruning/vacuuming is enabled for the heap relation, the
3495 * usability horizon must be advanced to the current transaction on every
3496 * build or rebuild. pg_index is OK in this regard because catalog tables
3497 * are not subject to early cleanup.
3498 */
3499 if (!skipped_constraint)
3500 {
3501 Relation pg_index;
3502 HeapTuple indexTuple;
3503 Form_pg_index indexForm;
3504 bool index_bad;
3505 bool early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3506
3507 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3508
3509 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3510 ObjectIdGetDatum(indexId));
3511 if (!HeapTupleIsValid(indexTuple))
3512 elog(ERROR, "cache lookup failed for index %u", indexId);
3513 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3514
3515 index_bad = (!indexForm->indisvalid ||
3516 !indexForm->indisready ||
3517 !indexForm->indislive);
3518 if (index_bad ||
3519 (indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3520 early_pruning_enabled)
3521 {
3522 if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3523 indexForm->indcheckxmin = false;
3524 else if (index_bad || early_pruning_enabled)
3525 indexForm->indcheckxmin = true;
3526 indexForm->indisvalid = true;
3527 indexForm->indisready = true;
3528 indexForm->indislive = true;
3529 simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
3530 CatalogUpdateIndexes(pg_index, indexTuple);
3531
3532 /*
3533 * Invalidate the relcache for the table, so that after we commit
3534 * all sessions will refresh the table's index list. This ensures
3535 * that if anyone misses seeing the pg_index row during this
3536 * update, they'll refresh their list before attempting any update
3537 * on the table.
3538 */
3539 CacheInvalidateRelcache(heapRelation);
3540 }
3541
3542 heap_close(pg_index, RowExclusiveLock);
3543 }
3544
3545 /* Log what we did */
3546 if (options & REINDEXOPT_VERBOSE)
3547 ereport(INFO,
3548 (errmsg("index \"%s\" was reindexed",
3549 get_rel_name(indexId)),
3550 errdetail_internal("%s",
3551 pg_rusage_show(&ru0))));
3552
3553 /* Close rels, but keep locks */
3554 index_close(iRel, NoLock);
3555 heap_close(heapRelation, NoLock);
3556 }
3557
3558 /*
3559 * reindex_relation - This routine is used to recreate all indexes
3560 * of a relation (and optionally its toast relation too, if any).
3561 *
3562 * "flags" is a bitmask that can include any combination of these bits:
3563 *
3564 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3565 *
3566 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3567 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3568 * indexes are inconsistent with it. This makes things tricky if the relation
3569 * is a system catalog that we might consult during the reindexing. To deal
3570 * with that case, we mark all of the indexes as pending rebuild so that they
3571 * won't be trusted until rebuilt. The caller is required to call us *without*
3572 * having made the rebuilt table visible by doing CommandCounterIncrement;
3573 * we'll do CCI after having collected the index list. (This way we can still
3574 * use catalog indexes while collecting the list.)
3575 *
3576 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3577 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3578 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3579 * rebuild an index if constraint inconsistency is suspected. For optimal
3580 * performance, other callers should include the flag only after transforming
3581 * the data in a manner that risks a change in constraint validity.
3582 *
3583 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3584 * rebuilt indexes to unlogged.
3585 *
3586 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3587 * rebuilt indexes to permanent.
3588 *
3589 * Returns true if any indexes were rebuilt (including toast table's index
3590 * when relevant). Note that a CommandCounterIncrement will occur after each
3591 * index rebuild.
3592 */
3593 bool
reindex_relation(Oid relid,int flags,int options)3594 reindex_relation(Oid relid, int flags, int options)
3595 {
3596 Relation rel;
3597 Oid toast_relid;
3598 List *indexIds;
3599 char persistence;
3600 bool result;
3601 ListCell *indexId;
3602
3603 /*
3604 * Open and lock the relation. ShareLock is sufficient since we only need
3605 * to prevent schema and data changes in it. The lock level used here
3606 * should match ReindexTable().
3607 */
3608 rel = heap_open(relid, ShareLock);
3609
3610 toast_relid = rel->rd_rel->reltoastrelid;
3611
3612 /*
3613 * Get the list of index OIDs for this relation. (We trust to the
3614 * relcache to get this with a sequential scan if ignoring system
3615 * indexes.)
3616 */
3617 indexIds = RelationGetIndexList(rel);
3618
3619 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3620 {
3621 /* Suppress use of all the indexes until they are rebuilt */
3622 SetReindexPending(indexIds);
3623
3624 /*
3625 * Make the new heap contents visible --- now things might be
3626 * inconsistent!
3627 */
3628 CommandCounterIncrement();
3629 }
3630
3631 /*
3632 * Compute persistence of indexes: same as that of owning rel, unless
3633 * caller specified otherwise.
3634 */
3635 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3636 persistence = RELPERSISTENCE_UNLOGGED;
3637 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3638 persistence = RELPERSISTENCE_PERMANENT;
3639 else
3640 persistence = rel->rd_rel->relpersistence;
3641
3642 /* Reindex all the indexes. */
3643 foreach(indexId, indexIds)
3644 {
3645 Oid indexOid = lfirst_oid(indexId);
3646
3647 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3648 persistence, options);
3649
3650 CommandCounterIncrement();
3651
3652 /* Index should no longer be in the pending list */
3653 Assert(!ReindexIsProcessingIndex(indexOid));
3654 }
3655
3656 /*
3657 * Close rel, but continue to hold the lock.
3658 */
3659 heap_close(rel, NoLock);
3660
3661 result = (indexIds != NIL);
3662
3663 /*
3664 * If the relation has a secondary toast rel, reindex that too while we
3665 * still hold the lock on the master table.
3666 */
3667 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3668 result |= reindex_relation(toast_relid, flags, options);
3669
3670 return result;
3671 }
3672
3673
3674 /* ----------------------------------------------------------------
3675 * System index reindexing support
3676 *
3677 * When we are busy reindexing a system index, this code provides support
3678 * for preventing catalog lookups from using that index. We also make use
3679 * of this to catch attempted uses of user indexes during reindexing of
3680 * those indexes.
3681 * ----------------------------------------------------------------
3682 */
3683
3684 static Oid currentlyReindexedHeap = InvalidOid;
3685 static Oid currentlyReindexedIndex = InvalidOid;
3686 static List *pendingReindexedIndexes = NIL;
3687 static int reindexingNestLevel = 0;
3688
3689 /*
3690 * ReindexIsProcessingHeap
3691 * True if heap specified by OID is currently being reindexed.
3692 */
3693 bool
ReindexIsProcessingHeap(Oid heapOid)3694 ReindexIsProcessingHeap(Oid heapOid)
3695 {
3696 return heapOid == currentlyReindexedHeap;
3697 }
3698
3699 /*
3700 * ReindexIsCurrentlyProcessingIndex
3701 * True if index specified by OID is currently being reindexed.
3702 */
3703 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3704 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3705 {
3706 return indexOid == currentlyReindexedIndex;
3707 }
3708
3709 /*
3710 * ReindexIsProcessingIndex
3711 * True if index specified by OID is currently being reindexed,
3712 * or should be treated as invalid because it is awaiting reindex.
3713 */
3714 bool
ReindexIsProcessingIndex(Oid indexOid)3715 ReindexIsProcessingIndex(Oid indexOid)
3716 {
3717 return indexOid == currentlyReindexedIndex ||
3718 list_member_oid(pendingReindexedIndexes, indexOid);
3719 }
3720
3721 /*
3722 * SetReindexProcessing
3723 * Set flag that specified heap/index are being reindexed.
3724 */
3725 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3726 SetReindexProcessing(Oid heapOid, Oid indexOid)
3727 {
3728 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3729 /* Reindexing is not re-entrant. */
3730 if (OidIsValid(currentlyReindexedHeap))
3731 elog(ERROR, "cannot reindex while reindexing");
3732 currentlyReindexedHeap = heapOid;
3733 currentlyReindexedIndex = indexOid;
3734 /* Index is no longer "pending" reindex. */
3735 RemoveReindexPending(indexOid);
3736 /* This may have been set already, but in case it isn't, do so now. */
3737 reindexingNestLevel = GetCurrentTransactionNestLevel();
3738 }
3739
3740 /*
3741 * ResetReindexProcessing
3742 * Unset reindexing status.
3743 */
3744 static void
ResetReindexProcessing(void)3745 ResetReindexProcessing(void)
3746 {
3747 currentlyReindexedHeap = InvalidOid;
3748 currentlyReindexedIndex = InvalidOid;
3749 /* reindexingNestLevel remains set till end of (sub)transaction */
3750 }
3751
3752 /*
3753 * SetReindexPending
3754 * Mark the given indexes as pending reindex.
3755 *
3756 * NB: we assume that the current memory context stays valid throughout.
3757 */
3758 static void
SetReindexPending(List * indexes)3759 SetReindexPending(List *indexes)
3760 {
3761 /* Reindexing is not re-entrant. */
3762 if (pendingReindexedIndexes)
3763 elog(ERROR, "cannot reindex while reindexing");
3764 pendingReindexedIndexes = list_copy(indexes);
3765 reindexingNestLevel = GetCurrentTransactionNestLevel();
3766 }
3767
3768 /*
3769 * RemoveReindexPending
3770 * Remove the given index from the pending list.
3771 */
3772 static void
RemoveReindexPending(Oid indexOid)3773 RemoveReindexPending(Oid indexOid)
3774 {
3775 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3776 indexOid);
3777 }
3778
3779 /*
3780 * ResetReindexState
3781 * Clear all reindexing state during (sub)transaction abort.
3782 */
3783 void
ResetReindexState(int nestLevel)3784 ResetReindexState(int nestLevel)
3785 {
3786 /*
3787 * Because reindexing is not re-entrant, we don't need to cope with nested
3788 * reindexing states. We just need to avoid messing up the outer-level
3789 * state in case a subtransaction fails within a REINDEX. So checking the
3790 * current nest level against that of the reindex operation is sufficient.
3791 */
3792 if (reindexingNestLevel >= nestLevel)
3793 {
3794 currentlyReindexedHeap = InvalidOid;
3795 currentlyReindexedIndex = InvalidOid;
3796
3797 /*
3798 * We needn't try to release the contents of pendingReindexedIndexes;
3799 * that list should be in a transaction-lifespan context, so it will
3800 * go away automatically.
3801 */
3802 pendingReindexedIndexes = NIL;
3803
3804 reindexingNestLevel = 0;
3805 }
3806 }
3807