1 /*-------------------------------------------------------------------------
2 *
3 * index.c
4 * code to create and destroy POSTGRES index relations
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/catalog/index.c
12 *
13 *
14 * INTERFACE ROUTINES
15 * index_create() - Create a cataloged index relation
16 * index_drop() - Removes index relation from catalogs
17 * BuildIndexInfo() - Prepare to insert index tuples
18 * FormIndexDatum() - Construct datum vector for one index tuple
19 *
20 *-------------------------------------------------------------------------
21 */
22 #include "postgres.h"
23
24 #include <unistd.h>
25
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/event_trigger.h"
52 #include "commands/trigger.h"
53 #include "executor/executor.h"
54 #include "miscadmin.h"
55 #include "nodes/makefuncs.h"
56 #include "nodes/nodeFuncs.h"
57 #include "optimizer/clauses.h"
58 #include "parser/parser.h"
59 #include "storage/bufmgr.h"
60 #include "storage/lmgr.h"
61 #include "storage/predicate.h"
62 #include "storage/procarray.h"
63 #include "storage/smgr.h"
64 #include "utils/builtins.h"
65 #include "utils/fmgroids.h"
66 #include "utils/guc.h"
67 #include "utils/inval.h"
68 #include "utils/lsyscache.h"
69 #include "utils/memutils.h"
70 #include "utils/pg_rusage.h"
71 #include "utils/syscache.h"
72 #include "utils/tuplesort.h"
73 #include "utils/snapmgr.h"
74 #include "utils/tqual.h"
75
76
77 /* Potentially set by pg_upgrade_support functions */
78 Oid binary_upgrade_next_index_pg_class_oid = InvalidOid;
79
80 /* state info for validate_index bulkdelete callback */
/*
 * State info passed as the "opaque" argument to the validate_index
 * bulkdelete callback (see validate_index_callback / validate_index_heapscan).
 */
typedef struct
{
	Tuplesortstate *tuplesort;	/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* heap tuples seen (per name; set by scan) */
				itups,			/* index tuples seen (per name; set by scan) */
				tups_inserted;	/* tuples inserted during validation */
} v_i_state;
89
90 /* non-export function prototypes */
91 static bool relationHasPrimaryKey(Relation rel);
92 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
93 IndexInfo *indexInfo,
94 List *indexColNames,
95 Oid accessMethodObjectId,
96 Oid *collationObjectId,
97 Oid *classObjectId);
98 static void InitializeAttributeOids(Relation indexRelation,
99 int numatts, Oid indexoid);
100 static void AppendAttributeTuples(Relation indexRelation, int numatts);
101 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
102 IndexInfo *indexInfo,
103 Oid *collationOids,
104 Oid *classOids,
105 int16 *coloptions,
106 bool primary,
107 bool isexclusion,
108 bool immediate,
109 bool isvalid);
110 static void index_update_stats(Relation rel,
111 bool hasindex, bool isprimary,
112 double reltuples);
113 static void IndexCheckExclusion(Relation heapRelation,
114 Relation indexRelation,
115 IndexInfo *indexInfo);
116 static inline int64 itemptr_encode(ItemPointer itemptr);
117 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
118 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
119 static void validate_index_heapscan(Relation heapRelation,
120 Relation indexRelation,
121 IndexInfo *indexInfo,
122 Snapshot snapshot,
123 v_i_state *state);
124 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
125 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
126 static void ResetReindexProcessing(void);
127 static void SetReindexPending(List *indexes);
128 static void RemoveReindexPending(Oid indexOid);
129
130
131 /*
132 * relationHasPrimaryKey
133 * See whether an existing relation has a primary key.
134 *
135 * Caller must have suitable lock on the relation.
136 *
137 * Note: we intentionally do not check IndexIsValid here; that's because this
138 * is used to enforce the rule that there can be only one indisprimary index,
139 * and we want that to be true even if said index is invalid.
140 */
141 static bool
relationHasPrimaryKey(Relation rel)142 relationHasPrimaryKey(Relation rel)
143 {
144 bool result = false;
145 List *indexoidlist;
146 ListCell *indexoidscan;
147
148 /*
149 * Get the list of index OIDs for the table from the relcache, and look up
150 * each one in the pg_index syscache until we find one marked primary key
151 * (hopefully there isn't more than one such).
152 */
153 indexoidlist = RelationGetIndexList(rel);
154
155 foreach(indexoidscan, indexoidlist)
156 {
157 Oid indexoid = lfirst_oid(indexoidscan);
158 HeapTuple indexTuple;
159
160 indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161 if (!HeapTupleIsValid(indexTuple)) /* should not happen */
162 elog(ERROR, "cache lookup failed for index %u", indexoid);
163 result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164 ReleaseSysCache(indexTuple);
165 if (result)
166 break;
167 }
168
169 list_free(indexoidlist);
170
171 return result;
172 }
173
174 /*
175 * index_check_primary_key
176 * Apply special checks needed before creating a PRIMARY KEY index
177 *
178 * This processing used to be in DefineIndex(), but has been split out
179 * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
180 *
181 * We check for a pre-existing primary key, and that all columns of the index
182 * are simple column references (not expressions), and that all those
183 * columns are marked NOT NULL. If they aren't (which can only happen during
184 * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185 * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186 * them so --- or fail if they are not in fact nonnull.
187 *
188 * As of PG v10, the SET NOT NULL is applied to child tables as well, so
189 * that the behavior is like a manual SET NOT NULL.
190 *
191 * Caller had better have at least ShareLock on the table, else the not-null
192 * checking isn't trustworthy.
193 */
194 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)195 index_check_primary_key(Relation heapRel,
196 IndexInfo *indexInfo,
197 bool is_alter_table,
198 IndexStmt *stmt)
199 {
200 List *cmds;
201 int i;
202
203 /*
204 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
205 * TABLE, we have faith that the parser rejected multiple pkey clauses;
206 * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
207 * problem either.
208 */
209 if (is_alter_table &&
210 relationHasPrimaryKey(heapRel))
211 {
212 ereport(ERROR,
213 (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
214 errmsg("multiple primary keys for table \"%s\" are not allowed",
215 RelationGetRelationName(heapRel))));
216 }
217
218 /*
219 * Check that all of the attributes in a primary key are marked as not
220 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
221 */
222 cmds = NIL;
223 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
224 {
225 AttrNumber attnum = indexInfo->ii_KeyAttrNumbers[i];
226 HeapTuple atttuple;
227 Form_pg_attribute attform;
228
229 if (attnum == 0)
230 ereport(ERROR,
231 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
232 errmsg("primary keys cannot be expressions")));
233
234 /* System attributes are never null, so no need to check */
235 if (attnum < 0)
236 continue;
237
238 atttuple = SearchSysCache2(ATTNUM,
239 ObjectIdGetDatum(RelationGetRelid(heapRel)),
240 Int16GetDatum(attnum));
241 if (!HeapTupleIsValid(atttuple))
242 elog(ERROR, "cache lookup failed for attribute %d of relation %u",
243 attnum, RelationGetRelid(heapRel));
244 attform = (Form_pg_attribute) GETSTRUCT(atttuple);
245
246 if (!attform->attnotnull)
247 {
248 /* Add a subcommand to make this one NOT NULL */
249 AlterTableCmd *cmd = makeNode(AlterTableCmd);
250
251 cmd->subtype = AT_SetNotNull;
252 cmd->name = pstrdup(NameStr(attform->attname));
253 cmds = lappend(cmds, cmd);
254 }
255
256 ReleaseSysCache(atttuple);
257 }
258
259 /*
260 * XXX: possible future improvement: when being called from ALTER TABLE,
261 * it would be more efficient to merge this with the outer ALTER TABLE, so
262 * as to avoid two scans. But that seems to complicate DefineIndex's API
263 * unduly.
264 */
265 if (cmds)
266 {
267 EventTriggerAlterTableStart((Node *) stmt);
268 AlterTableInternal(RelationGetRelid(heapRel), cmds, true);
269 EventTriggerAlterTableEnd();
270 }
271 }
272
273 /*
274 * ConstructTupleDescriptor
275 *
276 * Build an index tuple descriptor for a new index
277 */
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 List *indexColNames,
						 Oid accessMethodObjectId,
						 Oid *collationObjectId,
						 Oid *classObjectId)
{
	int			numatts = indexInfo->ii_NumIndexAttrs;
	ListCell   *colnames_item = list_head(indexColNames);
	ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
	IndexAmRoutine *amroutine;
	TupleDesc	heapTupDesc;
	TupleDesc	indexTupDesc;
	int			natts;			/* #atts in heap rel --- for error checks */
	int			i;

	/* We need access to the index AM's API struct */
	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);

	/* ... and to the table's tuple descriptor */
	heapTupDesc = RelationGetDescr(heapRelation);
	natts = RelationGetForm(heapRelation)->relnatts;

	/*
	 * allocate the new tuple descriptor (no OIDs in index tuples)
	 */
	indexTupDesc = CreateTemplateTupleDesc(numatts, false);

	/*
	 * For simple index columns, we copy the pg_attribute row from the parent
	 * relation and modify it as necessary.  For expressions we have to cons
	 * up a pg_attribute row the hard way.
	 */
	for (i = 0; i < numatts; i++)
	{
		AttrNumber	atnum = indexInfo->ii_KeyAttrNumbers[i];
		Form_pg_attribute to = indexTupDesc->attrs[i];
		HeapTuple	tuple;
		Form_pg_type typeTup;
		Form_pg_opclass opclassTup;
		Oid			keyType;

		if (atnum != 0)
		{
			/* Simple index column */
			Form_pg_attribute from;

			if (atnum < 0)
			{
				/*
				 * here we are indexing on a system attribute (-1...-n)
				 */
				from = SystemAttributeDefinition(atnum,
												 heapRelation->rd_rel->relhasoids);
			}
			else
			{
				/*
				 * here we are indexing on a normal attribute (1...n)
				 */
				if (atnum > natts)	/* safety check */
					elog(ERROR, "invalid column number %d", atnum);
				from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
			}

			/*
			 * now that we've determined the "from", let's copy the tuple desc
			 * data...  (only the fixed-size part of the row is copied)
			 */
			memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			/*
			 * Fix the stuff that should not be the same as the underlying
			 * attr: index columns get fresh attnums, no constraints or
			 * defaults, and the collation chosen for the index column.
			 */
			to->attnum = i + 1;

			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->attnotnull = false;
			to->atthasdef = false;
			to->attidentity = '\0';
			to->attislocal = true;
			to->attinhcount = 0;
			to->attcollation = collationObjectId[i];
		}
		else
		{
			/* Expressional index */
			Node	   *indexkey;

			MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			if (indexpr_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in indexprs list");
			indexkey = (Node *) lfirst(indexpr_item);
			indexpr_item = lnext(indexpr_item);

			/*
			 * Lookup the expression type in pg_type for the type length etc.
			 */
			keyType = exprType(indexkey);
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			/*
			 * Assign some of the attributes values. Leave the rest as 0.
			 */
			to->attnum = i + 1;
			to->atttypid = keyType;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attstorage = typeTup->typstorage;
			to->attalign = typeTup->typalign;
			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->atttypmod = exprTypmod(indexkey);
			to->attislocal = true;
			to->attcollation = collationObjectId[i];

			ReleaseSysCache(tuple);

			/*
			 * Make sure the expression yields a type that's safe to store in
			 * an index.  We need this defense because we have index opclasses
			 * for pseudo-types such as "record", and the actually stored type
			 * had better be safe; eg, a named composite type is okay, an
			 * anonymous record type is not.  The test is the same as for
			 * whether a table column is of a safe type (which is why we
			 * needn't check for the non-expression case).
			 */
			CheckAttributeType(NameStr(to->attname),
							   to->atttypid, to->attcollation,
							   NIL, false);
		}

		/*
		 * We do not yet have the correct relation OID for the index, so just
		 * set it invalid for now.  InitializeAttributeOids() will fix it
		 * later.
		 */
		to->attrelid = InvalidOid;

		/*
		 * Check the opclass and index AM to see if either provides a keytype
		 * (overriding the attribute type).  Opclass takes precedence.
		 */
		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for opclass %u",
				 classObjectId[i]);
		opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
		if (OidIsValid(opclassTup->opckeytype))
			keyType = opclassTup->opckeytype;
		else
			keyType = amroutine->amkeytype;

		/*
		 * If keytype is specified as ANYELEMENT, and opcintype is ANYARRAY,
		 * then the attribute type must be an array (else it'd not have
		 * matched this opclass); use its element type.
		 */
		if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
		{
			keyType = get_base_element_type(to->atttypid);
			if (!OidIsValid(keyType))
				elog(ERROR, "could not get element type of array type %u",
					 to->atttypid);
		}

		ReleaseSysCache(tuple);

		/*
		 * If a key type different from the heap value is specified, update
		 * the type-related fields in the index tupdesc.  (typmod is reset to
		 * -1 since the stored type is not necessarily the declared one.)
		 */
		if (OidIsValid(keyType) && keyType != to->atttypid)
		{
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			to->atttypid = keyType;
			to->atttypmod = -1;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attalign = typeTup->typalign;
			to->attstorage = typeTup->typstorage;

			ReleaseSysCache(tuple);
		}
	}

	pfree(amroutine);

	return indexTupDesc;
}
495
496 /* ----------------------------------------------------------------
497 * InitializeAttributeOids
498 * ----------------------------------------------------------------
499 */
500 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)501 InitializeAttributeOids(Relation indexRelation,
502 int numatts,
503 Oid indexoid)
504 {
505 TupleDesc tupleDescriptor;
506 int i;
507
508 tupleDescriptor = RelationGetDescr(indexRelation);
509
510 for (i = 0; i < numatts; i += 1)
511 tupleDescriptor->attrs[i]->attrelid = indexoid;
512 }
513
514 /* ----------------------------------------------------------------
515 * AppendAttributeTuples
516 * ----------------------------------------------------------------
517 */
518 static void
AppendAttributeTuples(Relation indexRelation,int numatts)519 AppendAttributeTuples(Relation indexRelation, int numatts)
520 {
521 Relation pg_attribute;
522 CatalogIndexState indstate;
523 TupleDesc indexTupDesc;
524 int i;
525
526 /*
527 * open the attribute relation and its indexes
528 */
529 pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
530
531 indstate = CatalogOpenIndexes(pg_attribute);
532
533 /*
534 * insert data from new index's tupdesc into pg_attribute
535 */
536 indexTupDesc = RelationGetDescr(indexRelation);
537
538 for (i = 0; i < numatts; i++)
539 {
540 /*
541 * There used to be very grotty code here to set these fields, but I
542 * think it's unnecessary. They should be set already.
543 */
544 Assert(indexTupDesc->attrs[i]->attnum == i + 1);
545 Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
546
547 InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
548 }
549
550 CatalogCloseIndexes(indstate);
551
552 heap_close(pg_attribute, RowExclusiveLock);
553 }
554
555 /* ----------------------------------------------------------------
556 * UpdateIndexRelation
557 *
558 * Construct and insert a new entry in the pg_index catalog
559 * ----------------------------------------------------------------
560 */
static void
UpdateIndexRelation(Oid indexoid,
					Oid heapoid,
					IndexInfo *indexInfo,
					Oid *collationOids,
					Oid *classOids,
					int16 *coloptions,
					bool primary,
					bool isexclusion,
					bool immediate,
					bool isvalid)
{
	int2vector *indkey;
	oidvector  *indcollation;
	oidvector  *indclass;
	int2vector *indoption;
	Datum		exprsDatum;
	Datum		predDatum;
	Datum		values[Natts_pg_index];
	bool		nulls[Natts_pg_index];
	Relation	pg_index;
	HeapTuple	tuple;
	int			i;

	/*
	 * Copy the index key, opclass, and indoption info into arrays (should we
	 * make the caller pass them like this to start with?)
	 */
	indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
		indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
	indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
	indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
	indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);

	/*
	 * Convert the index expressions (if any) to a text datum; a zero Datum
	 * means "none" and becomes a NULL column below.
	 */
	if (indexInfo->ii_Expressions != NIL)
	{
		char	   *exprsString;

		exprsString = nodeToString(indexInfo->ii_Expressions);
		exprsDatum = CStringGetTextDatum(exprsString);
		pfree(exprsString);
	}
	else
		exprsDatum = (Datum) 0;

	/*
	 * Convert the index predicate (if any) to a text datum.  Note we convert
	 * implicit-AND format to normal explicit-AND for storage.
	 */
	if (indexInfo->ii_Predicate != NIL)
	{
		char	   *predString;

		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
		predDatum = CStringGetTextDatum(predString);
		pfree(predString);
	}
	else
		predDatum = (Datum) 0;

	/*
	 * open the system catalog index relation
	 */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	/*
	 * Build a pg_index tuple
	 */
	MemSet(nulls, false, sizeof(nulls));

	values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
	values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
	values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
	values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
	/* we set isvalid and isready the same way */
	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
	values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
	values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
	values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
	values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
	values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
	values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
	values[Anum_pg_index_indexprs - 1] = exprsDatum;
	if (exprsDatum == (Datum) 0)
		nulls[Anum_pg_index_indexprs - 1] = true;
	values[Anum_pg_index_indpred - 1] = predDatum;
	if (predDatum == (Datum) 0)
		nulls[Anum_pg_index_indpred - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);

	/*
	 * insert the tuple into the pg_index catalog
	 */
	CatalogTupleInsert(pg_index, tuple);

	/*
	 * close the relation and free the tuple
	 */
	heap_close(pg_index, RowExclusiveLock);
	heap_freetuple(tuple);
}
673
674
675 /*
676 * index_create
677 *
678 * heapRelation: table to build index on (suitably locked by caller)
679 * indexRelationName: what it say
680 * indexRelationId: normally, pass InvalidOid to let this routine
681 * generate an OID for the index. During bootstrap this may be
682 * nonzero to specify a preselected OID.
683 * relFileNode: normally, pass InvalidOid to get new storage. May be
684 * nonzero to attach an existing valid build.
685 * indexInfo: same info executor uses to insert into the index
686 * indexColNames: column names to use for index (List of char *)
687 * accessMethodObjectId: OID of index AM to use
688 * tableSpaceId: OID of tablespace to use
689 * collationObjectId: array of collation OIDs, one per index column
690 * classObjectId: array of index opclass OIDs, one per index column
691 * coloptions: array of per-index-column indoption settings
692 * reloptions: AM-specific options
693 * isprimary: index is a PRIMARY KEY
694 * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
695 * deferrable: constraint is DEFERRABLE
696 * initdeferred: constraint is INITIALLY DEFERRED
697 * allow_system_table_mods: allow table to be a system catalog
698 * skip_build: true to skip the index_build() step for the moment; caller
699 * must do it later (typically via reindex_index())
700 * concurrent: if true, do not lock the table against writers. The index
701 * will be marked "invalid" and the caller must take additional steps
702 * to fix it up.
703 * is_internal: if true, post creation hook for new index
704 * if_not_exists: if true, do not throw an error if a relation with
705 * the same name already exists.
706 *
707 * Returns the OID of the created index.
708 */
709 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bool isprimary,bool isconstraint,bool deferrable,bool initdeferred,bool allow_system_table_mods,bool skip_build,bool concurrent,bool is_internal,bool if_not_exists)710 index_create(Relation heapRelation,
711 const char *indexRelationName,
712 Oid indexRelationId,
713 Oid relFileNode,
714 IndexInfo *indexInfo,
715 List *indexColNames,
716 Oid accessMethodObjectId,
717 Oid tableSpaceId,
718 Oid *collationObjectId,
719 Oid *classObjectId,
720 int16 *coloptions,
721 Datum reloptions,
722 bool isprimary,
723 bool isconstraint,
724 bool deferrable,
725 bool initdeferred,
726 bool allow_system_table_mods,
727 bool skip_build,
728 bool concurrent,
729 bool is_internal,
730 bool if_not_exists)
731 {
732 Oid heapRelationId = RelationGetRelid(heapRelation);
733 Relation pg_class;
734 Relation indexRelation;
735 TupleDesc indexTupDesc;
736 bool shared_relation;
737 bool mapped_relation;
738 bool is_exclusion;
739 Oid namespaceId;
740 int i;
741 char relpersistence;
742
743 is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
744
745 pg_class = heap_open(RelationRelationId, RowExclusiveLock);
746
747 /*
748 * The index will be in the same namespace as its parent table, and is
749 * shared across databases if and only if the parent is. Likewise, it
750 * will use the relfilenode map if and only if the parent does; and it
751 * inherits the parent's relpersistence.
752 */
753 namespaceId = RelationGetNamespace(heapRelation);
754 shared_relation = heapRelation->rd_rel->relisshared;
755 mapped_relation = RelationIsMapped(heapRelation);
756 relpersistence = heapRelation->rd_rel->relpersistence;
757
758 /*
759 * check parameters
760 */
761 if (indexInfo->ii_NumIndexAttrs < 1)
762 elog(ERROR, "must index at least one column");
763
764 if (!allow_system_table_mods &&
765 IsSystemRelation(heapRelation) &&
766 IsNormalProcessingMode())
767 ereport(ERROR,
768 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
769 errmsg("user-defined indexes on system catalog tables are not supported")));
770
771 /*
772 * concurrent index build on a system catalog is unsafe because we tend to
773 * release locks before committing in catalogs
774 */
775 if (concurrent &&
776 IsSystemRelation(heapRelation))
777 ereport(ERROR,
778 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
779 errmsg("concurrent index creation on system catalog tables is not supported")));
780
781 /*
782 * This case is currently not supported, but there's no way to ask for it
783 * in the grammar anyway, so it can't happen.
784 */
785 if (concurrent && is_exclusion)
786 ereport(ERROR,
787 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
788 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
789
790 /*
791 * We cannot allow indexing a shared relation after initdb (because
792 * there's no way to make the entry in other databases' pg_class).
793 */
794 if (shared_relation && !IsBootstrapProcessingMode())
795 ereport(ERROR,
796 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
797 errmsg("shared indexes cannot be created after initdb")));
798
799 /*
800 * Shared relations must be in pg_global, too (last-ditch check)
801 */
802 if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
803 elog(ERROR, "shared relations must be placed in pg_global tablespace");
804
805 if (get_relname_relid(indexRelationName, namespaceId))
806 {
807 if (if_not_exists)
808 {
809 ereport(NOTICE,
810 (errcode(ERRCODE_DUPLICATE_TABLE),
811 errmsg("relation \"%s\" already exists, skipping",
812 indexRelationName)));
813 heap_close(pg_class, RowExclusiveLock);
814 return InvalidOid;
815 }
816
817 ereport(ERROR,
818 (errcode(ERRCODE_DUPLICATE_TABLE),
819 errmsg("relation \"%s\" already exists",
820 indexRelationName)));
821 }
822
823 /*
824 * construct tuple descriptor for index tuples
825 */
826 indexTupDesc = ConstructTupleDescriptor(heapRelation,
827 indexInfo,
828 indexColNames,
829 accessMethodObjectId,
830 collationObjectId,
831 classObjectId);
832
833 /*
834 * Allocate an OID for the index, unless we were told what to use.
835 *
836 * The OID will be the relfilenode as well, so make sure it doesn't
837 * collide with either pg_class OIDs or existing physical files.
838 */
839 if (!OidIsValid(indexRelationId))
840 {
841 /* Use binary-upgrade override for pg_class.oid/relfilenode? */
842 if (IsBinaryUpgrade)
843 {
844 if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
845 ereport(ERROR,
846 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
847 errmsg("pg_class index OID value not set when in binary upgrade mode")));
848
849 indexRelationId = binary_upgrade_next_index_pg_class_oid;
850 binary_upgrade_next_index_pg_class_oid = InvalidOid;
851 }
852 else
853 {
854 indexRelationId =
855 GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
856 }
857 }
858
859 /*
860 * create the index relation's relcache entry and physical disk file. (If
861 * we fail further down, it's the smgr's responsibility to remove the disk
862 * file again.)
863 */
864 indexRelation = heap_create(indexRelationName,
865 namespaceId,
866 tableSpaceId,
867 indexRelationId,
868 relFileNode,
869 indexTupDesc,
870 RELKIND_INDEX,
871 relpersistence,
872 shared_relation,
873 mapped_relation,
874 allow_system_table_mods);
875
876 Assert(indexRelationId == RelationGetRelid(indexRelation));
877
878 /*
879 * Obtain exclusive lock on it. Although no other backends can see it
880 * until we commit, this prevents deadlock-risk complaints from lock
881 * manager in cases such as CLUSTER.
882 */
883 LockRelation(indexRelation, AccessExclusiveLock);
884
885 /*
886 * Fill in fields of the index's pg_class entry that are not set correctly
887 * by heap_create.
888 *
889 * XXX should have a cleaner way to create cataloged indexes
890 */
891 indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
892 indexRelation->rd_rel->relam = accessMethodObjectId;
893 indexRelation->rd_rel->relhasoids = false;
894
895 /*
896 * store index's pg_class entry
897 */
898 InsertPgClassTuple(pg_class, indexRelation,
899 RelationGetRelid(indexRelation),
900 (Datum) 0,
901 reloptions);
902
903 /* done with pg_class */
904 heap_close(pg_class, RowExclusiveLock);
905
906 /*
907 * now update the object id's of all the attribute tuple forms in the
908 * index relation's tuple descriptor
909 */
910 InitializeAttributeOids(indexRelation,
911 indexInfo->ii_NumIndexAttrs,
912 indexRelationId);
913
914 /*
915 * append ATTRIBUTE tuples for the index
916 */
917 AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
918
919 /* ----------------
920 * update pg_index
921 * (append INDEX tuple)
922 *
923 * Note that this stows away a representation of "predicate".
924 * (Or, could define a rule to maintain the predicate) --Nels, Feb '92
925 * ----------------
926 */
927 UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
928 collationObjectId, classObjectId, coloptions,
929 isprimary, is_exclusion,
930 !deferrable,
931 !concurrent);
932
933 /*
934 * Register relcache invalidation on the indexes' heap relation, to
935 * maintain consistency of its index list
936 */
937 CacheInvalidateRelcache(heapRelation);
938
939 /*
940 * Register constraint and dependencies for the index.
941 *
942 * If the index is from a CONSTRAINT clause, construct a pg_constraint
943 * entry. The index will be linked to the constraint, which in turn is
944 * linked to the table. If it's not a CONSTRAINT, we need to make a
945 * dependency directly on the table.
946 *
947 * We don't need a dependency on the namespace, because there'll be an
948 * indirect dependency via our parent table.
949 *
950 * During bootstrap we can't register any dependencies, and we don't try
951 * to make a constraint either.
952 */
953 if (!IsBootstrapProcessingMode())
954 {
955 ObjectAddress myself,
956 referenced;
957
958 myself.classId = RelationRelationId;
959 myself.objectId = indexRelationId;
960 myself.objectSubId = 0;
961
962 if (isconstraint)
963 {
964 char constraintType;
965
966 if (isprimary)
967 constraintType = CONSTRAINT_PRIMARY;
968 else if (indexInfo->ii_Unique)
969 constraintType = CONSTRAINT_UNIQUE;
970 else if (is_exclusion)
971 constraintType = CONSTRAINT_EXCLUSION;
972 else
973 {
974 elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
975 constraintType = 0; /* keep compiler quiet */
976 }
977
978 index_constraint_create(heapRelation,
979 indexRelationId,
980 indexInfo,
981 indexRelationName,
982 constraintType,
983 deferrable,
984 initdeferred,
985 false, /* already marked primary */
986 false, /* pg_index entry is OK */
987 false, /* no old dependencies */
988 allow_system_table_mods,
989 is_internal);
990 }
991 else
992 {
993 bool have_simple_col = false;
994
995 /* Create auto dependencies on simply-referenced columns */
996 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
997 {
998 if (indexInfo->ii_KeyAttrNumbers[i] != 0)
999 {
1000 referenced.classId = RelationRelationId;
1001 referenced.objectId = heapRelationId;
1002 referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
1003
1004 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1005
1006 have_simple_col = true;
1007 }
1008 }
1009
1010 /*
1011 * If there are no simply-referenced columns, give the index an
1012 * auto dependency on the whole table. In most cases, this will
1013 * be redundant, but it might not be if the index expressions and
1014 * predicate contain no Vars or only whole-row Vars.
1015 */
1016 if (!have_simple_col)
1017 {
1018 referenced.classId = RelationRelationId;
1019 referenced.objectId = heapRelationId;
1020 referenced.objectSubId = 0;
1021
1022 recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1023 }
1024
1025 /* Non-constraint indexes can't be deferrable */
1026 Assert(!deferrable);
1027 Assert(!initdeferred);
1028 }
1029
1030 /* Store dependency on collations */
1031 /* The default collation is pinned, so don't bother recording it */
1032 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1033 {
1034 if (OidIsValid(collationObjectId[i]) &&
1035 collationObjectId[i] != DEFAULT_COLLATION_OID)
1036 {
1037 referenced.classId = CollationRelationId;
1038 referenced.objectId = collationObjectId[i];
1039 referenced.objectSubId = 0;
1040
1041 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1042 }
1043 }
1044
1045 /* Store dependency on operator classes */
1046 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1047 {
1048 referenced.classId = OperatorClassRelationId;
1049 referenced.objectId = classObjectId[i];
1050 referenced.objectSubId = 0;
1051
1052 recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1053 }
1054
1055 /* Store dependencies on anything mentioned in index expressions */
1056 if (indexInfo->ii_Expressions)
1057 {
1058 recordDependencyOnSingleRelExpr(&myself,
1059 (Node *) indexInfo->ii_Expressions,
1060 heapRelationId,
1061 DEPENDENCY_NORMAL,
1062 DEPENDENCY_AUTO, false);
1063 }
1064
1065 /* Store dependencies on anything mentioned in predicate */
1066 if (indexInfo->ii_Predicate)
1067 {
1068 recordDependencyOnSingleRelExpr(&myself,
1069 (Node *) indexInfo->ii_Predicate,
1070 heapRelationId,
1071 DEPENDENCY_NORMAL,
1072 DEPENDENCY_AUTO, false);
1073 }
1074 }
1075 else
1076 {
1077 /* Bootstrap mode - assert we weren't asked for constraint support */
1078 Assert(!isconstraint);
1079 Assert(!deferrable);
1080 Assert(!initdeferred);
1081 }
1082
1083 /* Post creation hook for new index */
1084 InvokeObjectPostCreateHookArg(RelationRelationId,
1085 indexRelationId, 0, is_internal);
1086
1087 /*
1088 * Advance the command counter so that we can see the newly-entered
1089 * catalog tuples for the index.
1090 */
1091 CommandCounterIncrement();
1092
1093 /*
1094 * In bootstrap mode, we have to fill in the index strategy structure with
1095 * information from the catalogs. If we aren't bootstrapping, then the
1096 * relcache entry has already been rebuilt thanks to sinval update during
1097 * CommandCounterIncrement.
1098 */
1099 if (IsBootstrapProcessingMode())
1100 RelationInitIndexAccessInfo(indexRelation);
1101 else
1102 Assert(indexRelation->rd_indexcxt != NULL);
1103
1104 /*
1105 * If this is bootstrap (initdb) time, then we don't actually fill in the
1106 * index yet. We'll be creating more indexes and classes later, so we
1107 * delay filling them in until just before we're done with bootstrapping.
1108 * Similarly, if the caller specified skip_build then filling the index is
1109 * delayed till later (ALTER TABLE can save work in some cases with this).
1110 * Otherwise, we call the AM routine that constructs the index.
1111 */
1112 if (IsBootstrapProcessingMode())
1113 {
1114 index_register(heapRelationId, indexRelationId, indexInfo);
1115 }
1116 else if (skip_build)
1117 {
1118 /*
1119 * Caller is responsible for filling the index later on. However,
1120 * we'd better make sure that the heap relation is correctly marked as
1121 * having an index.
1122 */
1123 index_update_stats(heapRelation,
1124 true,
1125 isprimary,
1126 -1.0);
1127 /* Make the above update visible */
1128 CommandCounterIncrement();
1129 }
1130 else
1131 {
1132 index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1133 }
1134
1135 /*
1136 * Close the index; but we keep the lock that we acquired above until end
1137 * of transaction. Closing the heap is caller's responsibility.
1138 */
1139 index_close(indexRelation, NoLock);
1140
1141 return indexRelationId;
1142 }
1143
1144 /*
1145 * index_constraint_create
1146 *
1147 * Set up a constraint associated with an index. Return the new constraint's
1148 * address.
1149 *
1150 * heapRelation: table owning the index (must be suitably locked by caller)
1151 * indexRelationId: OID of the index
1152 * indexInfo: same info executor uses to insert into the index
1153 * constraintName: what it say (generally, should match name of index)
1154 * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1155 * CONSTRAINT_EXCLUSION
1156 * deferrable: constraint is DEFERRABLE
1157 * initdeferred: constraint is INITIALLY DEFERRED
1158 * mark_as_primary: if true, set flags to mark index as primary key
1159 * update_pgindex: if true, update pg_index row (else caller's done that)
1160 * remove_old_dependencies: if true, remove existing dependencies of index
1161 * on table's columns
1162 * allow_system_table_mods: allow table to be a system catalog
1163 * is_internal: index is constructed due to internal process
1164 */
1165 ObjectAddress
index_constraint_create(Relation heapRelation,Oid indexRelationId,IndexInfo * indexInfo,const char * constraintName,char constraintType,bool deferrable,bool initdeferred,bool mark_as_primary,bool update_pgindex,bool remove_old_dependencies,bool allow_system_table_mods,bool is_internal)1166 index_constraint_create(Relation heapRelation,
1167 Oid indexRelationId,
1168 IndexInfo *indexInfo,
1169 const char *constraintName,
1170 char constraintType,
1171 bool deferrable,
1172 bool initdeferred,
1173 bool mark_as_primary,
1174 bool update_pgindex,
1175 bool remove_old_dependencies,
1176 bool allow_system_table_mods,
1177 bool is_internal)
1178 {
1179 Oid namespaceId = RelationGetNamespace(heapRelation);
1180 ObjectAddress myself,
1181 referenced;
1182 Oid conOid;
1183
1184 /* constraint creation support doesn't work while bootstrapping */
1185 Assert(!IsBootstrapProcessingMode());
1186
1187 /* enforce system-table restriction */
1188 if (!allow_system_table_mods &&
1189 IsSystemRelation(heapRelation) &&
1190 IsNormalProcessingMode())
1191 ereport(ERROR,
1192 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1193 errmsg("user-defined indexes on system catalog tables are not supported")));
1194
1195 /* primary/unique constraints shouldn't have any expressions */
1196 if (indexInfo->ii_Expressions &&
1197 constraintType != CONSTRAINT_EXCLUSION)
1198 elog(ERROR, "constraints cannot have index expressions");
1199
1200 /*
1201 * If we're manufacturing a constraint for a pre-existing index, we need
1202 * to get rid of the existing auto dependencies for the index (the ones
1203 * that index_create() would have made instead of calling this function).
1204 *
1205 * Note: this code would not necessarily do the right thing if the index
1206 * has any expressions or predicate, but we'd never be turning such an
1207 * index into a UNIQUE or PRIMARY KEY constraint.
1208 */
1209 if (remove_old_dependencies)
1210 deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1211 RelationRelationId, DEPENDENCY_AUTO);
1212
1213 /*
1214 * Construct a pg_constraint entry.
1215 */
1216 conOid = CreateConstraintEntry(constraintName,
1217 namespaceId,
1218 constraintType,
1219 deferrable,
1220 initdeferred,
1221 true,
1222 RelationGetRelid(heapRelation),
1223 indexInfo->ii_KeyAttrNumbers,
1224 indexInfo->ii_NumIndexAttrs,
1225 InvalidOid, /* no domain */
1226 indexRelationId, /* index OID */
1227 InvalidOid, /* no foreign key */
1228 NULL,
1229 NULL,
1230 NULL,
1231 NULL,
1232 0,
1233 ' ',
1234 ' ',
1235 ' ',
1236 indexInfo->ii_ExclusionOps,
1237 NULL, /* no check constraint */
1238 NULL,
1239 NULL,
1240 true, /* islocal */
1241 0, /* inhcount */
1242 true, /* noinherit */
1243 is_internal);
1244
1245 /*
1246 * Register the index as internally dependent on the constraint.
1247 *
1248 * Note that the constraint has a dependency on the table, so we don't
1249 * need (or want) any direct dependency from the index to the table.
1250 */
1251 myself.classId = RelationRelationId;
1252 myself.objectId = indexRelationId;
1253 myself.objectSubId = 0;
1254
1255 referenced.classId = ConstraintRelationId;
1256 referenced.objectId = conOid;
1257 referenced.objectSubId = 0;
1258
1259 recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
1260
1261 /*
1262 * If the constraint is deferrable, create the deferred uniqueness
1263 * checking trigger. (The trigger will be given an internal dependency on
1264 * the constraint by CreateTrigger.)
1265 */
1266 if (deferrable)
1267 {
1268 CreateTrigStmt *trigger;
1269
1270 trigger = makeNode(CreateTrigStmt);
1271 trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1272 "PK_ConstraintTrigger" :
1273 "Unique_ConstraintTrigger";
1274 trigger->relation = NULL;
1275 trigger->funcname = SystemFuncName("unique_key_recheck");
1276 trigger->args = NIL;
1277 trigger->row = true;
1278 trigger->timing = TRIGGER_TYPE_AFTER;
1279 trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
1280 trigger->columns = NIL;
1281 trigger->whenClause = NULL;
1282 trigger->isconstraint = true;
1283 trigger->deferrable = true;
1284 trigger->initdeferred = initdeferred;
1285 trigger->constrrel = NULL;
1286
1287 (void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
1288 InvalidOid, conOid, indexRelationId, true);
1289 }
1290
1291 /*
1292 * If needed, mark the table as having a primary key. We assume it can't
1293 * have been so marked already, so no need to clear the flag in the other
1294 * case.
1295 *
1296 * Note: this might better be done by callers. We do it here to avoid
1297 * exposing index_update_stats() globally, but that wouldn't be necessary
1298 * if relhaspkey went away.
1299 */
1300 if (mark_as_primary)
1301 index_update_stats(heapRelation,
1302 true,
1303 true,
1304 -1.0);
1305
1306 /*
1307 * If needed, mark the index as primary and/or deferred in pg_index.
1308 *
1309 * Note: When making an existing index into a constraint, caller must have
1310 * a table lock that prevents concurrent table updates; otherwise, there
1311 * is a risk that concurrent readers of the table will miss seeing this
1312 * index at all.
1313 */
1314 if (update_pgindex && (mark_as_primary || deferrable))
1315 {
1316 Relation pg_index;
1317 HeapTuple indexTuple;
1318 Form_pg_index indexForm;
1319 bool dirty = false;
1320
1321 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1322
1323 indexTuple = SearchSysCacheCopy1(INDEXRELID,
1324 ObjectIdGetDatum(indexRelationId));
1325 if (!HeapTupleIsValid(indexTuple))
1326 elog(ERROR, "cache lookup failed for index %u", indexRelationId);
1327 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1328
1329 if (mark_as_primary && !indexForm->indisprimary)
1330 {
1331 indexForm->indisprimary = true;
1332 dirty = true;
1333 }
1334
1335 if (deferrable && indexForm->indimmediate)
1336 {
1337 indexForm->indimmediate = false;
1338 dirty = true;
1339 }
1340
1341 if (dirty)
1342 {
1343 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
1344
1345 InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
1346 InvalidOid, is_internal);
1347 }
1348
1349 heap_freetuple(indexTuple);
1350 heap_close(pg_index, RowExclusiveLock);
1351 }
1352
1353 return referenced;
1354 }
1355
1356 /*
1357 * index_drop
1358 *
1359 * NOTE: this routine should now only be called through performDeletion(),
1360 * else associated dependencies won't be cleaned up.
1361 */
1362 void
index_drop(Oid indexId,bool concurrent)1363 index_drop(Oid indexId, bool concurrent)
1364 {
1365 Oid heapId;
1366 Relation userHeapRelation;
1367 Relation userIndexRelation;
1368 Relation indexRelation;
1369 HeapTuple tuple;
1370 bool hasexprs;
1371 LockRelId heaprelid,
1372 indexrelid;
1373 LOCKTAG heaplocktag;
1374 LOCKMODE lockmode;
1375
1376 /*
1377 * A temporary relation uses a non-concurrent DROP. Other backends can't
1378 * access a temporary relation, so there's no harm in grabbing a stronger
1379 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
1380 * more efficient.
1381 */
1382 Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
1383 !concurrent);
1384
1385 /*
1386 * To drop an index safely, we must grab exclusive lock on its parent
1387 * table. Exclusive lock on the index alone is insufficient because
1388 * another backend might be about to execute a query on the parent table.
1389 * If it relies on a previously cached list of index OIDs, then it could
1390 * attempt to access the just-dropped index. We must therefore take a
1391 * table lock strong enough to prevent all queries on the table from
1392 * proceeding until we commit and send out a shared-cache-inval notice
1393 * that will make them update their index lists.
1394 *
1395 * In the concurrent case we avoid this requirement by disabling index use
1396 * in multiple steps and waiting out any transactions that might be using
1397 * the index, so we don't need exclusive lock on the parent table. Instead
1398 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
1399 * doing CREATE/DROP INDEX CONCURRENTLY on the same index. (We will get
1400 * AccessExclusiveLock on the index below, once we're sure nobody else is
1401 * using it.)
1402 */
1403 heapId = IndexGetRelation(indexId, false);
1404 lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
1405 userHeapRelation = heap_open(heapId, lockmode);
1406 userIndexRelation = index_open(indexId, lockmode);
1407
1408 /*
1409 * We might still have open queries using it in our own session, which the
1410 * above locking won't prevent, so test explicitly.
1411 */
1412 CheckTableNotInUse(userIndexRelation, "DROP INDEX");
1413
1414 /*
1415 * Drop Index Concurrently is more or less the reverse process of Create
1416 * Index Concurrently.
1417 *
1418 * First we unset indisvalid so queries starting afterwards don't use the
1419 * index to answer queries anymore. We have to keep indisready = true so
1420 * transactions that are still scanning the index can continue to see
1421 * valid index contents. For instance, if they are using READ COMMITTED
1422 * mode, and another transaction makes changes and commits, they need to
1423 * see those new tuples in the index.
1424 *
1425 * After all transactions that could possibly have used the index for
1426 * queries end, we can unset indisready and indislive, then wait till
1427 * nobody could be touching it anymore. (Note: we need indislive because
1428 * this state must be distinct from the initial state during CREATE INDEX
1429 * CONCURRENTLY, which has indislive true while indisready and indisvalid
1430 * are false. That's because in that state, transactions must examine the
1431 * index for HOT-safety decisions, while in this state we don't want them
1432 * to open it at all.)
1433 *
1434 * Since all predicate locks on the index are about to be made invalid, we
1435 * must promote them to predicate locks on the heap. In the
1436 * non-concurrent case we can just do that now. In the concurrent case
1437 * it's a bit trickier. The predicate locks must be moved when there are
1438 * no index scans in progress on the index and no more can subsequently
1439 * start, so that no new predicate locks can be made on the index. Also,
1440 * they must be moved before heap inserts stop maintaining the index, else
1441 * the conflict with the predicate lock on the index gap could be missed
1442 * before the lock on the heap relation is in place to detect a conflict
1443 * based on the heap tuple insert.
1444 */
1445 if (concurrent)
1446 {
1447 /*
1448 * We must commit our transaction in order to make the first pg_index
1449 * state update visible to other sessions. If the DROP machinery has
1450 * already performed any other actions (removal of other objects,
1451 * pg_depend entries, etc), the commit would make those actions
1452 * permanent, which would leave us with inconsistent catalog state if
1453 * we fail partway through the following sequence. Since DROP INDEX
1454 * CONCURRENTLY is restricted to dropping just one index that has no
1455 * dependencies, we should get here before anything's been done ---
1456 * but let's check that to be sure. We can verify that the current
1457 * transaction has not executed any transactional updates by checking
1458 * that no XID has been assigned.
1459 */
1460 if (GetTopTransactionIdIfAny() != InvalidTransactionId)
1461 ereport(ERROR,
1462 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1463 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
1464
1465 /*
1466 * Mark index invalid by updating its pg_index entry
1467 */
1468 index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
1469
1470 /*
1471 * Invalidate the relcache for the table, so that after this commit
1472 * all sessions will refresh any cached plans that might reference the
1473 * index.
1474 */
1475 CacheInvalidateRelcache(userHeapRelation);
1476
1477 /* save lockrelid and locktag for below, then close but keep locks */
1478 heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
1479 SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1480 indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
1481
1482 heap_close(userHeapRelation, NoLock);
1483 index_close(userIndexRelation, NoLock);
1484
1485 /*
1486 * We must commit our current transaction so that the indisvalid
1487 * update becomes visible to other transactions; then start another.
1488 * Note that any previously-built data structures are lost in the
1489 * commit. The only data we keep past here are the relation IDs.
1490 *
1491 * Before committing, get a session-level lock on the table, to ensure
1492 * that neither it nor the index can be dropped before we finish. This
1493 * cannot block, even if someone else is waiting for access, because
1494 * we already have the same lock within our transaction.
1495 */
1496 LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1497 LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1498
1499 PopActiveSnapshot();
1500 CommitTransactionCommand();
1501 StartTransactionCommand();
1502
1503 /*
1504 * Now we must wait until no running transaction could be using the
1505 * index for a query. Use AccessExclusiveLock here to check for
1506 * running transactions that hold locks of any kind on the table. Note
1507 * we do not need to worry about xacts that open the table for reading
1508 * after this point; they will see the index as invalid when they open
1509 * the relation.
1510 *
1511 * Note: the reason we use actual lock acquisition here, rather than
1512 * just checking the ProcArray and sleeping, is that deadlock is
1513 * possible if one of the transactions in question is blocked trying
1514 * to acquire an exclusive lock on our table. The lock code will
1515 * detect deadlock and error out properly.
1516 */
1517 WaitForLockers(heaplocktag, AccessExclusiveLock);
1518
1519 /*
1520 * No more predicate locks will be acquired on this index, and we're
1521 * about to stop doing inserts into the index which could show
1522 * conflicts with existing predicate locks, so now is the time to move
1523 * them to the heap relation.
1524 */
1525 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1526 userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1527 TransferPredicateLocksToHeapRelation(userIndexRelation);
1528
1529 /*
1530 * Now we are sure that nobody uses the index for queries; they just
1531 * might have it open for updating it. So now we can unset indisready
1532 * and indislive, then wait till nobody could be using it at all
1533 * anymore.
1534 */
1535 index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1536
1537 /*
1538 * Invalidate the relcache for the table, so that after this commit
1539 * all sessions will refresh the table's index list. Forgetting just
1540 * the index's relcache entry is not enough.
1541 */
1542 CacheInvalidateRelcache(userHeapRelation);
1543
1544 /*
1545 * Close the relations again, though still holding session lock.
1546 */
1547 heap_close(userHeapRelation, NoLock);
1548 index_close(userIndexRelation, NoLock);
1549
1550 /*
1551 * Again, commit the transaction to make the pg_index update visible
1552 * to other sessions.
1553 */
1554 CommitTransactionCommand();
1555 StartTransactionCommand();
1556
1557 /*
1558 * Wait till every transaction that saw the old index state has
1559 * finished.
1560 */
1561 WaitForLockers(heaplocktag, AccessExclusiveLock);
1562
1563 /*
1564 * Re-open relations to allow us to complete our actions.
1565 *
1566 * At this point, nothing should be accessing the index, but lets
1567 * leave nothing to chance and grab AccessExclusiveLock on the index
1568 * before the physical deletion.
1569 */
1570 userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1571 userIndexRelation = index_open(indexId, AccessExclusiveLock);
1572 }
1573 else
1574 {
1575 /* Not concurrent, so just transfer predicate locks and we're good */
1576 TransferPredicateLocksToHeapRelation(userIndexRelation);
1577 }
1578
1579 /*
1580 * Schedule physical removal of the files
1581 */
1582 RelationDropStorage(userIndexRelation);
1583
1584 /*
1585 * Close and flush the index's relcache entry, to ensure relcache doesn't
1586 * try to rebuild it while we're deleting catalog entries. We keep the
1587 * lock though.
1588 */
1589 index_close(userIndexRelation, NoLock);
1590
1591 RelationForgetRelation(indexId);
1592
1593 /*
1594 * fix INDEX relation, and check for expressional index
1595 */
1596 indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
1597
1598 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
1599 if (!HeapTupleIsValid(tuple))
1600 elog(ERROR, "cache lookup failed for index %u", indexId);
1601
1602 hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
1603
1604 CatalogTupleDelete(indexRelation, &tuple->t_self);
1605
1606 ReleaseSysCache(tuple);
1607 heap_close(indexRelation, RowExclusiveLock);
1608
1609 /*
1610 * if it has any expression columns, we might have stored statistics about
1611 * them.
1612 */
1613 if (hasexprs)
1614 RemoveStatistics(indexId, 0);
1615
1616 /*
1617 * fix ATTRIBUTE relation
1618 */
1619 DeleteAttributeTuples(indexId);
1620
1621 /*
1622 * fix RELATION relation
1623 */
1624 DeleteRelationTuple(indexId);
1625
1626 /*
1627 * We are presently too lazy to attempt to compute the new correct value
1628 * of relhasindex (the next VACUUM will fix it if necessary). So there is
1629 * no need to update the pg_class tuple for the owning relation. But we
1630 * must send out a shared-cache-inval notice on the owning relation to
1631 * ensure other backends update their relcache lists of indexes. (In the
1632 * concurrent case, this is redundant but harmless.)
1633 */
1634 CacheInvalidateRelcache(userHeapRelation);
1635
1636 /*
1637 * Close owning rel, but keep lock
1638 */
1639 heap_close(userHeapRelation, NoLock);
1640
1641 /*
1642 * Release the session locks before we go.
1643 */
1644 if (concurrent)
1645 {
1646 UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1647 UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1648 }
1649 }
1650
1651 /* ----------------------------------------------------------------
1652 * index_build support
1653 * ----------------------------------------------------------------
1654 */
1655
1656 /* ----------------
1657 * BuildIndexInfo
1658 * Construct an IndexInfo record for an open index
1659 *
1660 * IndexInfo stores the information about the index that's needed by
1661 * FormIndexDatum, which is used for both index_build() and later insertion
1662 * of individual index tuples. Normally we build an IndexInfo for an index
1663 * just once per command, and then use it for (potentially) many tuples.
1664 * ----------------
1665 */
1666 IndexInfo *
BuildIndexInfo(Relation index)1667 BuildIndexInfo(Relation index)
1668 {
1669 IndexInfo *ii = makeNode(IndexInfo);
1670 Form_pg_index indexStruct = index->rd_index;
1671 int i;
1672 int numKeys;
1673
1674 /* check the number of keys, and copy attr numbers into the IndexInfo */
1675 numKeys = indexStruct->indnatts;
1676 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1677 elog(ERROR, "invalid indnatts %d for index %u",
1678 numKeys, RelationGetRelid(index));
1679 ii->ii_NumIndexAttrs = numKeys;
1680 for (i = 0; i < numKeys; i++)
1681 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1682
1683 /* fetch any expressions needed for expressional indexes */
1684 ii->ii_Expressions = RelationGetIndexExpressions(index);
1685 ii->ii_ExpressionsState = NIL;
1686
1687 /* fetch index predicate if any */
1688 ii->ii_Predicate = RelationGetIndexPredicate(index);
1689 ii->ii_PredicateState = NULL;
1690
1691 /* fetch exclusion constraint info if any */
1692 if (indexStruct->indisexclusion)
1693 {
1694 RelationGetExclusionInfo(index,
1695 &ii->ii_ExclusionOps,
1696 &ii->ii_ExclusionProcs,
1697 &ii->ii_ExclusionStrats);
1698 }
1699 else
1700 {
1701 ii->ii_ExclusionOps = NULL;
1702 ii->ii_ExclusionProcs = NULL;
1703 ii->ii_ExclusionStrats = NULL;
1704 }
1705
1706 /* other info */
1707 ii->ii_Unique = indexStruct->indisunique;
1708 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1709 /* assume not doing speculative insertion for now */
1710 ii->ii_UniqueOps = NULL;
1711 ii->ii_UniqueProcs = NULL;
1712 ii->ii_UniqueStrats = NULL;
1713
1714 /* initialize index-build state to default */
1715 ii->ii_Concurrent = false;
1716 ii->ii_BrokenHotChain = false;
1717
1718 /* set up for possible use by index AM */
1719 ii->ii_AmCache = NULL;
1720 ii->ii_Context = CurrentMemoryContext;
1721
1722 return ii;
1723 }
1724
1725 /* ----------------
1726 * BuildDummyIndexInfo
1727 * Construct a dummy IndexInfo record for an open index
1728 *
1729 * This differs from the real BuildIndexInfo in that it will never run any
1730 * user-defined code that might exist in index expressions or predicates.
1731 * Instead of the real index expressions, we return null constants that have
1732 * the right types/typmods/collations. Predicates and exclusion clauses are
1733 * just ignored. This is sufficient for the purpose of truncating an index,
1734 * since we will not need to actually evaluate the expressions or predicates;
1735 * the only thing that's likely to be done with the data is construction of
1736 * a tupdesc describing the index's rowtype.
1737 * ----------------
1738 */
1739 IndexInfo *
BuildDummyIndexInfo(Relation index)1740 BuildDummyIndexInfo(Relation index)
1741 {
1742 IndexInfo *ii = makeNode(IndexInfo);
1743 Form_pg_index indexStruct = index->rd_index;
1744 int i;
1745 int numKeys;
1746
1747 /* check the number of keys, and copy attr numbers into the IndexInfo */
1748 numKeys = indexStruct->indnatts;
1749 if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1750 elog(ERROR, "invalid indnatts %d for index %u",
1751 numKeys, RelationGetRelid(index));
1752 ii->ii_NumIndexAttrs = numKeys;
1753 for (i = 0; i < numKeys; i++)
1754 ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1755
1756 /* fetch dummy expressions for expressional indexes */
1757 ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1758 ii->ii_ExpressionsState = NIL;
1759
1760 /* pretend there is no predicate */
1761 ii->ii_Predicate = NIL;
1762 ii->ii_PredicateState = NULL;
1763
1764 /* We ignore the exclusion constraint if any */
1765 ii->ii_ExclusionOps = NULL;
1766 ii->ii_ExclusionProcs = NULL;
1767 ii->ii_ExclusionStrats = NULL;
1768
1769 /* other info */
1770 ii->ii_Unique = indexStruct->indisunique;
1771 ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1772 /* assume not doing speculative insertion for now */
1773 ii->ii_UniqueOps = NULL;
1774 ii->ii_UniqueProcs = NULL;
1775 ii->ii_UniqueStrats = NULL;
1776
1777 /* initialize index-build state to default */
1778 ii->ii_Concurrent = false;
1779 ii->ii_BrokenHotChain = false;
1780
1781 /* set up for possible use by index AM */
1782 ii->ii_AmCache = NULL;
1783 ii->ii_Context = CurrentMemoryContext;
1784
1785 return ii;
1786 }
1787
1788 /* ----------------
1789 * BuildSpeculativeIndexInfo
1790 * Add extra state to IndexInfo record
1791 *
1792 * For unique indexes, we usually don't want to add info to the IndexInfo for
1793 * checking uniqueness, since the B-Tree AM handles that directly. However,
1794 * in the case of speculative insertion, additional support is required.
1795 *
1796 * Do this processing here rather than in BuildIndexInfo() to not incur the
1797 * overhead in the common non-speculative cases.
1798 * ----------------
1799 */
1800 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)1801 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
1802 {
1803 int ncols = index->rd_rel->relnatts;
1804 int i;
1805
1806 /*
1807 * fetch info for checking unique indexes
1808 */
1809 Assert(ii->ii_Unique);
1810
1811 if (index->rd_rel->relam != BTREE_AM_OID)
1812 elog(ERROR, "unexpected non-btree speculative unique index");
1813
1814 ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1815 ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1816 ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1817
1818 /*
1819 * We have to look up the operator's strategy number. This provides a
1820 * cross-check that the operator does match the index.
1821 */
1822 /* We need the func OIDs and strategy numbers too */
1823 for (i = 0; i < ncols; i++)
1824 {
1825 ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1826 ii->ii_UniqueOps[i] =
1827 get_opfamily_member(index->rd_opfamily[i],
1828 index->rd_opcintype[i],
1829 index->rd_opcintype[i],
1830 ii->ii_UniqueStrats[i]);
1831 if (!OidIsValid(ii->ii_UniqueOps[i]))
1832 elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
1833 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
1834 index->rd_opcintype[i], index->rd_opfamily[i]);
1835 ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1836 }
1837 }
1838
1839 /* ----------------
1840 * FormIndexDatum
1841 * Construct values[] and isnull[] arrays for a new index tuple.
1842 *
1843 * indexInfo Info about the index
1844 * slot Heap tuple for which we must prepare an index entry
1845 * estate executor state for evaluating any index expressions
1846 * values Array of index Datums (output area)
1847 * isnull Array of is-null indicators (output area)
1848 *
1849 * When there are no index expressions, estate may be NULL. Otherwise it
1850 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1851 * context must point to the heap tuple passed in.
1852 *
1853 * Notice we don't actually call index_form_tuple() here; we just prepare
1854 * its input arrays values[] and isnull[]. This is because the index AM
1855 * may wish to alter the data before storage.
1856 * ----------------
1857 */
1858 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)1859 FormIndexDatum(IndexInfo *indexInfo,
1860 TupleTableSlot *slot,
1861 EState *estate,
1862 Datum *values,
1863 bool *isnull)
1864 {
1865 ListCell *indexpr_item;
1866 int i;
1867
1868 if (indexInfo->ii_Expressions != NIL &&
1869 indexInfo->ii_ExpressionsState == NIL)
1870 {
1871 /* First time through, set up expression evaluation state */
1872 indexInfo->ii_ExpressionsState =
1873 ExecPrepareExprList(indexInfo->ii_Expressions, estate);
1874 /* Check caller has set up context correctly */
1875 Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1876 }
1877 indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1878
1879 for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1880 {
1881 int keycol = indexInfo->ii_KeyAttrNumbers[i];
1882 Datum iDatum;
1883 bool isNull;
1884
1885 if (keycol != 0)
1886 {
1887 /*
1888 * Plain index column; get the value we need directly from the
1889 * heap tuple.
1890 */
1891 iDatum = slot_getattr(slot, keycol, &isNull);
1892 }
1893 else
1894 {
1895 /*
1896 * Index expression --- need to evaluate it.
1897 */
1898 if (indexpr_item == NULL)
1899 elog(ERROR, "wrong number of index expressions");
1900 iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1901 GetPerTupleExprContext(estate),
1902 &isNull);
1903 indexpr_item = lnext(indexpr_item);
1904 }
1905 values[i] = iDatum;
1906 isnull[i] = isNull;
1907 }
1908
1909 if (indexpr_item != NULL)
1910 elog(ERROR, "wrong number of index expressions");
1911 }
1912
1913
1914 /*
1915 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1916 *
1917 * This routine updates the pg_class row of either an index or its parent
1918 * relation after CREATE INDEX or REINDEX. Its rather bizarre API is designed
1919 * to ensure we can do all the necessary work in just one update.
1920 *
1921 * hasindex: set relhasindex to this value
1922 * isprimary: if true, set relhaspkey true; else no change
1923 * reltuples: if >= 0, set reltuples to this value; else no change
1924 *
1925 * If reltuples >= 0, relpages and relallvisible are also updated (using
1926 * RelationGetNumberOfBlocks() and visibilitymap_count()).
1927 *
1928 * NOTE: an important side-effect of this operation is that an SI invalidation
1929 * message is sent out to all backends --- including me --- causing relcache
1930 * entries to be flushed or updated with the new data. This must happen even
1931 * if we find that no change is needed in the pg_class row. When updating
1932 * a heap entry, this ensures that other backends find out about the new
1933 * index. When updating an index, it's important because some index AMs
1934 * expect a relcache flush to occur after REINDEX.
1935 */
static void
index_update_stats(Relation rel,
				   bool hasindex,
				   bool isprimary,
				   double reltuples)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	pg_class;
	HeapTuple	tuple;
	Form_pg_class rd_rel;
	bool		dirty;

	/*
	 * We always update the pg_class row using a non-transactional,
	 * overwrite-in-place update.  There are several reasons for this:
	 *
	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
	 *
	 * 2. We could be reindexing pg_class itself, in which case we can't move
	 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
	 * not know about all the indexes yet (see reindex_relation).
	 *
	 * 3. Because we execute CREATE INDEX with just share lock on the parent
	 * rel (to allow concurrent index creations), an ordinary update could
	 * suffer a tuple-concurrently-updated failure against another CREATE
	 * INDEX committing at about the same time.  We can avoid that by having
	 * them both do nontransactional updates (we assume they will both be
	 * trying to change the pg_class row to the same thing, so it doesn't
	 * matter which goes first).
	 *
	 * It is safe to use a non-transactional update even though our
	 * transaction could still fail before committing.  Setting relhasindex
	 * true is safe even if there are no indexes (VACUUM will eventually fix
	 * it), likewise for relhaspkey.  And of course the new relpages and
	 * reltuples counts are correct regardless.  However, we don't want to
	 * change relpages (or relallvisible) if the caller isn't providing an
	 * updated reltuples count, because that would bollix the
	 * reltuples/relpages ratio which is what's really important.
	 */

	pg_class = heap_open(RelationRelationId, RowExclusiveLock);

	/*
	 * Make a copy of the tuple to update.  Normally we use the syscache, but
	 * we can't rely on that during bootstrap or while reindexing pg_class
	 * itself.
	 */
	if (IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationRelationId))
	{
		/* don't assume syscache will work; scan pg_class by OID directly */
		HeapScanDesc pg_class_scan;
		ScanKeyData key[1];

		ScanKeyInit(&key[0],
					ObjectIdAttributeNumber,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
		/* copy before ending the scan, since the scan owns the tuple */
		tuple = heap_copytuple(tuple);
		heap_endscan(pg_class_scan);
	}
	else
	{
		/* normal case, use syscache */
		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/* Apply required updates, if any, to copied tuple */

	dirty = false;
	if (rd_rel->relhasindex != hasindex)
	{
		rd_rel->relhasindex = hasindex;
		dirty = true;
	}
	if (isprimary)
	{
		/* note: relhaspkey is only ever set here, never cleared */
		if (!rd_rel->relhaspkey)
		{
			rd_rel->relhaspkey = true;
			dirty = true;
		}
	}

	/* reltuples < 0 means "no new tuple count"; leave page stats alone too */
	if (reltuples >= 0)
	{
		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
		BlockNumber relallvisible;

		if (rd_rel->relkind != RELKIND_INDEX)
			visibilitymap_count(rel, &relallvisible, NULL);
		else					/* don't bother for indexes */
			relallvisible = 0;

		if (rd_rel->relpages != (int32) relpages)
		{
			rd_rel->relpages = (int32) relpages;
			dirty = true;
		}
		if (rd_rel->reltuples != (float4) reltuples)
		{
			rd_rel->reltuples = (float4) reltuples;
			dirty = true;
		}
		if (rd_rel->relallvisible != (int32) relallvisible)
		{
			rd_rel->relallvisible = (int32) relallvisible;
			dirty = true;
		}
	}

	/*
	 * If anything changed, write out the tuple
	 */
	if (dirty)
	{
		heap_inplace_update(pg_class, tuple);
		/* the above sends a cache inval message */
	}
	else
	{
		/* no need to change tuple, but force relcache inval anyway */
		CacheInvalidateRelcacheByTuple(tuple);
	}

	heap_freetuple(tuple);

	heap_close(pg_class, RowExclusiveLock);
}
2072
2073
2074 /*
2075 * index_build - invoke access-method-specific index build procedure
2076 *
2077 * On entry, the index's catalog entries are valid, and its physical disk
2078 * file has been created but is empty. We call the AM-specific build
2079 * procedure to fill in the index contents. We then update the pg_class
2080 * entries of the index and heap relation as needed, using statistics
2081 * returned by ambuild as well as data passed by the caller.
2082 *
2083 * isprimary tells whether to mark the index as a primary-key index.
2084 * isreindex indicates we are recreating a previously-existing index.
2085 *
2086 * Note: when reindexing an existing index, isprimary can be false even if
2087 * the index is a PK; it's already properly marked and need not be re-marked.
2088 *
2089 * Note: before Postgres 8.2, the passed-in heap and index Relations
2090 * were automatically closed by this routine. This is no longer the case.
2091 * The caller opened 'em, and the caller should close 'em.
2092 */
void
index_build(Relation heapRelation,
			Relation indexRelation,
			IndexInfo *indexInfo,
			bool isprimary,
			bool isreindex)
{
	IndexBuildResult *stats;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/*
	 * sanity checks
	 */
	Assert(RelationIsValid(indexRelation));
	Assert(PointerIsValid(indexRelation->rd_amroutine));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));

	ereport(DEBUG1,
			(errmsg("building index \"%s\" on table \"%s\"",
					RelationGetRelationName(indexRelation),
					RelationGetRelationName(heapRelation))));

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Call the access method's build procedure
	 */
	stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
												 indexInfo);
	Assert(PointerIsValid(stats));

	/*
	 * If this is an unlogged index, we may need to write out an init fork for
	 * it -- but we must first check whether one already exists.  If, for
	 * example, an unlogged relation is truncated in the transaction that
	 * created it, or truncated twice in a subsequent transaction, the
	 * relfilenode won't change, and nothing needs to be done here.
	 *
	 * NOTE(review): smgrexists() is applied to rd_smgr before the explicit
	 * RelationOpenSmgr() call below; this appears to rely on the smgr handle
	 * already having been opened as a side effect of ambuild -- confirm.
	 */
	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
	{
		RelationOpenSmgr(indexRelation);
		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
		indexRelation->rd_amroutine->ambuildempty(indexRelation);
	}

	/*
	 * If we found any potentially broken HOT chains, mark the index as not
	 * being usable until the current transaction is below the event horizon.
	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
	 * if early pruning/vacuuming is enabled for the heap relation.  While it
	 * might become safe to use the index earlier based on actual cleanup
	 * activity and other active transactions, the test for that would be much
	 * more complex and would require some form of blocking, so keep it simple
	 * and fast by just using the current transaction.
	 *
	 * However, when reindexing an existing index, we should do nothing here.
	 * Any HOT chains that are broken with respect to the index must predate
	 * the index's original creation, so there is no need to change the
	 * index's usability horizon.  Moreover, we *must not* try to change the
	 * index's pg_index entry while reindexing pg_index itself, and this
	 * optimization nicely prevents that.  The more complex rules needed for a
	 * reindex are handled separately after this function returns.
	 *
	 * We also need not set indcheckxmin during a concurrent index build,
	 * because we won't set indisvalid true until all transactions that care
	 * about the broken HOT chains or early pruning/vacuuming are gone.
	 *
	 * Therefore, this code path can only be taken during non-concurrent
	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
	 * tuple's xmin doesn't matter, because that tuple was created in the
	 * current transaction anyway.  That also means we don't need to worry
	 * about any concurrent readers of the tuple; no other transaction can see
	 * it yet.
	 */
	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
		!isreindex &&
		!indexInfo->ii_Concurrent)
	{
		Oid			indexId = RelationGetRelid(indexRelation);
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		/* If it's a new index, indcheckxmin shouldn't be set ... */
		Assert(!indexForm->indcheckxmin);

		indexForm->indcheckxmin = true;
		CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/*
	 * Update heap and index pg_class rows
	 */
	index_update_stats(heapRelation,
					   true,
					   isprimary,
					   stats->heap_tuples);

	index_update_stats(indexRelation,
					   false,
					   false,
					   stats->index_tuples);

	/* Make the updated catalog row versions visible */
	CommandCounterIncrement();

	/*
	 * If it's for an exclusion constraint, make a second pass over the heap
	 * to verify that the constraint is satisfied.  We must not do this until
	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
	 * see comments for IndexCheckExclusion.)
	 */
	if (indexInfo->ii_ExclusionOps != NULL)
		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);
}
2237
2238
2239 /*
2240 * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2241 *
2242 * This is called back from an access-method-specific index build procedure
2243 * after the AM has done whatever setup it needs. The parent heap relation
2244 * is scanned to find tuples that should be entered into the index. Each
2245 * such tuple is passed to the AM's callback routine, which does the right
2246 * things to add it to the new index. After we return, the AM's index
2247 * build procedure does whatever cleanup it needs.
2248 *
2249 * The total count of heap tuples is returned. This is for updating pg_class
2250 * statistics. (It's annoying not to be able to do that here, but we want
2251 * to merge that update with others; see index_update_stats.) Note that the
2252 * index AM itself must keep track of the number of index tuples; we don't do
2253 * so here because the AM might reject some of the tuples for its own reasons,
2254 * such as being unable to store NULLs.
2255 *
2256 * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2257 * any potentially broken HOT chains. Currently, we set this if there are
2258 * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2259 * trying very hard to detect whether they're really incompatible with the
2260 * chain tip.
2261 */
2262 double
IndexBuildHeapScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,IndexBuildCallback callback,void * callback_state)2263 IndexBuildHeapScan(Relation heapRelation,
2264 Relation indexRelation,
2265 IndexInfo *indexInfo,
2266 bool allow_sync,
2267 IndexBuildCallback callback,
2268 void *callback_state)
2269 {
2270 return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2271 indexInfo, allow_sync,
2272 false,
2273 0, InvalidBlockNumber,
2274 callback, callback_state);
2275 }
2276
2277 /*
2278 * As above, except that instead of scanning the complete heap, only the given
2279 * number of blocks are scanned. Scan to end-of-rel can be signalled by
2280 * passing InvalidBlockNumber as numblocks. Note that restricting the range
2281 * to scan cannot be done when requesting syncscan.
2282 *
2283 * When "anyvisible" mode is requested, all tuples visible to any transaction
2284 * are considered, including those inserted or deleted by transactions that are
2285 * still in progress.
2286 */
double
IndexBuildHeapRangeScan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						bool allow_sync,
						bool anyvisible,
						BlockNumber start_blockno,
						BlockNumber numblocks,
						IndexBuildCallback callback,
						void *callback_state)
{
	bool		is_system_catalog;
	bool		checking_uniqueness;
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	double		reltuples;
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;
	TransactionId OldestXmin;
	BlockNumber root_blkno = InvalidBlockNumber;
	/* map from line pointer to HOT-chain root offset, for the current page */
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/* Remember if it's a system catalog */
	is_system_catalog = IsSystemRelation(heapRelation);

	/* See whether we're verifying uniqueness/exclusion properties */
	checking_uniqueness = (indexInfo->ii_Unique ||
						   indexInfo->ii_ExclusionOps != NULL);

	/*
	 * "Any visible" mode is not compatible with uniqueness checks; make sure
	 * only one of those is requested.
	 */
	Assert(!(anyvisible && checking_uniqueness));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  In a normal index build, we use
	 * SnapshotAny because we must retrieve all tuples and do our own time
	 * qual checks (because we have to index RECENTLY_DEAD tuples).  In a
	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
	 * and index whatever's live according to that.
	 */
	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
	{
		snapshot = RegisterSnapshot(GetTransactionSnapshot());
		OldestXmin = InvalidTransactionId;	/* not used */

		/* "any visible" mode is not compatible with this */
		Assert(!anyvisible);
	}
	else
	{
		snapshot = SnapshotAny;
		/* okay to ignore lazy VACUUMs here */
		OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
	}

	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,	/* snapshot */
								0,	/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								allow_sync);	/* syncscan OK? */

	/* set our scan endpoints */
	if (!allow_sync)
		heap_setscanlimits(scan, start_blockno, numblocks);
	else
	{
		/* syncscan can only be requested on whole relation */
		Assert(start_blockno == 0);
		Assert(numblocks == InvalidBlockNumber);
	}

	/*
	 * Count of tuples passed to the AM callback.  Note that this is bumped
	 * before the partial-index predicate check, so it includes tuples that a
	 * partial index ultimately rejects.
	 */
	reltuples = 0;

	/*
	 * Scan all tuples in the base relation.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		bool		tupleIsAlive;

		CHECK_FOR_INTERRUPTS();

		/*
		 * When dealing with a HOT-chain of updated tuples, we want to index
		 * the values of the live tuple (if any), but index it under the TID
		 * of the chain's root tuple.  This approach is necessary to preserve
		 * the HOT-chain structure in the heap. So we need to be able to find
		 * the root item offset for every tuple that's in a HOT-chain.  When
		 * first reaching a new page of the relation, call
		 * heap_get_root_tuples() to build a map of root item offsets on the
		 * page.
		 *
		 * It might look unsafe to use this information across buffer
		 * lock/unlock.  However, we hold ShareLock on the table so no
		 * ordinary insert/update/delete should occur; and we hold pin on the
		 * buffer continuously while visiting the page, so no pruning
		 * operation can occur either.
		 *
		 * In cases with only ShareUpdateExclusiveLock on the table, it's
		 * possible for some HOT tuples to appear that we didn't know about
		 * when we first read the page.  To handle that case, we re-obtain the
		 * list of root offsets when a HOT tuple points to a root item that we
		 * don't know about.
		 *
		 * Also, although our opinions about tuple liveness could change while
		 * we scan the page (due to concurrent transaction commits/aborts),
		 * the chain root locations won't, so this info doesn't need to be
		 * rebuilt after waiting for another transaction.
		 *
		 * Note the implied assumption that there is no more than one live
		 * tuple per HOT-chain --- else we could create more than one index
		 * entry pointing to the same root tuple.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			root_blkno = scan->rs_cblock;
		}

		if (snapshot == SnapshotAny)
		{
			/* do our own time qual check */
			bool		indexIt;
			TransactionId xwait;

	recheck:

			/*
			 * We could possibly get away with not locking the buffer here,
			 * since caller should hold ShareLock on the relation, but let's
			 * be conservative about it.  (This remark is still correct even
			 * with HOT-pruning: our pin on the buffer prevents pruning.)
			 */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
											 scan->rs_cbuf))
			{
				case HEAPTUPLE_DEAD:
					/* Definitely dead, we can ignore it */
					indexIt = false;
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Normal case, index and unique-check it */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must index it
					 * anyway to preserve MVCC semantics.  (Pre-existing
					 * transactions could try to use the index after we finish
					 * building it, and may need to see such tuples.)
					 *
					 * However, if it was HOT-updated then we must only index
					 * the live tuple at the end of the HOT-chain.  Since this
					 * breaks semantics for pre-existing snapshots, mark the
					 * index as unusable for them.
					 */
					if (HeapTupleIsHotUpdated(heapTuple))
					{
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
						indexIt = true;
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:

					/*
					 * In "anyvisible" mode, this tuple is visible and we
					 * don't need any further checks.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = true;
						break;
					}

					/*
					 * Since caller should hold ShareLock or better, normally
					 * the only way to see this is if it was inserted earlier
					 * in our own transaction.  However, it can happen in
					 * system catalogs, since we tend to release write lock
					 * before commit there.  Give a warning if neither case
					 * applies.
					 */
					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent insert in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, indexing
						 * such a tuple could lead to a bogus uniqueness
						 * failure.  In that case we wait for the inserting
						 * transaction to finish and check again.
						 */
						if (checking_uniqueness)
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}
					}

					/*
					 * We must index such tuples, since if the index build
					 * commits then they're good.
					 */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:

					/*
					 * As with INSERT_IN_PROGRESS case, this is unexpected
					 * unless it's our own deletion or a system catalog; but
					 * in anyvisible mode, this tuple is visible.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = false;
						break;
					}

					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent delete in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, assuming
						 * the tuple is dead could lead to missing a
						 * uniqueness violation.  In that case we wait for the
						 * deleting transaction to finish and check again.
						 *
						 * Also, if it's a HOT-updated tuple, we should not
						 * index it but rather the live tuple at the end of
						 * the HOT-chain.  However, the deleting transaction
						 * could abort, possibly leaving this tuple as live
						 * after all, in which case it has to be indexed. The
						 * only way to know what to do is to wait for the
						 * deleting transaction to finish and check again.
						 */
						if (checking_uniqueness ||
							HeapTupleIsHotUpdated(heapTuple))
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}

						/*
						 * Otherwise index it but don't check for uniqueness,
						 * the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					else if (HeapTupleIsHotUpdated(heapTuple))
					{
						/*
						 * It's a HOT-updated tuple deleted by our own xact.
						 * We can assume the deletion will commit (else the
						 * index contents don't matter), so treat the same as
						 * RECENTLY_DEAD HOT-updated tuples.
						 */
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
					{
						/*
						 * It's a regular tuple deleted by our own xact. Index
						 * it but don't check for uniqueness, the same as a
						 * RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					indexIt = tupleIsAlive = false; /* keep compiler quiet */
					break;
			}

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			if (!indexIt)
				continue;
		}
		else
		{
			/* heap_getnext did the time qual check */
			tupleIsAlive = true;
		}

		reltuples += 1;

		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, discard tuples that don't satisfy the
		 * predicate.
		 */
		if (predicate != NULL)
		{
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * For the current heap tuple, extract all the attributes we use in
		 * this index, and note which are null.  This also performs evaluation
		 * of any expressions needed.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * You'd think we should go ahead and build the index tuple here, but
		 * some index AMs want to do further processing on the data first.  So
		 * pass the values[] and isnull[] arrays, instead.
		 */

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			/*
			 * For a heap-only tuple, pretend its TID is that of the root. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			HeapTupleData rootTuple;
			OffsetNumber offnum;

			rootTuple = *heapTuple;
			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);

			/*
			 * If a HOT tuple points to a root that we don't know
			 * about, obtain root items afresh.  If that still fails,
			 * report it as corruption.
			 */
			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
			{
				Page		page = BufferGetPage(scan->rs_cbuf);

				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
				heap_get_root_tuples(page, root_offsets);
				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(&heapTuple->t_self),
										 offnum,
										 RelationGetRelationName(heapRelation))));

			ItemPointerSetOffsetNumber(&rootTuple.t_self,
									   root_offsets[offnum - 1]);

			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
		else
		{
			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
	}

	heap_endscan(scan);

	/* we can now forget our snapshot, if set */
	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
		UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;

	return reltuples;
}
2741
2742
2743 /*
2744 * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2745 *
2746 * When creating an exclusion constraint, we first build the index normally
2747 * and then rescan the heap to check for conflicts. We assume that we only
2748 * need to validate tuples that are live according to an up-to-date snapshot,
2749 * and that these were correctly indexed even in the presence of broken HOT
2750 * chains. This should be OK since we are holding at least ShareLock on the
2751 * table, meaning there can be no uncommitted updates from other transactions.
2752 * (Note: that wouldn't necessarily work for system catalogs, since many
2753 * operations release write lock early on the system catalogs.)
2754 */
static void
IndexCheckExclusion(Relation heapRelation,
					Relation indexRelation,
					IndexInfo *indexInfo)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS]; /* per-column index values for the tuple under test */
	bool		isnull[INDEX_MAX_KEYS]; /* per-column null flags, parallel to values[] */
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;

	/*
	 * If we are reindexing the target index, mark it as no longer being
	 * reindexed, to forestall an Assert in index_beginscan when we try to use
	 * the index for probes.  This is OK because the index is now fully valid.
	 */
	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
		ResetReindexProcessing();

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Scan all live tuples in the base relation, using the latest snapshot
	 * so that exactly the currently-live tuples are checked.  The snapshot
	 * is registered here and must be unregistered after the scan ends.
	 */
	snapshot = RegisterSnapshot(GetLatestSnapshot());
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,	/* snapshot */
								0,	/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								true);	/* syncscan OK */

	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		CHECK_FOR_INTERRUPTS();

		/* Reset per-tuple memory before evaluating expressions for this row */
		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, ignore tuples that don't satisfy the predicate.
		 */
		if (predicate != NULL)
		{
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * Extract index column values, including computing expressions.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * Check that this tuple has no conflicts.
		 */
		check_exclusion_constraint(heapRelation,
								   indexRelation, indexInfo,
								   &(heapTuple->t_self), values, isnull,
								   estate, true);
	}

	heap_endscan(scan);
	/* Release the snapshot registered above */
	UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}
2850
2851
2852 /*
2853 * validate_index - support code for concurrent index builds
2854 *
2855 * We do a concurrent index build by first inserting the catalog entry for the
2856 * index via index_create(), marking it not indisready and not indisvalid.
2857 * Then we commit our transaction and start a new one, then we wait for all
2858 * transactions that could have been modifying the table to terminate. Now
2859 * we know that any subsequently-started transactions will see the index and
2860 * honor its constraints on HOT updates; so while existing HOT-chains might
2861 * be broken with respect to the index, no currently live tuple will have an
2862 * incompatible HOT update done to it. We now build the index normally via
2863 * index_build(), while holding a weak lock that allows concurrent
2864 * insert/update/delete. Also, we index only tuples that are valid
2865 * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2866 * build takes care to include recently-dead tuples. This is OK because
2867 * we won't mark the index valid until all transactions that might be able
2868 * to see those tuples are gone. The reason for doing that is to avoid
2869 * bogus unique-index failures due to concurrent UPDATEs (we might see
2870 * different versions of the same row as being valid when we pass over them,
2871 * if we used HeapTupleSatisfiesVacuum). This leaves us with an index that
2872 * does not contain any tuples added to the table while we built the index.
2873 *
2874 * Next, we mark the index "indisready" (but still not "indisvalid") and
2875 * commit the second transaction and start a third. Again we wait for all
2876 * transactions that could have been modifying the table to terminate. Now
2877 * we know that any subsequently-started transactions will see the index and
2878 * insert their new tuples into it. We then take a new reference snapshot
2879 * which is passed to validate_index(). Any tuples that are valid according
2880 * to this snap, but are not in the index, must be added to the index.
2881 * (Any tuples committed live after the snap will be inserted into the
2882 * index by their originating transaction. Any tuples committed dead before
2883 * the snap need not be indexed, because we will wait out all transactions
2884 * that might care about them before we mark the index valid.)
2885 *
2886 * validate_index() works by first gathering all the TIDs currently in the
2887 * index, using a bulkdelete callback that just stores the TIDs and doesn't
2888 * ever say "delete it". (This should be faster than a plain indexscan;
2889 * also, not all index AMs support full-index indexscan.) Then we sort the
2890 * TIDs, and finally scan the table doing a "merge join" against the TID list
2891 * to see which tuples are missing from the index. Thus we will ensure that
2892 * all tuples valid according to the reference snapshot are in the index.
2893 *
2894 * Building a unique index this way is tricky: we might try to insert a
2895 * tuple that is already dead or is in process of being deleted, and we
2896 * mustn't have a uniqueness failure against an updated version of the same
2897 * row. We could try to check the tuple to see if it's already dead and tell
2898 * index_insert() not to do the uniqueness check, but that still leaves us
2899 * with a race condition against an in-progress update. To handle that,
2900 * we expect the index AM to recheck liveness of the to-be-inserted tuple
2901 * before it declares a uniqueness error.
2902 *
2903 * After completing validate_index(), we wait until all transactions that
2904 * were alive at the time of the reference snapshot are gone; this is
2905 * necessary to be sure there are none left with a transaction snapshot
2906 * older than the reference (and hence possibly able to see tuples we did
2907 * not index). Then we mark the index "indisvalid" and commit. Subsequent
2908 * transactions will be able to use it for queries.
2909 *
2910 * Doing two full table scans is a brute-force strategy. We could try to be
2911 * cleverer, eg storing new tuples in a special area of the table (perhaps
2912 * making the table append-only by setting use_fsm). However that would
2913 * add yet more locking issues.
2914 */
void
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
{
	Relation	heapRelation,
				indexRelation;
	IndexInfo  *indexInfo;
	IndexVacuumInfo ivinfo;
	v_i_state	state;			/* tuplesort handle plus heap/index/insert counters */
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/* Open and lock the parent heap relation */
	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
	/* And the target index relation */
	indexRelation = index_open(indexId, RowExclusiveLock);

	/*
	 * Fetch info needed for index_insert.  (You might think this should be
	 * passed in from DefineIndex, but its copy is long gone due to having
	 * been built in a previous transaction.)
	 */
	indexInfo = BuildIndexInfo(indexRelation);

	/* mark build is concurrent just for consistency */
	indexInfo->ii_Concurrent = true;

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.  These are
	 * undone (in reverse order) at the bottom of this function.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Scan the index and gather up all the TIDs into a tuplesort object.
	 */
	ivinfo.index = indexRelation;
	ivinfo.analyze_only = false;
	ivinfo.estimated_count = true;
	ivinfo.message_level = DEBUG2;
	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
	ivinfo.strategy = NULL;

	/*
	 * Encode TIDs as int8 values for the sort, rather than directly sorting
	 * item pointers.  This can be significantly faster, primarily because TID
	 * is a pass-by-reference type on all platforms, whereas int8 is
	 * pass-by-value on most platforms.
	 */
	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
											InvalidOid, false,
											maintenance_work_mem,
											false);
	/* counters reported by the DEBUG2 message below */
	state.htups = state.itups = state.tups_inserted = 0;

	/* bulkdelete callback just collects TIDs; it never deletes anything */
	(void) index_bulk_delete(&ivinfo, NULL,
							 validate_index_callback, (void *) &state);

	/* Execute the sort */
	tuplesort_performsort(state.tuplesort);

	/*
	 * Now scan the heap and "merge" it with the index
	 */
	validate_index_heapscan(heapRelation,
							indexRelation,
							indexInfo,
							snapshot,
							&state);

	/* Done with tuplesort object */
	tuplesort_end(state.tuplesort);

	elog(DEBUG2,
		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
		 state.htups, state.itups, state.tups_inserted);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Close rels, but keep locks */
	index_close(indexRelation, NoLock);
	heap_close(heapRelation, NoLock);
}
3006
3007 /*
3008 * itemptr_encode - Encode ItemPointer as int64/int8
3009 *
3010 * This representation must produce values encoded as int64 that sort in the
3011 * same order as their corresponding original TID values would (using the
3012 * default int8 opclass to produce a result equivalent to the default TID
3013 * opclass).
3014 *
3015 * As noted in validate_index(), this can be significantly faster.
3016 */
3017 static inline int64
itemptr_encode(ItemPointer itemptr)3018 itemptr_encode(ItemPointer itemptr)
3019 {
3020 BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3021 OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3022 int64 encoded;
3023
3024 /*
3025 * Use the 16 least significant bits for the offset. 32 adjacent bits are
3026 * used for the block number. Since remaining bits are unused, there
3027 * cannot be negative encoded values (We assume a two's complement
3028 * representation).
3029 */
3030 encoded = ((uint64) block << 16) | (uint16) offset;
3031
3032 return encoded;
3033 }
3034
3035 /*
3036 * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3037 */
3038 static inline void
itemptr_decode(ItemPointer itemptr,int64 encoded)3039 itemptr_decode(ItemPointer itemptr, int64 encoded)
3040 {
3041 BlockNumber block = (BlockNumber) (encoded >> 16);
3042 OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3043
3044 ItemPointerSet(itemptr, block, offset);
3045 }
3046
3047 /*
3048 * validate_index_callback - bulkdelete callback to collect the index TIDs
3049 */
3050 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3051 validate_index_callback(ItemPointer itemptr, void *opaque)
3052 {
3053 v_i_state *state = (v_i_state *) opaque;
3054 int64 encoded = itemptr_encode(itemptr);
3055
3056 tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3057 state->itups += 1;
3058 return false; /* never actually delete anything */
3059 }
3060
3061 /*
3062 * validate_index_heapscan - second table scan for concurrent index build
3063 *
3064 * This has much code in common with IndexBuildHeapScan, but it's enough
3065 * different that it seems cleaner to have two routines not one.
3066 */
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS]; /* per-column index values */
	bool		isnull[INDEX_MAX_KEYS]; /* per-column null flags */
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;	/* heap page whose root map is cached */
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];	/* HOT-chain root map for that page */
	bool		in_index[MaxHeapTuplesPerPage]; /* in_index[i]: index TID (root_blkno,i+1) already seen */

	/* state variables for the merge */
	ItemPointer indexcursor = NULL; /* current index TID, or NULL before first fetch */
	ItemPointerData decoded;	/* storage that indexcursor points at */
	bool		tuplesort_empty = false;	/* true once the sorted TID stream is exhausted */

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,	/* snapshot */
								0,	/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								false); /* syncscan not OK */

	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * As commented in IndexBuildHeapScan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page.  We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			/* Share-lock the buffer only while reading the root-tuple map */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = scan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			root_offnum = root_offsets[root_offnum - 1];
			/* A heap-only tuple without a mapped root indicates corruption */
			if (!OffsetNumberIsValid(root_offnum))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(heapcursor),
										 ItemPointerGetOffsetNumber(heapcursor),
										 RelationGetRelationName(heapRelation))));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current root tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, &rootTuple) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			if (indexcursor)
			{
				/*
				 * Remember index items seen earlier on the current heap page
				 */
				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
			}

			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
												  &ts_val, &ts_isnull, NULL);
			Assert(tuplesort_empty || !ts_isnull);
			if (!tuplesort_empty)
			{
				itemptr_decode(&decoded, DatumGetInt64(ts_val));
				indexcursor = &decoded;

				/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
#ifndef USE_FLOAT8_BYVAL
				pfree(DatumGetPointer(ts_val));
#endif
			}
			else
			{
				/* Be tidy */
				indexcursor = NULL;
			}
		}

		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NULL)
			{
				if (!ExecQual(predicate, econtext))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first.  So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */

			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique ?
						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
						 indexInfo);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;
}
3295
3296
3297 /*
3298 * index_set_state_flags - adjust pg_index state flags
3299 *
3300 * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3301 * flags that denote the index's state.
3302 *
3303 * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3304 * tuple, so other sessions will hear about the update as soon as we commit.
3305 */
3306 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3307 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3308 {
3309 Relation pg_index;
3310 HeapTuple indexTuple;
3311 Form_pg_index indexForm;
3312
3313 /* Open pg_index and fetch a writable copy of the index's tuple */
3314 pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3315
3316 indexTuple = SearchSysCacheCopy1(INDEXRELID,
3317 ObjectIdGetDatum(indexId));
3318 if (!HeapTupleIsValid(indexTuple))
3319 elog(ERROR, "cache lookup failed for index %u", indexId);
3320 indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3321
3322 /* Perform the requested state change on the copy */
3323 switch (action)
3324 {
3325 case INDEX_CREATE_SET_READY:
3326 /* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3327 Assert(indexForm->indislive);
3328 Assert(!indexForm->indisready);
3329 Assert(!indexForm->indisvalid);
3330 indexForm->indisready = true;
3331 break;
3332 case INDEX_CREATE_SET_VALID:
3333 /* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3334 Assert(indexForm->indislive);
3335 Assert(indexForm->indisready);
3336 Assert(!indexForm->indisvalid);
3337 indexForm->indisvalid = true;
3338 break;
3339 case INDEX_DROP_CLEAR_VALID:
3340
3341 /*
3342 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3343 *
3344 * If indisready == true we leave it set so the index still gets
3345 * maintained by active transactions. We only need to ensure that
3346 * indisvalid is false. (We don't assert that either is initially
3347 * true, though, since we want to be able to retry a DROP INDEX
3348 * CONCURRENTLY that failed partway through.)
3349 *
3350 * Note: the CLUSTER logic assumes that indisclustered cannot be
3351 * set on any invalid index, so clear that flag too.
3352 */
3353 indexForm->indisvalid = false;
3354 indexForm->indisclustered = false;
3355 break;
3356 case INDEX_DROP_SET_DEAD:
3357
3358 /*
3359 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3360 *
3361 * We clear both indisready and indislive, because we not only
3362 * want to stop updates, we want to prevent sessions from touching
3363 * the index at all.
3364 */
3365 Assert(!indexForm->indisvalid);
3366 indexForm->indisready = false;
3367 indexForm->indislive = false;
3368 break;
3369 }
3370
3371 /* ... and update it */
3372 CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3373
3374 heap_close(pg_index, RowExclusiveLock);
3375 }
3376
3377
3378 /*
3379 * IndexGetRelation: given an index's relation OID, get the OID of the
3380 * relation it is an index on. Uses the system cache.
3381 */
3382 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3383 IndexGetRelation(Oid indexId, bool missing_ok)
3384 {
3385 HeapTuple tuple;
3386 Form_pg_index index;
3387 Oid result;
3388
3389 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3390 if (!HeapTupleIsValid(tuple))
3391 {
3392 if (missing_ok)
3393 return InvalidOid;
3394 elog(ERROR, "cache lookup failed for index %u", indexId);
3395 }
3396 index = (Form_pg_index) GETSTRUCT(tuple);
3397 Assert(index->indexrelid == indexId);
3398
3399 result = index->indrelid;
3400 ReleaseSysCache(tuple);
3401 return result;
3402 }
3403
3404 /*
3405 * reindex_index - This routine is used to recreate a single index
3406 */
void
reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
			  int options)
{
	Relation	iRel,
				heapRelation;
	Oid			heapId;
	IndexInfo  *indexInfo;
	/* true if we disabled a uniqueness/exclusion check below */
	/* NOTE(review): `volatile` looks unnecessary here (no setjmp/PG_TRY in sight) -- confirm */
	volatile bool skipped_constraint = false;
	PGRUsage	ru0;			/* resource usage baseline for the VERBOSE report */

	pg_rusage_init(&ru0);

	/*
	 * Open and lock the parent heap relation.  ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId, false);
	heapRelation = heap_open(heapId, ShareLock);

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (RELATION_IS_OTHER_TEMP(iRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Also check for active uses of the index in the current transaction; we
	 * don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/*
	 * All predicate locks on the index are about to be made invalid. Promote
	 * them to relation locks on the heap.
	 */
	TransferPredicateLocksToHeapRelation(iRel);

	/* Fetch info needed for index_build */
	indexInfo = BuildIndexInfo(iRel);

	/* If requested, skip checking uniqueness/exclusion constraints */
	if (skip_constraint_checks)
	{
		/* Remember whether we actually suppressed a constraint check */
		if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
			skipped_constraint = true;
		indexInfo->ii_Unique = false;
		indexInfo->ii_ExclusionOps = NULL;
		indexInfo->ii_ExclusionProcs = NULL;
		indexInfo->ii_ExclusionStrats = NULL;
	}

	/* Suppress use of the target index while rebuilding it */
	SetReindexProcessing(heapId, indexId);

	/* Create a new physical relation for the index */
	RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
							  InvalidMultiXactId);

	/* Initialize the index and rebuild */
	/* Note: we do not need to re-establish pkey setting */
	index_build(heapRelation, iRel, indexInfo, false, true);

	/* Re-allow use of target index */
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
	 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
	 * and we didn't skip a uniqueness check, we can now mark it valid.  This
	 * allows REINDEX to be used to clean up in such cases.
	 *
	 * We can also reset indcheckxmin, because we have now done a
	 * non-concurrent index build, *except* in the case where index_build
	 * found some still-broken HOT chains.  If it did, and we don't have to
	 * change any of the other flags, we just leave indcheckxmin alone (note
	 * that index_build won't have changed it, because this is a reindex).
	 * This is okay and desirable because not updating the tuple leaves the
	 * index's usability horizon (recorded as the tuple's xmin value) the same
	 * as it was.
	 *
	 * But, if the index was invalid/not-ready/dead and there were broken HOT
	 * chains, we had better force indcheckxmin true, because the normal
	 * argument that the HOT chains couldn't conflict with the index is
	 * suspect for an invalid index.  (A conflict is definitely possible if
	 * the index was dead.  It probably shouldn't happen otherwise, but let's
	 * be conservative.)  In this case advancing the usability horizon is
	 * appropriate.
	 *
	 * Another reason for avoiding unnecessary updates here is that while
	 * reindexing pg_index itself, we must not try to update tuples in it.
	 * pg_index's indexes should always have these flags in their clean state,
	 * so that won't happen.
	 *
	 * If early pruning/vacuuming is enabled for the heap relation, the
	 * usability horizon must be advanced to the current transaction on every
	 * build or rebuild.  pg_index is OK in this regard because catalog tables
	 * are not subject to early cleanup.
	 */
	if (!skipped_constraint)
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		index_bad;	/* any of the three liveness flags unset? */
		bool		early_pruning_enabled = EarlyPruningEnabled(heapRelation);

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		index_bad = (!indexForm->indisvalid ||
					 !indexForm->indisready ||
					 !indexForm->indislive);
		/* Only touch the tuple if one of the flags actually needs changing */
		if (index_bad ||
			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
			early_pruning_enabled)
		{
			if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
				indexForm->indcheckxmin = false;
			else if (index_bad || early_pruning_enabled)
				indexForm->indcheckxmin = true;
			indexForm->indisvalid = true;
			indexForm->indisready = true;
			indexForm->indislive = true;
			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's index list.  This ensures
			 * that if anyone misses seeing the pg_index row during this
			 * update, they'll refresh their list before attempting any update
			 * on the table.
			 */
			CacheInvalidateRelcache(heapRelation);
		}

		heap_close(pg_index, RowExclusiveLock);
	}

	/* Log what we did */
	if (options & REINDEXOPT_VERBOSE)
		ereport(INFO,
				(errmsg("index \"%s\" was reindexed",
						get_rel_name(indexId)),
				 errdetail_internal("%s",
									pg_rusage_show(&ru0))));

	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	heap_close(heapRelation, NoLock);
}
3572
3573 /*
3574 * reindex_relation - This routine is used to recreate all indexes
3575 * of a relation (and optionally its toast relation too, if any).
3576 *
3577 * "flags" is a bitmask that can include any combination of these bits:
3578 *
3579 * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3580 *
3581 * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3582 * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3583 * indexes are inconsistent with it. This makes things tricky if the relation
3584 * is a system catalog that we might consult during the reindexing. To deal
3585 * with that case, we mark all of the indexes as pending rebuild so that they
3586 * won't be trusted until rebuilt. The caller is required to call us *without*
3587 * having made the rebuilt table visible by doing CommandCounterIncrement;
3588 * we'll do CCI after having collected the index list. (This way we can still
3589 * use catalog indexes while collecting the list.)
3590 *
3591 * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3592 * constraint conditions, else don't. To avoid deadlocks, VACUUM FULL or
3593 * CLUSTER on a system catalog must omit this flag. REINDEX should be used to
3594 * rebuild an index if constraint inconsistency is suspected. For optimal
3595 * performance, other callers should include the flag only after transforming
3596 * the data in a manner that risks a change in constraint validity.
3597 *
3598 * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3599 * rebuilt indexes to unlogged.
3600 *
3601 * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3602 * rebuilt indexes to permanent.
3603 *
3604 * Returns true if any indexes were rebuilt (including toast table's index
3605 * when relevant). Note that a CommandCounterIncrement will occur after each
3606 * index rebuild.
3607 */
3608 bool
reindex_relation(Oid relid,int flags,int options)3609 reindex_relation(Oid relid, int flags, int options)
3610 {
3611 Relation rel;
3612 Oid toast_relid;
3613 List *indexIds;
3614 char persistence;
3615 bool result;
3616 ListCell *indexId;
3617
3618 /*
3619 * Open and lock the relation. ShareLock is sufficient since we only need
3620 * to prevent schema and data changes in it. The lock level used here
3621 * should match ReindexTable().
3622 */
3623 rel = heap_open(relid, ShareLock);
3624
3625 toast_relid = rel->rd_rel->reltoastrelid;
3626
3627 /*
3628 * Get the list of index OIDs for this relation. (We trust to the
3629 * relcache to get this with a sequential scan if ignoring system
3630 * indexes.)
3631 */
3632 indexIds = RelationGetIndexList(rel);
3633
3634 if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3635 {
3636 /* Suppress use of all the indexes until they are rebuilt */
3637 SetReindexPending(indexIds);
3638
3639 /*
3640 * Make the new heap contents visible --- now things might be
3641 * inconsistent!
3642 */
3643 CommandCounterIncrement();
3644 }
3645
3646 /*
3647 * Compute persistence of indexes: same as that of owning rel, unless
3648 * caller specified otherwise.
3649 */
3650 if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3651 persistence = RELPERSISTENCE_UNLOGGED;
3652 else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3653 persistence = RELPERSISTENCE_PERMANENT;
3654 else
3655 persistence = rel->rd_rel->relpersistence;
3656
3657 /* Reindex all the indexes. */
3658 foreach(indexId, indexIds)
3659 {
3660 Oid indexOid = lfirst_oid(indexId);
3661
3662 reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3663 persistence, options);
3664
3665 CommandCounterIncrement();
3666
3667 /* Index should no longer be in the pending list */
3668 Assert(!ReindexIsProcessingIndex(indexOid));
3669 }
3670
3671 /*
3672 * Close rel, but continue to hold the lock.
3673 */
3674 heap_close(rel, NoLock);
3675
3676 result = (indexIds != NIL);
3677
3678 /*
3679 * If the relation has a secondary toast rel, reindex that too while we
3680 * still hold the lock on the master table.
3681 */
3682 if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3683 result |= reindex_relation(toast_relid, flags, options);
3684
3685 return result;
3686 }
3687
3688
/* ----------------------------------------------------------------
 *		System index reindexing support
 *
 * When we are busy reindexing a system index, this code provides support
 * for preventing catalog lookups from using that index.  We also make use
 * of this to catch attempted uses of user indexes during reindexing of
 * those indexes.
 * ----------------------------------------------------------------
 */

/* Heap relation whose index is currently being rebuilt, or InvalidOid */
static Oid	currentlyReindexedHeap = InvalidOid;
/* Index currently being rebuilt, or InvalidOid */
static Oid	currentlyReindexedIndex = InvalidOid;
/* List of index OIDs awaiting rebuild; treated as invalid meanwhile */
static List *pendingReindexedIndexes = NIL;
/* Transaction nest level at which the reindex state was established */
static int	reindexingNestLevel = 0;
3703
3704 /*
3705 * ReindexIsProcessingHeap
3706 * True if heap specified by OID is currently being reindexed.
3707 */
3708 bool
ReindexIsProcessingHeap(Oid heapOid)3709 ReindexIsProcessingHeap(Oid heapOid)
3710 {
3711 return heapOid == currentlyReindexedHeap;
3712 }
3713
3714 /*
3715 * ReindexIsCurrentlyProcessingIndex
3716 * True if index specified by OID is currently being reindexed.
3717 */
3718 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3719 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3720 {
3721 return indexOid == currentlyReindexedIndex;
3722 }
3723
3724 /*
3725 * ReindexIsProcessingIndex
3726 * True if index specified by OID is currently being reindexed,
3727 * or should be treated as invalid because it is awaiting reindex.
3728 */
3729 bool
ReindexIsProcessingIndex(Oid indexOid)3730 ReindexIsProcessingIndex(Oid indexOid)
3731 {
3732 return indexOid == currentlyReindexedIndex ||
3733 list_member_oid(pendingReindexedIndexes, indexOid);
3734 }
3735
3736 /*
3737 * SetReindexProcessing
3738 * Set flag that specified heap/index are being reindexed.
3739 */
3740 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3741 SetReindexProcessing(Oid heapOid, Oid indexOid)
3742 {
3743 Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3744 /* Reindexing is not re-entrant. */
3745 if (OidIsValid(currentlyReindexedHeap))
3746 elog(ERROR, "cannot reindex while reindexing");
3747 currentlyReindexedHeap = heapOid;
3748 currentlyReindexedIndex = indexOid;
3749 /* Index is no longer "pending" reindex. */
3750 RemoveReindexPending(indexOid);
3751 /* This may have been set already, but in case it isn't, do so now. */
3752 reindexingNestLevel = GetCurrentTransactionNestLevel();
3753 }
3754
3755 /*
3756 * ResetReindexProcessing
3757 * Unset reindexing status.
3758 */
3759 static void
ResetReindexProcessing(void)3760 ResetReindexProcessing(void)
3761 {
3762 currentlyReindexedHeap = InvalidOid;
3763 currentlyReindexedIndex = InvalidOid;
3764 /* reindexingNestLevel remains set till end of (sub)transaction */
3765 }
3766
3767 /*
3768 * SetReindexPending
3769 * Mark the given indexes as pending reindex.
3770 *
3771 * NB: we assume that the current memory context stays valid throughout.
3772 */
3773 static void
SetReindexPending(List * indexes)3774 SetReindexPending(List *indexes)
3775 {
3776 /* Reindexing is not re-entrant. */
3777 if (pendingReindexedIndexes)
3778 elog(ERROR, "cannot reindex while reindexing");
3779 pendingReindexedIndexes = list_copy(indexes);
3780 reindexingNestLevel = GetCurrentTransactionNestLevel();
3781 }
3782
3783 /*
3784 * RemoveReindexPending
3785 * Remove the given index from the pending list.
3786 */
3787 static void
RemoveReindexPending(Oid indexOid)3788 RemoveReindexPending(Oid indexOid)
3789 {
3790 pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3791 indexOid);
3792 }
3793
3794 /*
3795 * ResetReindexState
3796 * Clear all reindexing state during (sub)transaction abort.
3797 */
3798 void
ResetReindexState(int nestLevel)3799 ResetReindexState(int nestLevel)
3800 {
3801 /*
3802 * Because reindexing is not re-entrant, we don't need to cope with nested
3803 * reindexing states. We just need to avoid messing up the outer-level
3804 * state in case a subtransaction fails within a REINDEX. So checking the
3805 * current nest level against that of the reindex operation is sufficient.
3806 */
3807 if (reindexingNestLevel >= nestLevel)
3808 {
3809 currentlyReindexedHeap = InvalidOid;
3810 currentlyReindexedIndex = InvalidOid;
3811
3812 /*
3813 * We needn't try to release the contents of pendingReindexedIndexes;
3814 * that list should be in a transaction-lifespan context, so it will
3815 * go away automatically.
3816 */
3817 pendingReindexedIndexes = NIL;
3818
3819 reindexingNestLevel = 0;
3820 }
3821 }
3822