1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *	  code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/catalog/index.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *		index_create()			- Create a cataloged index relation
16  *		index_drop()			- Removes index relation from catalogs
17  *		BuildIndexInfo()		- Prepare to insert index tuples
18  *		FormIndexDatum()		- Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23 
24 #include <unistd.h>
25 
26 #include "access/amapi.h"
27 #include "access/heapam.h"
28 #include "access/multixact.h"
29 #include "access/reloptions.h"
30 #include "access/relscan.h"
31 #include "access/sysattr.h"
32 #include "access/tableam.h"
33 #include "access/toast_compression.h"
34 #include "access/transam.h"
35 #include "access/visibilitymap.h"
36 #include "access/xact.h"
37 #include "bootstrap/bootstrap.h"
38 #include "catalog/binary_upgrade.h"
39 #include "catalog/catalog.h"
40 #include "catalog/dependency.h"
41 #include "catalog/heap.h"
42 #include "catalog/index.h"
43 #include "catalog/objectaccess.h"
44 #include "catalog/partition.h"
45 #include "catalog/pg_am.h"
46 #include "catalog/pg_collation.h"
47 #include "catalog/pg_constraint.h"
48 #include "catalog/pg_depend.h"
49 #include "catalog/pg_description.h"
50 #include "catalog/pg_inherits.h"
51 #include "catalog/pg_opclass.h"
52 #include "catalog/pg_operator.h"
53 #include "catalog/pg_tablespace.h"
54 #include "catalog/pg_trigger.h"
55 #include "catalog/pg_type.h"
56 #include "catalog/storage.h"
57 #include "commands/event_trigger.h"
58 #include "commands/progress.h"
59 #include "commands/tablecmds.h"
60 #include "commands/tablespace.h"
61 #include "commands/trigger.h"
62 #include "executor/executor.h"
63 #include "miscadmin.h"
64 #include "nodes/makefuncs.h"
65 #include "nodes/nodeFuncs.h"
66 #include "optimizer/optimizer.h"
67 #include "parser/parser.h"
68 #include "pgstat.h"
69 #include "rewrite/rewriteManip.h"
70 #include "storage/bufmgr.h"
71 #include "storage/lmgr.h"
72 #include "storage/predicate.h"
73 #include "storage/procarray.h"
74 #include "storage/smgr.h"
75 #include "utils/builtins.h"
76 #include "utils/datum.h"
77 #include "utils/fmgroids.h"
78 #include "utils/guc.h"
79 #include "utils/inval.h"
80 #include "utils/lsyscache.h"
81 #include "utils/memutils.h"
82 #include "utils/pg_rusage.h"
83 #include "utils/rel.h"
84 #include "utils/snapmgr.h"
85 #include "utils/syscache.h"
86 #include "utils/tuplesort.h"
87 
88 /* Potentially set by pg_upgrade_support functions */
89 Oid			binary_upgrade_next_index_pg_class_oid = InvalidOid;
90 
91 /*
92  * Pointer-free representation of variables used when reindexing system
93  * catalogs; we use this to propagate those values to parallel workers.
94  */
95 typedef struct
96 {
97 	Oid			currentlyReindexedHeap;
98 	Oid			currentlyReindexedIndex;
99 	int			numPendingReindexedIndexes;
100 	Oid			pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
101 } SerializedReindexState;
102 
103 /* non-export function prototypes */
104 static bool relationHasPrimaryKey(Relation rel);
105 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
106 										  IndexInfo *indexInfo,
107 										  List *indexColNames,
108 										  Oid accessMethodObjectId,
109 										  Oid *collationObjectId,
110 										  Oid *classObjectId);
111 static void InitializeAttributeOids(Relation indexRelation,
112 									int numatts, Oid indexoid);
113 static void AppendAttributeTuples(Relation indexRelation, Datum *attopts);
114 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
115 								Oid parentIndexId,
116 								IndexInfo *indexInfo,
117 								Oid *collationOids,
118 								Oid *classOids,
119 								int16 *coloptions,
120 								bool primary,
121 								bool isexclusion,
122 								bool immediate,
123 								bool isvalid,
124 								bool isready);
125 static void index_update_stats(Relation rel,
126 							   bool hasindex,
127 							   double reltuples);
128 static void IndexCheckExclusion(Relation heapRelation,
129 								Relation indexRelation,
130 								IndexInfo *indexInfo);
131 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
132 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
133 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
134 static void ResetReindexProcessing(void);
135 static void SetReindexPending(List *indexes);
136 static void RemoveReindexPending(Oid indexOid);
137 
138 
139 /*
140  * relationHasPrimaryKey
141  *		See whether an existing relation has a primary key.
142  *
143  * Caller must have suitable lock on the relation.
144  *
145  * Note: we intentionally do not check indisvalid here; that's because this
146  * is used to enforce the rule that there can be only one indisprimary index,
147  * and we want that to be true even if said index is invalid.
148  */
149 static bool
relationHasPrimaryKey(Relation rel)150 relationHasPrimaryKey(Relation rel)
151 {
152 	bool		result = false;
153 	List	   *indexoidlist;
154 	ListCell   *indexoidscan;
155 
156 	/*
157 	 * Get the list of index OIDs for the table from the relcache, and look up
158 	 * each one in the pg_index syscache until we find one marked primary key
159 	 * (hopefully there isn't more than one such).
160 	 */
161 	indexoidlist = RelationGetIndexList(rel);
162 
163 	foreach(indexoidscan, indexoidlist)
164 	{
165 		Oid			indexoid = lfirst_oid(indexoidscan);
166 		HeapTuple	indexTuple;
167 
168 		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
169 		if (!HeapTupleIsValid(indexTuple))	/* should not happen */
170 			elog(ERROR, "cache lookup failed for index %u", indexoid);
171 		result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
172 		ReleaseSysCache(indexTuple);
173 		if (result)
174 			break;
175 	}
176 
177 	list_free(indexoidlist);
178 
179 	return result;
180 }
181 
182 /*
183  * index_check_primary_key
184  *		Apply special checks needed before creating a PRIMARY KEY index
185  *
186  * This processing used to be in DefineIndex(), but has been split out
187  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
188  *
189  * We check for a pre-existing primary key, and that all columns of the index
190  * are simple column references (not expressions), and that all those
191  * columns are marked NOT NULL.  If not, fail.
192  *
193  * We used to automatically change unmarked columns to NOT NULL here by doing
194  * our own local ALTER TABLE command.  But that doesn't work well if we're
195  * executing one subcommand of an ALTER TABLE: the operations may not get
196  * performed in the right order overall.  Now we expect that the parser
197  * inserted any required ALTER TABLE SET NOT NULL operations before trying
198  * to create a primary-key index.
199  *
200  * Caller had better have at least ShareLock on the table, else the not-null
201  * checking isn't trustworthy.
202  */
203 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)204 index_check_primary_key(Relation heapRel,
205 						IndexInfo *indexInfo,
206 						bool is_alter_table,
207 						IndexStmt *stmt)
208 {
209 	int			i;
210 
211 	/*
212 	 * If ALTER TABLE or CREATE TABLE .. PARTITION OF, check that there isn't
213 	 * already a PRIMARY KEY.  In CREATE TABLE for an ordinary relation, we
214 	 * have faith that the parser rejected multiple pkey clauses; and CREATE
215 	 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
216 	 */
217 	if ((is_alter_table || heapRel->rd_rel->relispartition) &&
218 		relationHasPrimaryKey(heapRel))
219 	{
220 		ereport(ERROR,
221 				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
222 				 errmsg("multiple primary keys for table \"%s\" are not allowed",
223 						RelationGetRelationName(heapRel))));
224 	}
225 
226 	/*
227 	 * Check that all of the attributes in a primary key are marked as not
228 	 * null.  (We don't really expect to see that; it'd mean the parser messed
229 	 * up.  But it seems wise to check anyway.)
230 	 */
231 	for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
232 	{
233 		AttrNumber	attnum = indexInfo->ii_IndexAttrNumbers[i];
234 		HeapTuple	atttuple;
235 		Form_pg_attribute attform;
236 
237 		if (attnum == 0)
238 			ereport(ERROR,
239 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
240 					 errmsg("primary keys cannot be expressions")));
241 
242 		/* System attributes are never null, so no need to check */
243 		if (attnum < 0)
244 			continue;
245 
246 		atttuple = SearchSysCache2(ATTNUM,
247 								   ObjectIdGetDatum(RelationGetRelid(heapRel)),
248 								   Int16GetDatum(attnum));
249 		if (!HeapTupleIsValid(atttuple))
250 			elog(ERROR, "cache lookup failed for attribute %d of relation %u",
251 				 attnum, RelationGetRelid(heapRel));
252 		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
253 
254 		if (!attform->attnotnull)
255 			ereport(ERROR,
256 					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
257 					 errmsg("primary key column \"%s\" is not marked NOT NULL",
258 							NameStr(attform->attname))));
259 
260 		ReleaseSysCache(atttuple);
261 	}
262 }
263 
264 /*
265  *		ConstructTupleDescriptor
266  *
267  * Build an index tuple descriptor for a new index
268  */
269 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid * collationObjectId,Oid * classObjectId)270 ConstructTupleDescriptor(Relation heapRelation,
271 						 IndexInfo *indexInfo,
272 						 List *indexColNames,
273 						 Oid accessMethodObjectId,
274 						 Oid *collationObjectId,
275 						 Oid *classObjectId)
276 {
277 	int			numatts = indexInfo->ii_NumIndexAttrs;
278 	int			numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
279 	ListCell   *colnames_item = list_head(indexColNames);
280 	ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
281 	IndexAmRoutine *amroutine;
282 	TupleDesc	heapTupDesc;
283 	TupleDesc	indexTupDesc;
284 	int			natts;			/* #atts in heap rel --- for error checks */
285 	int			i;
286 
287 	/* We need access to the index AM's API struct */
288 	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
289 
290 	/* ... and to the table's tuple descriptor */
291 	heapTupDesc = RelationGetDescr(heapRelation);
292 	natts = RelationGetForm(heapRelation)->relnatts;
293 
294 	/*
295 	 * allocate the new tuple descriptor
296 	 */
297 	indexTupDesc = CreateTemplateTupleDesc(numatts);
298 
299 	/*
300 	 * Fill in the pg_attribute row.
301 	 */
302 	for (i = 0; i < numatts; i++)
303 	{
304 		AttrNumber	atnum = indexInfo->ii_IndexAttrNumbers[i];
305 		Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
306 		HeapTuple	tuple;
307 		Form_pg_type typeTup;
308 		Form_pg_opclass opclassTup;
309 		Oid			keyType;
310 
311 		MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
312 		to->attnum = i + 1;
313 		to->attstattarget = -1;
314 		to->attcacheoff = -1;
315 		to->attislocal = true;
316 		to->attcollation = (i < numkeyatts) ?
317 			collationObjectId[i] : InvalidOid;
318 
319 		/*
320 		 * Set the attribute name as specified by caller.
321 		 */
322 		if (colnames_item == NULL)	/* shouldn't happen */
323 			elog(ERROR, "too few entries in colnames list");
324 		namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
325 		colnames_item = lnext(indexColNames, colnames_item);
326 
327 		/*
328 		 * For simple index columns, we copy some pg_attribute fields from the
329 		 * parent relation.  For expressions we have to look at the expression
330 		 * result.
331 		 */
332 		if (atnum != 0)
333 		{
334 			/* Simple index column */
335 			const FormData_pg_attribute *from;
336 
337 			Assert(atnum > 0);	/* should've been caught above */
338 
339 			if (atnum > natts)	/* safety check */
340 				elog(ERROR, "invalid column number %d", atnum);
341 			from = TupleDescAttr(heapTupDesc,
342 								 AttrNumberGetAttrOffset(atnum));
343 
344 			to->atttypid = from->atttypid;
345 			to->attlen = from->attlen;
346 			to->attndims = from->attndims;
347 			to->atttypmod = from->atttypmod;
348 			to->attbyval = from->attbyval;
349 			to->attalign = from->attalign;
350 			to->attstorage = from->attstorage;
351 			to->attcompression = from->attcompression;
352 		}
353 		else
354 		{
355 			/* Expressional index */
356 			Node	   *indexkey;
357 
358 			if (indexpr_item == NULL)	/* shouldn't happen */
359 				elog(ERROR, "too few entries in indexprs list");
360 			indexkey = (Node *) lfirst(indexpr_item);
361 			indexpr_item = lnext(indexInfo->ii_Expressions, indexpr_item);
362 
363 			/*
364 			 * Lookup the expression type in pg_type for the type length etc.
365 			 */
366 			keyType = exprType(indexkey);
367 			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
368 			if (!HeapTupleIsValid(tuple))
369 				elog(ERROR, "cache lookup failed for type %u", keyType);
370 			typeTup = (Form_pg_type) GETSTRUCT(tuple);
371 
372 			/*
373 			 * Assign some of the attributes values. Leave the rest.
374 			 */
375 			to->atttypid = keyType;
376 			to->attlen = typeTup->typlen;
377 			to->atttypmod = exprTypmod(indexkey);
378 			to->attbyval = typeTup->typbyval;
379 			to->attalign = typeTup->typalign;
380 			to->attstorage = typeTup->typstorage;
381 
382 			/*
383 			 * For expression columns, set attcompression invalid, since
384 			 * there's no table column from which to copy the value. Whenever
385 			 * we actually need to compress a value, we'll use whatever the
386 			 * current value of default_toast_compression is at that point in
387 			 * time.
388 			 */
389 			to->attcompression = InvalidCompressionMethod;
390 
391 			ReleaseSysCache(tuple);
392 
393 			/*
394 			 * Make sure the expression yields a type that's safe to store in
395 			 * an index.  We need this defense because we have index opclasses
396 			 * for pseudo-types such as "record", and the actually stored type
397 			 * had better be safe; eg, a named composite type is okay, an
398 			 * anonymous record type is not.  The test is the same as for
399 			 * whether a table column is of a safe type (which is why we
400 			 * needn't check for the non-expression case).
401 			 */
402 			CheckAttributeType(NameStr(to->attname),
403 							   to->atttypid, to->attcollation,
404 							   NIL, 0);
405 		}
406 
407 		/*
408 		 * We do not yet have the correct relation OID for the index, so just
409 		 * set it invalid for now.  InitializeAttributeOids() will fix it
410 		 * later.
411 		 */
412 		to->attrelid = InvalidOid;
413 
414 		/*
415 		 * Check the opclass and index AM to see if either provides a keytype
416 		 * (overriding the attribute type).  Opclass (if exists) takes
417 		 * precedence.
418 		 */
419 		keyType = amroutine->amkeytype;
420 
421 		if (i < indexInfo->ii_NumIndexKeyAttrs)
422 		{
423 			tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
424 			if (!HeapTupleIsValid(tuple))
425 				elog(ERROR, "cache lookup failed for opclass %u",
426 					 classObjectId[i]);
427 			opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
428 			if (OidIsValid(opclassTup->opckeytype))
429 				keyType = opclassTup->opckeytype;
430 
431 			/*
432 			 * If keytype is specified as ANYELEMENT, and opcintype is
433 			 * ANYARRAY, then the attribute type must be an array (else it'd
434 			 * not have matched this opclass); use its element type.
435 			 *
436 			 * We could also allow ANYCOMPATIBLE/ANYCOMPATIBLEARRAY here, but
437 			 * there seems no need to do so; there's no reason to declare an
438 			 * opclass as taking ANYCOMPATIBLEARRAY rather than ANYARRAY.
439 			 */
440 			if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
441 			{
442 				keyType = get_base_element_type(to->atttypid);
443 				if (!OidIsValid(keyType))
444 					elog(ERROR, "could not get element type of array type %u",
445 						 to->atttypid);
446 			}
447 
448 			ReleaseSysCache(tuple);
449 		}
450 
451 		/*
452 		 * If a key type different from the heap value is specified, update
453 		 * the type-related fields in the index tupdesc.
454 		 */
455 		if (OidIsValid(keyType) && keyType != to->atttypid)
456 		{
457 			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
458 			if (!HeapTupleIsValid(tuple))
459 				elog(ERROR, "cache lookup failed for type %u", keyType);
460 			typeTup = (Form_pg_type) GETSTRUCT(tuple);
461 
462 			to->atttypid = keyType;
463 			to->atttypmod = -1;
464 			to->attlen = typeTup->typlen;
465 			to->attbyval = typeTup->typbyval;
466 			to->attalign = typeTup->typalign;
467 			to->attstorage = typeTup->typstorage;
468 			/* As above, use the default compression method in this case */
469 			to->attcompression = InvalidCompressionMethod;
470 
471 			ReleaseSysCache(tuple);
472 		}
473 	}
474 
475 	pfree(amroutine);
476 
477 	return indexTupDesc;
478 }
479 
480 /* ----------------------------------------------------------------
481  *		InitializeAttributeOids
482  * ----------------------------------------------------------------
483  */
484 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)485 InitializeAttributeOids(Relation indexRelation,
486 						int numatts,
487 						Oid indexoid)
488 {
489 	TupleDesc	tupleDescriptor;
490 	int			i;
491 
492 	tupleDescriptor = RelationGetDescr(indexRelation);
493 
494 	for (i = 0; i < numatts; i += 1)
495 		TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
496 }
497 
498 /* ----------------------------------------------------------------
499  *		AppendAttributeTuples
500  * ----------------------------------------------------------------
501  */
502 static void
AppendAttributeTuples(Relation indexRelation,Datum * attopts)503 AppendAttributeTuples(Relation indexRelation, Datum *attopts)
504 {
505 	Relation	pg_attribute;
506 	CatalogIndexState indstate;
507 	TupleDesc	indexTupDesc;
508 
509 	/*
510 	 * open the attribute relation and its indexes
511 	 */
512 	pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
513 
514 	indstate = CatalogOpenIndexes(pg_attribute);
515 
516 	/*
517 	 * insert data from new index's tupdesc into pg_attribute
518 	 */
519 	indexTupDesc = RelationGetDescr(indexRelation);
520 
521 	InsertPgAttributeTuples(pg_attribute, indexTupDesc, InvalidOid, attopts, indstate);
522 
523 	CatalogCloseIndexes(indstate);
524 
525 	table_close(pg_attribute, RowExclusiveLock);
526 }
527 
528 /* ----------------------------------------------------------------
529  *		UpdateIndexRelation
530  *
531  * Construct and insert a new entry in the pg_index catalog
532  * ----------------------------------------------------------------
533  */
534 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,Oid parentIndexId,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid,bool isready)535 UpdateIndexRelation(Oid indexoid,
536 					Oid heapoid,
537 					Oid parentIndexId,
538 					IndexInfo *indexInfo,
539 					Oid *collationOids,
540 					Oid *classOids,
541 					int16 *coloptions,
542 					bool primary,
543 					bool isexclusion,
544 					bool immediate,
545 					bool isvalid,
546 					bool isready)
547 {
548 	int2vector *indkey;
549 	oidvector  *indcollation;
550 	oidvector  *indclass;
551 	int2vector *indoption;
552 	Datum		exprsDatum;
553 	Datum		predDatum;
554 	Datum		values[Natts_pg_index];
555 	bool		nulls[Natts_pg_index];
556 	Relation	pg_index;
557 	HeapTuple	tuple;
558 	int			i;
559 
560 	/*
561 	 * Copy the index key, opclass, and indoption info into arrays (should we
562 	 * make the caller pass them like this to start with?)
563 	 */
564 	indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
565 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
566 		indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
567 	indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
568 	indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
569 	indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);
570 
571 	/*
572 	 * Convert the index expressions (if any) to a text datum
573 	 */
574 	if (indexInfo->ii_Expressions != NIL)
575 	{
576 		char	   *exprsString;
577 
578 		exprsString = nodeToString(indexInfo->ii_Expressions);
579 		exprsDatum = CStringGetTextDatum(exprsString);
580 		pfree(exprsString);
581 	}
582 	else
583 		exprsDatum = (Datum) 0;
584 
585 	/*
586 	 * Convert the index predicate (if any) to a text datum.  Note we convert
587 	 * implicit-AND format to normal explicit-AND for storage.
588 	 */
589 	if (indexInfo->ii_Predicate != NIL)
590 	{
591 		char	   *predString;
592 
593 		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
594 		predDatum = CStringGetTextDatum(predString);
595 		pfree(predString);
596 	}
597 	else
598 		predDatum = (Datum) 0;
599 
600 
601 	/*
602 	 * open the system catalog index relation
603 	 */
604 	pg_index = table_open(IndexRelationId, RowExclusiveLock);
605 
606 	/*
607 	 * Build a pg_index tuple
608 	 */
609 	MemSet(nulls, false, sizeof(nulls));
610 
611 	values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
612 	values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
613 	values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
614 	values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
615 	values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
616 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
617 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
618 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
619 	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
620 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
621 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
622 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
623 	values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
624 	values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
625 	values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
626 	values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
627 	values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
628 	values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
629 	values[Anum_pg_index_indexprs - 1] = exprsDatum;
630 	if (exprsDatum == (Datum) 0)
631 		nulls[Anum_pg_index_indexprs - 1] = true;
632 	values[Anum_pg_index_indpred - 1] = predDatum;
633 	if (predDatum == (Datum) 0)
634 		nulls[Anum_pg_index_indpred - 1] = true;
635 
636 	tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
637 
638 	/*
639 	 * insert the tuple into the pg_index catalog
640 	 */
641 	CatalogTupleInsert(pg_index, tuple);
642 
643 	/*
644 	 * close the relation and free the tuple
645 	 */
646 	table_close(pg_index, RowExclusiveLock);
647 	heap_freetuple(tuple);
648 }
649 
650 
651 /*
652  * index_create
653  *
654  * heapRelation: table to build index on (suitably locked by caller)
 * indexRelationName: name to give the new index relation
656  * indexRelationId: normally, pass InvalidOid to let this routine
657  *		generate an OID for the index.  During bootstrap this may be
658  *		nonzero to specify a preselected OID.
659  * parentIndexRelid: if creating an index partition, the OID of the
660  *		parent index; otherwise InvalidOid.
661  * parentConstraintId: if creating a constraint on a partition, the OID
662  *		of the constraint in the parent; otherwise InvalidOid.
663  * relFileNode: normally, pass InvalidOid to get new storage.  May be
664  *		nonzero to attach an existing valid build.
665  * indexInfo: same info executor uses to insert into the index
666  * indexColNames: column names to use for index (List of char *)
667  * accessMethodObjectId: OID of index AM to use
668  * tableSpaceId: OID of tablespace to use
669  * collationObjectId: array of collation OIDs, one per index column
670  * classObjectId: array of index opclass OIDs, one per index column
671  * coloptions: array of per-index-column indoption settings
672  * reloptions: AM-specific options
673  * flags: bitmask that can include any combination of these bits:
674  *		INDEX_CREATE_IS_PRIMARY
675  *			the index is a primary key
676  *		INDEX_CREATE_ADD_CONSTRAINT:
677  *			invoke index_constraint_create also
678  *		INDEX_CREATE_SKIP_BUILD:
679  *			skip the index_build() step for the moment; caller must do it
680  *			later (typically via reindex_index())
681  *		INDEX_CREATE_CONCURRENT:
682  *			do not lock the table against writers.  The index will be
683  *			marked "invalid" and the caller must take additional steps
684  *			to fix it up.
685  *		INDEX_CREATE_IF_NOT_EXISTS:
686  *			do not throw an error if a relation with the same name
687  *			already exists.
688  *		INDEX_CREATE_PARTITIONED:
689  *			create a partitioned index (table must be partitioned)
690  * constr_flags: flags passed to index_constraint_create
691  *		(only if INDEX_CREATE_ADD_CONSTRAINT is set)
692  * allow_system_table_mods: allow table to be a system catalog
693  * is_internal: if true, post creation hook for new index
694  * constraintId: if not NULL, receives OID of created constraint
695  *
696  * Returns the OID of the created index.
697  */
698 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid parentIndexRelid,Oid parentConstraintId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bits16 flags,bits16 constr_flags,bool allow_system_table_mods,bool is_internal,Oid * constraintId)699 index_create(Relation heapRelation,
700 			 const char *indexRelationName,
701 			 Oid indexRelationId,
702 			 Oid parentIndexRelid,
703 			 Oid parentConstraintId,
704 			 Oid relFileNode,
705 			 IndexInfo *indexInfo,
706 			 List *indexColNames,
707 			 Oid accessMethodObjectId,
708 			 Oid tableSpaceId,
709 			 Oid *collationObjectId,
710 			 Oid *classObjectId,
711 			 int16 *coloptions,
712 			 Datum reloptions,
713 			 bits16 flags,
714 			 bits16 constr_flags,
715 			 bool allow_system_table_mods,
716 			 bool is_internal,
717 			 Oid *constraintId)
718 {
719 	Oid			heapRelationId = RelationGetRelid(heapRelation);
720 	Relation	pg_class;
721 	Relation	indexRelation;
722 	TupleDesc	indexTupDesc;
723 	bool		shared_relation;
724 	bool		mapped_relation;
725 	bool		is_exclusion;
726 	Oid			namespaceId;
727 	int			i;
728 	char		relpersistence;
729 	bool		isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
730 	bool		invalid = (flags & INDEX_CREATE_INVALID) != 0;
731 	bool		concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
732 	bool		partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
733 	char		relkind;
734 	TransactionId relfrozenxid;
735 	MultiXactId relminmxid;
736 
737 	/* constraint flags can only be set when a constraint is requested */
738 	Assert((constr_flags == 0) ||
739 		   ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
740 	/* partitioned indexes must never be "built" by themselves */
741 	Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
742 
743 	relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
744 	is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
745 
746 	pg_class = table_open(RelationRelationId, RowExclusiveLock);
747 
748 	/*
749 	 * The index will be in the same namespace as its parent table, and is
750 	 * shared across databases if and only if the parent is.  Likewise, it
751 	 * will use the relfilenode map if and only if the parent does; and it
752 	 * inherits the parent's relpersistence.
753 	 */
754 	namespaceId = RelationGetNamespace(heapRelation);
755 	shared_relation = heapRelation->rd_rel->relisshared;
756 	mapped_relation = RelationIsMapped(heapRelation);
757 	relpersistence = heapRelation->rd_rel->relpersistence;
758 
759 	/*
760 	 * check parameters
761 	 */
762 	if (indexInfo->ii_NumIndexAttrs < 1)
763 		elog(ERROR, "must index at least one column");
764 
765 	if (!allow_system_table_mods &&
766 		IsSystemRelation(heapRelation) &&
767 		IsNormalProcessingMode())
768 		ereport(ERROR,
769 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
770 				 errmsg("user-defined indexes on system catalog tables are not supported")));
771 
772 	/*
773 	 * Btree text_pattern_ops uses text_eq as the equality operator, which is
774 	 * fine as long as the collation is deterministic; text_eq then reduces to
775 	 * bitwise equality and so it is semantically compatible with the other
776 	 * operators and functions in that opclass.  But with a nondeterministic
777 	 * collation, text_eq could yield results that are incompatible with the
778 	 * actual behavior of the index (which is determined by the opclass's
779 	 * comparison function).  We prevent such problems by refusing creation of
780 	 * an index with that opclass and a nondeterministic collation.
781 	 *
782 	 * The same applies to varchar_pattern_ops and bpchar_pattern_ops.  If we
783 	 * find more cases, we might decide to create a real mechanism for marking
784 	 * opclasses as incompatible with nondeterminism; but for now, this small
785 	 * hack suffices.
786 	 *
787 	 * Another solution is to use a special operator, not text_eq, as the
788 	 * equality opclass member; but that is undesirable because it would
789 	 * prevent index usage in many queries that work fine today.
790 	 */
791 	for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
792 	{
793 		Oid			collation = collationObjectId[i];
794 		Oid			opclass = classObjectId[i];
795 
796 		if (collation)
797 		{
798 			if ((opclass == TEXT_BTREE_PATTERN_OPS_OID ||
799 				 opclass == VARCHAR_BTREE_PATTERN_OPS_OID ||
800 				 opclass == BPCHAR_BTREE_PATTERN_OPS_OID) &&
801 				!get_collation_isdeterministic(collation))
802 			{
803 				HeapTuple	classtup;
804 
805 				classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclass));
806 				if (!HeapTupleIsValid(classtup))
807 					elog(ERROR, "cache lookup failed for operator class %u", opclass);
808 				ereport(ERROR,
809 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
810 						 errmsg("nondeterministic collations are not supported for operator class \"%s\"",
811 								NameStr(((Form_pg_opclass) GETSTRUCT(classtup))->opcname))));
812 				ReleaseSysCache(classtup);
813 			}
814 		}
815 	}
816 
817 	/*
818 	 * Concurrent index build on a system catalog is unsafe because we tend to
819 	 * release locks before committing in catalogs.
820 	 */
821 	if (concurrent &&
822 		IsCatalogRelation(heapRelation))
823 		ereport(ERROR,
824 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
825 				 errmsg("concurrent index creation on system catalog tables is not supported")));
826 
827 	/*
828 	 * This case is currently not supported.  There's no way to ask for it in
829 	 * the grammar with CREATE INDEX, but it can happen with REINDEX.
830 	 */
831 	if (concurrent && is_exclusion)
832 		ereport(ERROR,
833 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
834 				 errmsg("concurrent index creation for exclusion constraints is not supported")));
835 
836 	/*
837 	 * We cannot allow indexing a shared relation after initdb (because
838 	 * there's no way to make the entry in other databases' pg_class).
839 	 */
840 	if (shared_relation && !IsBootstrapProcessingMode())
841 		ereport(ERROR,
842 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
843 				 errmsg("shared indexes cannot be created after initdb")));
844 
845 	/*
846 	 * Shared relations must be in pg_global, too (last-ditch check)
847 	 */
848 	if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
849 		elog(ERROR, "shared relations must be placed in pg_global tablespace");
850 
851 	/*
852 	 * Check for duplicate name (both as to the index, and as to the
853 	 * associated constraint if any).  Such cases would fail on the relevant
854 	 * catalogs' unique indexes anyway, but we prefer to give a friendlier
855 	 * error message.
856 	 */
857 	if (get_relname_relid(indexRelationName, namespaceId))
858 	{
859 		if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
860 		{
861 			ereport(NOTICE,
862 					(errcode(ERRCODE_DUPLICATE_TABLE),
863 					 errmsg("relation \"%s\" already exists, skipping",
864 							indexRelationName)));
865 			table_close(pg_class, RowExclusiveLock);
866 			return InvalidOid;
867 		}
868 
869 		ereport(ERROR,
870 				(errcode(ERRCODE_DUPLICATE_TABLE),
871 				 errmsg("relation \"%s\" already exists",
872 						indexRelationName)));
873 	}
874 
875 	if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
876 		ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
877 							 indexRelationName))
878 	{
879 		/*
880 		 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
881 		 * conflicting constraint is not an index.
882 		 */
883 		ereport(ERROR,
884 				(errcode(ERRCODE_DUPLICATE_OBJECT),
885 				 errmsg("constraint \"%s\" for relation \"%s\" already exists",
886 						indexRelationName, RelationGetRelationName(heapRelation))));
887 	}
888 
889 	/*
890 	 * construct tuple descriptor for index tuples
891 	 */
892 	indexTupDesc = ConstructTupleDescriptor(heapRelation,
893 											indexInfo,
894 											indexColNames,
895 											accessMethodObjectId,
896 											collationObjectId,
897 											classObjectId);
898 
899 	/*
900 	 * Allocate an OID for the index, unless we were told what to use.
901 	 *
902 	 * The OID will be the relfilenode as well, so make sure it doesn't
903 	 * collide with either pg_class OIDs or existing physical files.
904 	 */
905 	if (!OidIsValid(indexRelationId))
906 	{
907 		/* Use binary-upgrade override for pg_class.oid/relfilenode? */
908 		if (IsBinaryUpgrade)
909 		{
910 			if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
911 				ereport(ERROR,
912 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
913 						 errmsg("pg_class index OID value not set when in binary upgrade mode")));
914 
915 			indexRelationId = binary_upgrade_next_index_pg_class_oid;
916 			binary_upgrade_next_index_pg_class_oid = InvalidOid;
917 		}
918 		else
919 		{
920 			indexRelationId =
921 				GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
922 		}
923 	}
924 
925 	/*
926 	 * create the index relation's relcache entry and, if necessary, the
927 	 * physical disk file. (If we fail further down, it's the smgr's
928 	 * responsibility to remove the disk file again, if any.)
929 	 */
930 	indexRelation = heap_create(indexRelationName,
931 								namespaceId,
932 								tableSpaceId,
933 								indexRelationId,
934 								relFileNode,
935 								accessMethodObjectId,
936 								indexTupDesc,
937 								relkind,
938 								relpersistence,
939 								shared_relation,
940 								mapped_relation,
941 								allow_system_table_mods,
942 								&relfrozenxid,
943 								&relminmxid);
944 
945 	Assert(relfrozenxid == InvalidTransactionId);
946 	Assert(relminmxid == InvalidMultiXactId);
947 	Assert(indexRelationId == RelationGetRelid(indexRelation));
948 
949 	/*
950 	 * Obtain exclusive lock on it.  Although no other transactions can see it
951 	 * until we commit, this prevents deadlock-risk complaints from lock
952 	 * manager in cases such as CLUSTER.
953 	 */
954 	LockRelation(indexRelation, AccessExclusiveLock);
955 
956 	/*
957 	 * Fill in fields of the index's pg_class entry that are not set correctly
958 	 * by heap_create.
959 	 *
960 	 * XXX should have a cleaner way to create cataloged indexes
961 	 */
962 	indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
963 	indexRelation->rd_rel->relam = accessMethodObjectId;
964 	indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
965 
966 	/*
967 	 * store index's pg_class entry
968 	 */
969 	InsertPgClassTuple(pg_class, indexRelation,
970 					   RelationGetRelid(indexRelation),
971 					   (Datum) 0,
972 					   reloptions);
973 
974 	/* done with pg_class */
975 	table_close(pg_class, RowExclusiveLock);
976 
977 	/*
978 	 * now update the object id's of all the attribute tuple forms in the
979 	 * index relation's tuple descriptor
980 	 */
981 	InitializeAttributeOids(indexRelation,
982 							indexInfo->ii_NumIndexAttrs,
983 							indexRelationId);
984 
985 	/*
986 	 * append ATTRIBUTE tuples for the index
987 	 */
988 	AppendAttributeTuples(indexRelation, indexInfo->ii_OpclassOptions);
989 
990 	/* ----------------
991 	 *	  update pg_index
992 	 *	  (append INDEX tuple)
993 	 *
994 	 *	  Note that this stows away a representation of "predicate".
995 	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
996 	 * ----------------
997 	 */
998 	UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
999 						indexInfo,
1000 						collationObjectId, classObjectId, coloptions,
1001 						isprimary, is_exclusion,
1002 						(constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
1003 						!concurrent && !invalid,
1004 						!concurrent);
1005 
1006 	/*
1007 	 * Register relcache invalidation on the indexes' heap relation, to
1008 	 * maintain consistency of its index list
1009 	 */
1010 	CacheInvalidateRelcache(heapRelation);
1011 
1012 	/* update pg_inherits and the parent's relhassubclass, if needed */
1013 	if (OidIsValid(parentIndexRelid))
1014 	{
1015 		StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1016 		SetRelationHasSubclass(parentIndexRelid, true);
1017 	}
1018 
1019 	/*
1020 	 * Register constraint and dependencies for the index.
1021 	 *
1022 	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1023 	 * entry.  The index will be linked to the constraint, which in turn is
1024 	 * linked to the table.  If it's not a CONSTRAINT, we need to make a
1025 	 * dependency directly on the table.
1026 	 *
1027 	 * We don't need a dependency on the namespace, because there'll be an
1028 	 * indirect dependency via our parent table.
1029 	 *
1030 	 * During bootstrap we can't register any dependencies, and we don't try
1031 	 * to make a constraint either.
1032 	 */
1033 	if (!IsBootstrapProcessingMode())
1034 	{
1035 		ObjectAddress myself,
1036 					referenced;
1037 		ObjectAddresses *addrs;
1038 
1039 		ObjectAddressSet(myself, RelationRelationId, indexRelationId);
1040 
1041 		if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1042 		{
1043 			char		constraintType;
1044 			ObjectAddress localaddr;
1045 
1046 			if (isprimary)
1047 				constraintType = CONSTRAINT_PRIMARY;
1048 			else if (indexInfo->ii_Unique)
1049 				constraintType = CONSTRAINT_UNIQUE;
1050 			else if (is_exclusion)
1051 				constraintType = CONSTRAINT_EXCLUSION;
1052 			else
1053 			{
1054 				elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1055 				constraintType = 0; /* keep compiler quiet */
1056 			}
1057 
1058 			localaddr = index_constraint_create(heapRelation,
1059 												indexRelationId,
1060 												parentConstraintId,
1061 												indexInfo,
1062 												indexRelationName,
1063 												constraintType,
1064 												constr_flags,
1065 												allow_system_table_mods,
1066 												is_internal);
1067 			if (constraintId)
1068 				*constraintId = localaddr.objectId;
1069 		}
1070 		else
1071 		{
1072 			bool		have_simple_col = false;
1073 
1074 			addrs = new_object_addresses();
1075 
1076 			/* Create auto dependencies on simply-referenced columns */
1077 			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1078 			{
1079 				if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1080 				{
1081 					ObjectAddressSubSet(referenced, RelationRelationId,
1082 										heapRelationId,
1083 										indexInfo->ii_IndexAttrNumbers[i]);
1084 					add_exact_object_address(&referenced, addrs);
1085 					have_simple_col = true;
1086 				}
1087 			}
1088 
1089 			/*
1090 			 * If there are no simply-referenced columns, give the index an
1091 			 * auto dependency on the whole table.  In most cases, this will
1092 			 * be redundant, but it might not be if the index expressions and
1093 			 * predicate contain no Vars or only whole-row Vars.
1094 			 */
1095 			if (!have_simple_col)
1096 			{
1097 				ObjectAddressSet(referenced, RelationRelationId,
1098 								 heapRelationId);
1099 				add_exact_object_address(&referenced, addrs);
1100 			}
1101 
1102 			record_object_address_dependencies(&myself, addrs, DEPENDENCY_AUTO);
1103 			free_object_addresses(addrs);
1104 		}
1105 
1106 		/*
1107 		 * If this is an index partition, create partition dependencies on
1108 		 * both the parent index and the table.  (Note: these must be *in
1109 		 * addition to*, not instead of, all other dependencies.  Otherwise
1110 		 * we'll be short some dependencies after DETACH PARTITION.)
1111 		 */
1112 		if (OidIsValid(parentIndexRelid))
1113 		{
1114 			ObjectAddressSet(referenced, RelationRelationId, parentIndexRelid);
1115 			recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
1116 
1117 			ObjectAddressSet(referenced, RelationRelationId, heapRelationId);
1118 			recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
1119 		}
1120 
1121 		/* placeholder for normal dependencies */
1122 		addrs = new_object_addresses();
1123 
1124 		/* Store dependency on collations */
1125 
1126 		/* The default collation is pinned, so don't bother recording it */
1127 		for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1128 		{
1129 			if (OidIsValid(collationObjectId[i]) &&
1130 				collationObjectId[i] != DEFAULT_COLLATION_OID)
1131 			{
1132 				ObjectAddressSet(referenced, CollationRelationId,
1133 								 collationObjectId[i]);
1134 				add_exact_object_address(&referenced, addrs);
1135 			}
1136 		}
1137 
1138 		/* Store dependency on operator classes */
1139 		for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1140 		{
1141 			ObjectAddressSet(referenced, OperatorClassRelationId, classObjectId[i]);
1142 			add_exact_object_address(&referenced, addrs);
1143 		}
1144 
1145 		record_object_address_dependencies(&myself, addrs, DEPENDENCY_NORMAL);
1146 		free_object_addresses(addrs);
1147 
1148 		/* Store dependencies on anything mentioned in index expressions */
1149 		if (indexInfo->ii_Expressions)
1150 		{
1151 			recordDependencyOnSingleRelExpr(&myself,
1152 											(Node *) indexInfo->ii_Expressions,
1153 											heapRelationId,
1154 											DEPENDENCY_NORMAL,
1155 											DEPENDENCY_AUTO, false);
1156 		}
1157 
1158 		/* Store dependencies on anything mentioned in predicate */
1159 		if (indexInfo->ii_Predicate)
1160 		{
1161 			recordDependencyOnSingleRelExpr(&myself,
1162 											(Node *) indexInfo->ii_Predicate,
1163 											heapRelationId,
1164 											DEPENDENCY_NORMAL,
1165 											DEPENDENCY_AUTO, false);
1166 		}
1167 	}
1168 	else
1169 	{
1170 		/* Bootstrap mode - assert we weren't asked for constraint support */
1171 		Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1172 	}
1173 
1174 	/* Post creation hook for new index */
1175 	InvokeObjectPostCreateHookArg(RelationRelationId,
1176 								  indexRelationId, 0, is_internal);
1177 
1178 	/*
1179 	 * Advance the command counter so that we can see the newly-entered
1180 	 * catalog tuples for the index.
1181 	 */
1182 	CommandCounterIncrement();
1183 
1184 	/*
1185 	 * In bootstrap mode, we have to fill in the index strategy structure with
1186 	 * information from the catalogs.  If we aren't bootstrapping, then the
1187 	 * relcache entry has already been rebuilt thanks to sinval update during
1188 	 * CommandCounterIncrement.
1189 	 */
1190 	if (IsBootstrapProcessingMode())
1191 		RelationInitIndexAccessInfo(indexRelation);
1192 	else
1193 		Assert(indexRelation->rd_indexcxt != NULL);
1194 
1195 	indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1196 
1197 	/* Validate opclass-specific options */
1198 	if (indexInfo->ii_OpclassOptions)
1199 		for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1200 			(void) index_opclass_options(indexRelation, i + 1,
1201 										 indexInfo->ii_OpclassOptions[i],
1202 										 true);
1203 
1204 	/*
1205 	 * If this is bootstrap (initdb) time, then we don't actually fill in the
1206 	 * index yet.  We'll be creating more indexes and classes later, so we
1207 	 * delay filling them in until just before we're done with bootstrapping.
1208 	 * Similarly, if the caller specified to skip the build then filling the
1209 	 * index is delayed till later (ALTER TABLE can save work in some cases
1210 	 * with this).  Otherwise, we call the AM routine that constructs the
1211 	 * index.
1212 	 */
1213 	if (IsBootstrapProcessingMode())
1214 	{
1215 		index_register(heapRelationId, indexRelationId, indexInfo);
1216 	}
1217 	else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1218 	{
1219 		/*
1220 		 * Caller is responsible for filling the index later on.  However,
1221 		 * we'd better make sure that the heap relation is correctly marked as
1222 		 * having an index.
1223 		 */
1224 		index_update_stats(heapRelation,
1225 						   true,
1226 						   -1.0);
1227 		/* Make the above update visible */
1228 		CommandCounterIncrement();
1229 	}
1230 	else
1231 	{
1232 		index_build(heapRelation, indexRelation, indexInfo, false, true);
1233 	}
1234 
1235 	/*
1236 	 * Close the index; but we keep the lock that we acquired above until end
1237 	 * of transaction.  Closing the heap is caller's responsibility.
1238 	 */
1239 	index_close(indexRelation, NoLock);
1240 
1241 	return indexRelationId;
1242 }
1243 
1244 /*
1245  * index_concurrently_create_copy
1246  *
1247  * Create concurrently an index based on the definition of the one provided by
1248  * caller.  The index is inserted into catalogs and needs to be built later
1249  * on.  This is called during concurrent reindex processing.
1250  *
1251  * "tablespaceOid" is the tablespace to use for this index.
1252  */
1253 Oid
index_concurrently_create_copy(Relation heapRelation,Oid oldIndexId,Oid tablespaceOid,const char * newName)1254 index_concurrently_create_copy(Relation heapRelation, Oid oldIndexId,
1255 							   Oid tablespaceOid, const char *newName)
1256 {
1257 	Relation	indexRelation;
1258 	IndexInfo  *oldInfo,
1259 			   *newInfo;
1260 	Oid			newIndexId = InvalidOid;
1261 	HeapTuple	indexTuple,
1262 				classTuple;
1263 	Datum		indclassDatum,
1264 				colOptionDatum,
1265 				optionDatum;
1266 	oidvector  *indclass;
1267 	int2vector *indcoloptions;
1268 	bool		isnull;
1269 	List	   *indexColNames = NIL;
1270 	List	   *indexExprs = NIL;
1271 	List	   *indexPreds = NIL;
1272 
1273 	indexRelation = index_open(oldIndexId, RowExclusiveLock);
1274 
1275 	/* The new index needs some information from the old index */
1276 	oldInfo = BuildIndexInfo(indexRelation);
1277 
1278 	/*
1279 	 * Concurrent build of an index with exclusion constraints is not
1280 	 * supported.
1281 	 */
1282 	if (oldInfo->ii_ExclusionOps != NULL)
1283 		ereport(ERROR,
1284 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1285 				 errmsg("concurrent index creation for exclusion constraints is not supported")));
1286 
1287 	/* Get the array of class and column options IDs from index info */
1288 	indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldIndexId));
1289 	if (!HeapTupleIsValid(indexTuple))
1290 		elog(ERROR, "cache lookup failed for index %u", oldIndexId);
1291 	indclassDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1292 									Anum_pg_index_indclass, &isnull);
1293 	Assert(!isnull);
1294 	indclass = (oidvector *) DatumGetPointer(indclassDatum);
1295 
1296 	colOptionDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1297 									 Anum_pg_index_indoption, &isnull);
1298 	Assert(!isnull);
1299 	indcoloptions = (int2vector *) DatumGetPointer(colOptionDatum);
1300 
1301 	/* Fetch options of index if any */
1302 	classTuple = SearchSysCache1(RELOID, oldIndexId);
1303 	if (!HeapTupleIsValid(classTuple))
1304 		elog(ERROR, "cache lookup failed for relation %u", oldIndexId);
1305 	optionDatum = SysCacheGetAttr(RELOID, classTuple,
1306 								  Anum_pg_class_reloptions, &isnull);
1307 
1308 	/*
1309 	 * Fetch the list of expressions and predicates directly from the
1310 	 * catalogs.  This cannot rely on the information from IndexInfo of the
1311 	 * old index as these have been flattened for the planner.
1312 	 */
1313 	if (oldInfo->ii_Expressions != NIL)
1314 	{
1315 		Datum		exprDatum;
1316 		char	   *exprString;
1317 
1318 		exprDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1319 									Anum_pg_index_indexprs, &isnull);
1320 		Assert(!isnull);
1321 		exprString = TextDatumGetCString(exprDatum);
1322 		indexExprs = (List *) stringToNode(exprString);
1323 		pfree(exprString);
1324 	}
1325 	if (oldInfo->ii_Predicate != NIL)
1326 	{
1327 		Datum		predDatum;
1328 		char	   *predString;
1329 
1330 		predDatum = SysCacheGetAttr(INDEXRELID, indexTuple,
1331 									Anum_pg_index_indpred, &isnull);
1332 		Assert(!isnull);
1333 		predString = TextDatumGetCString(predDatum);
1334 		indexPreds = (List *) stringToNode(predString);
1335 
1336 		/* Also convert to implicit-AND format */
1337 		indexPreds = make_ands_implicit((Expr *) indexPreds);
1338 		pfree(predString);
1339 	}
1340 
1341 	/*
1342 	 * Build the index information for the new index.  Note that rebuild of
1343 	 * indexes with exclusion constraints is not supported, hence there is no
1344 	 * need to fill all the ii_Exclusion* fields.
1345 	 */
1346 	newInfo = makeIndexInfo(oldInfo->ii_NumIndexAttrs,
1347 							oldInfo->ii_NumIndexKeyAttrs,
1348 							oldInfo->ii_Am,
1349 							indexExprs,
1350 							indexPreds,
1351 							oldInfo->ii_Unique,
1352 							false,	/* not ready for inserts */
1353 							true);
1354 
1355 	/*
1356 	 * Extract the list of column names and the column numbers for the new
1357 	 * index information.  All this information will be used for the index
1358 	 * creation.
1359 	 */
1360 	for (int i = 0; i < oldInfo->ii_NumIndexAttrs; i++)
1361 	{
1362 		TupleDesc	indexTupDesc = RelationGetDescr(indexRelation);
1363 		Form_pg_attribute att = TupleDescAttr(indexTupDesc, i);
1364 
1365 		indexColNames = lappend(indexColNames, NameStr(att->attname));
1366 		newInfo->ii_IndexAttrNumbers[i] = oldInfo->ii_IndexAttrNumbers[i];
1367 	}
1368 
1369 	/* Extract opclass parameters for each attribute, if any */
1370 	if (oldInfo->ii_OpclassOptions != NULL)
1371 	{
1372 		newInfo->ii_OpclassOptions = palloc0(sizeof(Datum) *
1373 											 newInfo->ii_NumIndexAttrs);
1374 		for (int i = 0; i < newInfo->ii_NumIndexAttrs; i++)
1375 			newInfo->ii_OpclassOptions[i] = get_attoptions(oldIndexId, i + 1);
1376 	}
1377 
1378 	/*
1379 	 * Now create the new index.
1380 	 *
1381 	 * For a partition index, we adjust the partition dependency later, to
1382 	 * ensure a consistent state at all times.  That is why parentIndexRelid
1383 	 * is not set here.
1384 	 */
1385 	newIndexId = index_create(heapRelation,
1386 							  newName,
1387 							  InvalidOid,	/* indexRelationId */
1388 							  InvalidOid,	/* parentIndexRelid */
1389 							  InvalidOid,	/* parentConstraintId */
1390 							  InvalidOid,	/* relFileNode */
1391 							  newInfo,
1392 							  indexColNames,
1393 							  indexRelation->rd_rel->relam,
1394 							  tablespaceOid,
1395 							  indexRelation->rd_indcollation,
1396 							  indclass->values,
1397 							  indcoloptions->values,
1398 							  optionDatum,
1399 							  INDEX_CREATE_SKIP_BUILD | INDEX_CREATE_CONCURRENT,
1400 							  0,
1401 							  true, /* allow table to be a system catalog? */
1402 							  false,	/* is_internal? */
1403 							  NULL);
1404 
1405 	/* Close the relations used and clean up */
1406 	index_close(indexRelation, NoLock);
1407 	ReleaseSysCache(indexTuple);
1408 	ReleaseSysCache(classTuple);
1409 
1410 	return newIndexId;
1411 }
1412 
1413 /*
1414  * index_concurrently_build
1415  *
1416  * Build index for a concurrent operation.  Low-level locks are taken when
1417  * this operation is performed to prevent only schema changes, but they need
1418  * to be kept until the end of the transaction performing this operation.
1419  * 'indexOid' refers to an index relation OID already created as part of
1420  * previous processing, and 'heapOid' refers to its parent heap relation.
1421  */
1422 void
index_concurrently_build(Oid heapRelationId,Oid indexRelationId)1423 index_concurrently_build(Oid heapRelationId,
1424 						 Oid indexRelationId)
1425 {
1426 	Relation	heapRel;
1427 	Relation	indexRelation;
1428 	IndexInfo  *indexInfo;
1429 
1430 	/* This had better make sure that a snapshot is active */
1431 	Assert(ActiveSnapshotSet());
1432 
1433 	/* Open and lock the parent heap relation */
1434 	heapRel = table_open(heapRelationId, ShareUpdateExclusiveLock);
1435 
1436 	/* And the target index relation */
1437 	indexRelation = index_open(indexRelationId, RowExclusiveLock);
1438 
1439 	/*
1440 	 * We have to re-build the IndexInfo struct, since it was lost in the
1441 	 * commit of the transaction where this concurrent index was created at
1442 	 * the catalog level.
1443 	 */
1444 	indexInfo = BuildIndexInfo(indexRelation);
1445 	Assert(!indexInfo->ii_ReadyForInserts);
1446 	indexInfo->ii_Concurrent = true;
1447 	indexInfo->ii_BrokenHotChain = false;
1448 
1449 	/* Now build the index */
1450 	index_build(heapRel, indexRelation, indexInfo, false, true);
1451 
1452 	/* Close both the relations, but keep the locks */
1453 	table_close(heapRel, NoLock);
1454 	index_close(indexRelation, NoLock);
1455 
1456 	/*
1457 	 * Update the pg_index row to mark the index as ready for inserts. Once we
1458 	 * commit this transaction, any new transactions that open the table must
1459 	 * insert new entries into the index for insertions and non-HOT updates.
1460 	 */
1461 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
1462 }
1463 
1464 /*
1465  * index_concurrently_swap
1466  *
1467  * Swap name, dependencies, and constraints of the old index over to the new
1468  * index, while marking the old index as invalid and the new as valid.
1469  */
1470 void
index_concurrently_swap(Oid newIndexId,Oid oldIndexId,const char * oldName)1471 index_concurrently_swap(Oid newIndexId, Oid oldIndexId, const char *oldName)
1472 {
1473 	Relation	pg_class,
1474 				pg_index,
1475 				pg_constraint,
1476 				pg_trigger;
1477 	Relation	oldClassRel,
1478 				newClassRel;
1479 	HeapTuple	oldClassTuple,
1480 				newClassTuple;
1481 	Form_pg_class oldClassForm,
1482 				newClassForm;
1483 	HeapTuple	oldIndexTuple,
1484 				newIndexTuple;
1485 	Form_pg_index oldIndexForm,
1486 				newIndexForm;
1487 	bool		isPartition;
1488 	Oid			indexConstraintOid;
1489 	List	   *constraintOids = NIL;
1490 	ListCell   *lc;
1491 
1492 	/*
1493 	 * Take a necessary lock on the old and new index before swapping them.
1494 	 */
1495 	oldClassRel = relation_open(oldIndexId, ShareUpdateExclusiveLock);
1496 	newClassRel = relation_open(newIndexId, ShareUpdateExclusiveLock);
1497 
1498 	/* Now swap names and dependencies of those indexes */
1499 	pg_class = table_open(RelationRelationId, RowExclusiveLock);
1500 
1501 	oldClassTuple = SearchSysCacheCopy1(RELOID,
1502 										ObjectIdGetDatum(oldIndexId));
1503 	if (!HeapTupleIsValid(oldClassTuple))
1504 		elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1505 	newClassTuple = SearchSysCacheCopy1(RELOID,
1506 										ObjectIdGetDatum(newIndexId));
1507 	if (!HeapTupleIsValid(newClassTuple))
1508 		elog(ERROR, "could not find tuple for relation %u", newIndexId);
1509 
1510 	oldClassForm = (Form_pg_class) GETSTRUCT(oldClassTuple);
1511 	newClassForm = (Form_pg_class) GETSTRUCT(newClassTuple);
1512 
1513 	/* Swap the names */
1514 	namestrcpy(&newClassForm->relname, NameStr(oldClassForm->relname));
1515 	namestrcpy(&oldClassForm->relname, oldName);
1516 
1517 	/* Swap the partition flags to track inheritance properly */
1518 	isPartition = newClassForm->relispartition;
1519 	newClassForm->relispartition = oldClassForm->relispartition;
1520 	oldClassForm->relispartition = isPartition;
1521 
1522 	CatalogTupleUpdate(pg_class, &oldClassTuple->t_self, oldClassTuple);
1523 	CatalogTupleUpdate(pg_class, &newClassTuple->t_self, newClassTuple);
1524 
1525 	heap_freetuple(oldClassTuple);
1526 	heap_freetuple(newClassTuple);
1527 
1528 	/* Now swap index info */
1529 	pg_index = table_open(IndexRelationId, RowExclusiveLock);
1530 
1531 	oldIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1532 										ObjectIdGetDatum(oldIndexId));
1533 	if (!HeapTupleIsValid(oldIndexTuple))
1534 		elog(ERROR, "could not find tuple for relation %u", oldIndexId);
1535 	newIndexTuple = SearchSysCacheCopy1(INDEXRELID,
1536 										ObjectIdGetDatum(newIndexId));
1537 	if (!HeapTupleIsValid(newIndexTuple))
1538 		elog(ERROR, "could not find tuple for relation %u", newIndexId);
1539 
1540 	oldIndexForm = (Form_pg_index) GETSTRUCT(oldIndexTuple);
1541 	newIndexForm = (Form_pg_index) GETSTRUCT(newIndexTuple);
1542 
1543 	/*
1544 	 * Copy constraint flags from the old index. This is safe because the old
1545 	 * index guaranteed uniqueness.
1546 	 */
1547 	newIndexForm->indisprimary = oldIndexForm->indisprimary;
1548 	oldIndexForm->indisprimary = false;
1549 	newIndexForm->indisexclusion = oldIndexForm->indisexclusion;
1550 	oldIndexForm->indisexclusion = false;
1551 	newIndexForm->indimmediate = oldIndexForm->indimmediate;
1552 	oldIndexForm->indimmediate = true;
1553 
1554 	/* Preserve indisreplident in the new index */
1555 	newIndexForm->indisreplident = oldIndexForm->indisreplident;
1556 
1557 	/* Preserve indisclustered in the new index */
1558 	newIndexForm->indisclustered = oldIndexForm->indisclustered;
1559 
1560 	/*
1561 	 * Mark the new index as valid, and the old index as invalid similarly to
1562 	 * what index_set_state_flags() does.
1563 	 */
1564 	newIndexForm->indisvalid = true;
1565 	oldIndexForm->indisvalid = false;
1566 	oldIndexForm->indisclustered = false;
1567 	oldIndexForm->indisreplident = false;
1568 
1569 	CatalogTupleUpdate(pg_index, &oldIndexTuple->t_self, oldIndexTuple);
1570 	CatalogTupleUpdate(pg_index, &newIndexTuple->t_self, newIndexTuple);
1571 
1572 	heap_freetuple(oldIndexTuple);
1573 	heap_freetuple(newIndexTuple);
1574 
1575 	/*
1576 	 * Move constraints and triggers over to the new index
1577 	 */
1578 
1579 	constraintOids = get_index_ref_constraints(oldIndexId);
1580 
1581 	indexConstraintOid = get_index_constraint(oldIndexId);
1582 
1583 	if (OidIsValid(indexConstraintOid))
1584 		constraintOids = lappend_oid(constraintOids, indexConstraintOid);
1585 
1586 	pg_constraint = table_open(ConstraintRelationId, RowExclusiveLock);
1587 	pg_trigger = table_open(TriggerRelationId, RowExclusiveLock);
1588 
1589 	foreach(lc, constraintOids)
1590 	{
1591 		HeapTuple	constraintTuple,
1592 					triggerTuple;
1593 		Form_pg_constraint conForm;
1594 		ScanKeyData key[1];
1595 		SysScanDesc scan;
1596 		Oid			constraintOid = lfirst_oid(lc);
1597 
1598 		/* Move the constraint from the old to the new index */
1599 		constraintTuple = SearchSysCacheCopy1(CONSTROID,
1600 											  ObjectIdGetDatum(constraintOid));
1601 		if (!HeapTupleIsValid(constraintTuple))
1602 			elog(ERROR, "could not find tuple for constraint %u", constraintOid);
1603 
1604 		conForm = ((Form_pg_constraint) GETSTRUCT(constraintTuple));
1605 
1606 		if (conForm->conindid == oldIndexId)
1607 		{
1608 			conForm->conindid = newIndexId;
1609 
1610 			CatalogTupleUpdate(pg_constraint, &constraintTuple->t_self, constraintTuple);
1611 		}
1612 
1613 		heap_freetuple(constraintTuple);
1614 
1615 		/* Search for trigger records */
1616 		ScanKeyInit(&key[0],
1617 					Anum_pg_trigger_tgconstraint,
1618 					BTEqualStrategyNumber, F_OIDEQ,
1619 					ObjectIdGetDatum(constraintOid));
1620 
1621 		scan = systable_beginscan(pg_trigger, TriggerConstraintIndexId, true,
1622 								  NULL, 1, key);
1623 
1624 		while (HeapTupleIsValid((triggerTuple = systable_getnext(scan))))
1625 		{
1626 			Form_pg_trigger tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1627 
1628 			if (tgForm->tgconstrindid != oldIndexId)
1629 				continue;
1630 
1631 			/* Make a modifiable copy */
1632 			triggerTuple = heap_copytuple(triggerTuple);
1633 			tgForm = (Form_pg_trigger) GETSTRUCT(triggerTuple);
1634 
1635 			tgForm->tgconstrindid = newIndexId;
1636 
1637 			CatalogTupleUpdate(pg_trigger, &triggerTuple->t_self, triggerTuple);
1638 
1639 			heap_freetuple(triggerTuple);
1640 		}
1641 
1642 		systable_endscan(scan);
1643 	}
1644 
1645 	/*
1646 	 * Move comment if any
1647 	 */
1648 	{
1649 		Relation	description;
1650 		ScanKeyData skey[3];
1651 		SysScanDesc sd;
1652 		HeapTuple	tuple;
1653 		Datum		values[Natts_pg_description] = {0};
1654 		bool		nulls[Natts_pg_description] = {0};
1655 		bool		replaces[Natts_pg_description] = {0};
1656 
1657 		values[Anum_pg_description_objoid - 1] = ObjectIdGetDatum(newIndexId);
1658 		replaces[Anum_pg_description_objoid - 1] = true;
1659 
1660 		ScanKeyInit(&skey[0],
1661 					Anum_pg_description_objoid,
1662 					BTEqualStrategyNumber, F_OIDEQ,
1663 					ObjectIdGetDatum(oldIndexId));
1664 		ScanKeyInit(&skey[1],
1665 					Anum_pg_description_classoid,
1666 					BTEqualStrategyNumber, F_OIDEQ,
1667 					ObjectIdGetDatum(RelationRelationId));
1668 		ScanKeyInit(&skey[2],
1669 					Anum_pg_description_objsubid,
1670 					BTEqualStrategyNumber, F_INT4EQ,
1671 					Int32GetDatum(0));
1672 
1673 		description = table_open(DescriptionRelationId, RowExclusiveLock);
1674 
1675 		sd = systable_beginscan(description, DescriptionObjIndexId, true,
1676 								NULL, 3, skey);
1677 
1678 		while ((tuple = systable_getnext(sd)) != NULL)
1679 		{
1680 			tuple = heap_modify_tuple(tuple, RelationGetDescr(description),
1681 									  values, nulls, replaces);
1682 			CatalogTupleUpdate(description, &tuple->t_self, tuple);
1683 
1684 			break;				/* Assume there can be only one match */
1685 		}
1686 
1687 		systable_endscan(sd);
1688 		table_close(description, NoLock);
1689 	}
1690 
1691 	/*
1692 	 * Swap inheritance relationship with parent index
1693 	 */
1694 	if (get_rel_relispartition(oldIndexId))
1695 	{
1696 		List	   *ancestors = get_partition_ancestors(oldIndexId);
1697 		Oid			parentIndexRelid = linitial_oid(ancestors);
1698 
1699 		DeleteInheritsTuple(oldIndexId, parentIndexRelid, false, NULL);
1700 		StoreSingleInheritance(newIndexId, parentIndexRelid, 1);
1701 
1702 		list_free(ancestors);
1703 	}
1704 
1705 	/*
1706 	 * Swap all dependencies of and on the old index to the new one, and
1707 	 * vice-versa.  Note that a call to CommandCounterIncrement() would cause
1708 	 * duplicate entries in pg_depend, so this should not be done.
1709 	 */
1710 	changeDependenciesOf(RelationRelationId, newIndexId, oldIndexId);
1711 	changeDependenciesOn(RelationRelationId, newIndexId, oldIndexId);
1712 
1713 	changeDependenciesOf(RelationRelationId, oldIndexId, newIndexId);
1714 	changeDependenciesOn(RelationRelationId, oldIndexId, newIndexId);
1715 
1716 	/*
1717 	 * Copy over statistics from old to new index
1718 	 */
1719 	{
1720 		PgStat_StatTabEntry *tabentry;
1721 
1722 		tabentry = pgstat_fetch_stat_tabentry(oldIndexId);
1723 		if (tabentry)
1724 		{
1725 			if (newClassRel->pgstat_info)
1726 			{
1727 				newClassRel->pgstat_info->t_counts.t_numscans = tabentry->numscans;
1728 				newClassRel->pgstat_info->t_counts.t_tuples_returned = tabentry->tuples_returned;
1729 				newClassRel->pgstat_info->t_counts.t_tuples_fetched = tabentry->tuples_fetched;
1730 				newClassRel->pgstat_info->t_counts.t_blocks_fetched = tabentry->blocks_fetched;
1731 				newClassRel->pgstat_info->t_counts.t_blocks_hit = tabentry->blocks_hit;
1732 
1733 				/*
1734 				 * The data will be sent by the next pgstat_report_stat()
1735 				 * call.
1736 				 */
1737 			}
1738 		}
1739 	}
1740 
1741 	/* Copy data of pg_statistic from the old index to the new one */
1742 	CopyStatistics(oldIndexId, newIndexId);
1743 
1744 	/* Copy pg_attribute.attstattarget for each index attribute */
1745 	{
1746 		HeapTuple	attrTuple;
1747 		Relation	pg_attribute;
1748 		SysScanDesc scan;
1749 		ScanKeyData key[1];
1750 
1751 		pg_attribute = table_open(AttributeRelationId, RowExclusiveLock);
1752 		ScanKeyInit(&key[0],
1753 					Anum_pg_attribute_attrelid,
1754 					BTEqualStrategyNumber, F_OIDEQ,
1755 					ObjectIdGetDatum(newIndexId));
1756 		scan = systable_beginscan(pg_attribute, AttributeRelidNumIndexId,
1757 								  true, NULL, 1, key);
1758 
1759 		while (HeapTupleIsValid((attrTuple = systable_getnext(scan))))
1760 		{
1761 			Form_pg_attribute att = (Form_pg_attribute) GETSTRUCT(attrTuple);
1762 			Datum		repl_val[Natts_pg_attribute];
1763 			bool		repl_null[Natts_pg_attribute];
1764 			bool		repl_repl[Natts_pg_attribute];
1765 			int			attstattarget;
1766 			HeapTuple	newTuple;
1767 
1768 			/* Ignore dropped columns */
1769 			if (att->attisdropped)
1770 				continue;
1771 
1772 			/*
1773 			 * Get attstattarget from the old index and refresh the new value.
1774 			 */
1775 			attstattarget = get_attstattarget(oldIndexId, att->attnum);
1776 
1777 			/* no need for a refresh if both match */
1778 			if (attstattarget == att->attstattarget)
1779 				continue;
1780 
1781 			memset(repl_val, 0, sizeof(repl_val));
1782 			memset(repl_null, false, sizeof(repl_null));
1783 			memset(repl_repl, false, sizeof(repl_repl));
1784 
1785 			repl_repl[Anum_pg_attribute_attstattarget - 1] = true;
1786 			repl_val[Anum_pg_attribute_attstattarget - 1] = Int32GetDatum(attstattarget);
1787 
1788 			newTuple = heap_modify_tuple(attrTuple,
1789 										 RelationGetDescr(pg_attribute),
1790 										 repl_val, repl_null, repl_repl);
1791 			CatalogTupleUpdate(pg_attribute, &newTuple->t_self, newTuple);
1792 
1793 			heap_freetuple(newTuple);
1794 		}
1795 
1796 		systable_endscan(scan);
1797 		table_close(pg_attribute, RowExclusiveLock);
1798 	}
1799 
1800 	/* Close relations */
1801 	table_close(pg_class, RowExclusiveLock);
1802 	table_close(pg_index, RowExclusiveLock);
1803 	table_close(pg_constraint, RowExclusiveLock);
1804 	table_close(pg_trigger, RowExclusiveLock);
1805 
1806 	/* The lock taken previously is not released until the end of transaction */
1807 	relation_close(oldClassRel, NoLock);
1808 	relation_close(newClassRel, NoLock);
1809 }
1810 
1811 /*
1812  * index_concurrently_set_dead
1813  *
1814  * Perform the last invalidation stage of DROP INDEX CONCURRENTLY or REINDEX
1815  * CONCURRENTLY before actually dropping the index.  After calling this
1816  * function, the index is seen by all the backends as dead.  Low-level locks
1817  * taken here are kept until the end of the transaction calling this function.
1818  */
1819 void
index_concurrently_set_dead(Oid heapId,Oid indexId)1820 index_concurrently_set_dead(Oid heapId, Oid indexId)
1821 {
1822 	Relation	userHeapRelation;
1823 	Relation	userIndexRelation;
1824 
1825 	/*
1826 	 * No more predicate locks will be acquired on this index, and we're about
1827 	 * to stop doing inserts into the index which could show conflicts with
1828 	 * existing predicate locks, so now is the time to move them to the heap
1829 	 * relation.
1830 	 */
1831 	userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
1832 	userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1833 	TransferPredicateLocksToHeapRelation(userIndexRelation);
1834 
1835 	/*
1836 	 * Now we are sure that nobody uses the index for queries; they just might
1837 	 * have it open for updating it.  So now we can unset indisready and
1838 	 * indislive, then wait till nobody could be using it at all anymore.
1839 	 */
1840 	index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1841 
1842 	/*
1843 	 * Invalidate the relcache for the table, so that after this commit all
1844 	 * sessions will refresh the table's index list.  Forgetting just the
1845 	 * index's relcache entry is not enough.
1846 	 */
1847 	CacheInvalidateRelcache(userHeapRelation);
1848 
1849 	/*
1850 	 * Close the relations again, though still holding session lock.
1851 	 */
1852 	table_close(userHeapRelation, NoLock);
1853 	index_close(userIndexRelation, NoLock);
1854 }
1855 
1856 /*
1857  * index_constraint_create
1858  *
1859  * Set up a constraint associated with an index.  Return the new constraint's
1860  * address.
1861  *
1862  * heapRelation: table owning the index (must be suitably locked by caller)
1863  * indexRelationId: OID of the index
1864  * parentConstraintId: if constraint is on a partition, the OID of the
1865  *		constraint in the parent.
1866  * indexInfo: same info executor uses to insert into the index
1867  * constraintName: what it say (generally, should match name of index)
1868  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1869  *		CONSTRAINT_EXCLUSION
1870  * flags: bitmask that can include any combination of these bits:
1871  *		INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
1872  *		INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
1873  *		INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
1874  *		INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
1875  *		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
1876  *			of index on table's columns
1877  * allow_system_table_mods: allow table to be a system catalog
1878  * is_internal: index is constructed due to internal process
1879  */
1880 ObjectAddress
index_constraint_create(Relation heapRelation,Oid indexRelationId,Oid parentConstraintId,IndexInfo * indexInfo,const char * constraintName,char constraintType,bits16 constr_flags,bool allow_system_table_mods,bool is_internal)1881 index_constraint_create(Relation heapRelation,
1882 						Oid indexRelationId,
1883 						Oid parentConstraintId,
1884 						IndexInfo *indexInfo,
1885 						const char *constraintName,
1886 						char constraintType,
1887 						bits16 constr_flags,
1888 						bool allow_system_table_mods,
1889 						bool is_internal)
1890 {
1891 	Oid			namespaceId = RelationGetNamespace(heapRelation);
1892 	ObjectAddress myself,
1893 				idxaddr;
1894 	Oid			conOid;
1895 	bool		deferrable;
1896 	bool		initdeferred;
1897 	bool		mark_as_primary;
1898 	bool		islocal;
1899 	bool		noinherit;
1900 	int			inhcount;
1901 
1902 	deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
1903 	initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
1904 	mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;
1905 
1906 	/* constraint creation support doesn't work while bootstrapping */
1907 	Assert(!IsBootstrapProcessingMode());
1908 
1909 	/* enforce system-table restriction */
1910 	if (!allow_system_table_mods &&
1911 		IsSystemRelation(heapRelation) &&
1912 		IsNormalProcessingMode())
1913 		ereport(ERROR,
1914 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1915 				 errmsg("user-defined indexes on system catalog tables are not supported")));
1916 
1917 	/* primary/unique constraints shouldn't have any expressions */
1918 	if (indexInfo->ii_Expressions &&
1919 		constraintType != CONSTRAINT_EXCLUSION)
1920 		elog(ERROR, "constraints cannot have index expressions");
1921 
1922 	/*
1923 	 * If we're manufacturing a constraint for a pre-existing index, we need
1924 	 * to get rid of the existing auto dependencies for the index (the ones
1925 	 * that index_create() would have made instead of calling this function).
1926 	 *
1927 	 * Note: this code would not necessarily do the right thing if the index
1928 	 * has any expressions or predicate, but we'd never be turning such an
1929 	 * index into a UNIQUE or PRIMARY KEY constraint.
1930 	 */
1931 	if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
1932 		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1933 										RelationRelationId, DEPENDENCY_AUTO);
1934 
1935 	if (OidIsValid(parentConstraintId))
1936 	{
1937 		islocal = false;
1938 		inhcount = 1;
1939 		noinherit = false;
1940 	}
1941 	else
1942 	{
1943 		islocal = true;
1944 		inhcount = 0;
1945 		noinherit = true;
1946 	}
1947 
1948 	/*
1949 	 * Construct a pg_constraint entry.
1950 	 */
1951 	conOid = CreateConstraintEntry(constraintName,
1952 								   namespaceId,
1953 								   constraintType,
1954 								   deferrable,
1955 								   initdeferred,
1956 								   true,
1957 								   parentConstraintId,
1958 								   RelationGetRelid(heapRelation),
1959 								   indexInfo->ii_IndexAttrNumbers,
1960 								   indexInfo->ii_NumIndexKeyAttrs,
1961 								   indexInfo->ii_NumIndexAttrs,
1962 								   InvalidOid,	/* no domain */
1963 								   indexRelationId, /* index OID */
1964 								   InvalidOid,	/* no foreign key */
1965 								   NULL,
1966 								   NULL,
1967 								   NULL,
1968 								   NULL,
1969 								   0,
1970 								   ' ',
1971 								   ' ',
1972 								   ' ',
1973 								   indexInfo->ii_ExclusionOps,
1974 								   NULL,	/* no check constraint */
1975 								   NULL,
1976 								   islocal,
1977 								   inhcount,
1978 								   noinherit,
1979 								   is_internal);
1980 
1981 	/*
1982 	 * Register the index as internally dependent on the constraint.
1983 	 *
1984 	 * Note that the constraint has a dependency on the table, so we don't
1985 	 * need (or want) any direct dependency from the index to the table.
1986 	 */
1987 	ObjectAddressSet(myself, ConstraintRelationId, conOid);
1988 	ObjectAddressSet(idxaddr, RelationRelationId, indexRelationId);
1989 	recordDependencyOn(&idxaddr, &myself, DEPENDENCY_INTERNAL);
1990 
1991 	/*
1992 	 * Also, if this is a constraint on a partition, give it partition-type
1993 	 * dependencies on the parent constraint as well as the table.
1994 	 */
1995 	if (OidIsValid(parentConstraintId))
1996 	{
1997 		ObjectAddress referenced;
1998 
1999 		ObjectAddressSet(referenced, ConstraintRelationId, parentConstraintId);
2000 		recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_PRI);
2001 		ObjectAddressSet(referenced, RelationRelationId,
2002 						 RelationGetRelid(heapRelation));
2003 		recordDependencyOn(&myself, &referenced, DEPENDENCY_PARTITION_SEC);
2004 	}
2005 
2006 	/*
2007 	 * If the constraint is deferrable, create the deferred uniqueness
2008 	 * checking trigger.  (The trigger will be given an internal dependency on
2009 	 * the constraint by CreateTrigger.)
2010 	 */
2011 	if (deferrable)
2012 	{
2013 		CreateTrigStmt *trigger = makeNode(CreateTrigStmt);
2014 
2015 		trigger->replace = false;
2016 		trigger->isconstraint = true;
2017 		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
2018 			"PK_ConstraintTrigger" :
2019 			"Unique_ConstraintTrigger";
2020 		trigger->relation = NULL;
2021 		trigger->funcname = SystemFuncName("unique_key_recheck");
2022 		trigger->args = NIL;
2023 		trigger->row = true;
2024 		trigger->timing = TRIGGER_TYPE_AFTER;
2025 		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
2026 		trigger->columns = NIL;
2027 		trigger->whenClause = NULL;
2028 		trigger->transitionRels = NIL;
2029 		trigger->deferrable = true;
2030 		trigger->initdeferred = initdeferred;
2031 		trigger->constrrel = NULL;
2032 
2033 		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
2034 							 InvalidOid, conOid, indexRelationId, InvalidOid,
2035 							 InvalidOid, NULL, true, false);
2036 	}
2037 
2038 	/*
2039 	 * If needed, mark the index as primary and/or deferred in pg_index.
2040 	 *
2041 	 * Note: When making an existing index into a constraint, caller must have
2042 	 * a table lock that prevents concurrent table updates; otherwise, there
2043 	 * is a risk that concurrent readers of the table will miss seeing this
2044 	 * index at all.
2045 	 */
2046 	if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
2047 		(mark_as_primary || deferrable))
2048 	{
2049 		Relation	pg_index;
2050 		HeapTuple	indexTuple;
2051 		Form_pg_index indexForm;
2052 		bool		dirty = false;
2053 
2054 		pg_index = table_open(IndexRelationId, RowExclusiveLock);
2055 
2056 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
2057 										 ObjectIdGetDatum(indexRelationId));
2058 		if (!HeapTupleIsValid(indexTuple))
2059 			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
2060 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2061 
2062 		if (mark_as_primary && !indexForm->indisprimary)
2063 		{
2064 			indexForm->indisprimary = true;
2065 			dirty = true;
2066 		}
2067 
2068 		if (deferrable && indexForm->indimmediate)
2069 		{
2070 			indexForm->indimmediate = false;
2071 			dirty = true;
2072 		}
2073 
2074 		if (dirty)
2075 		{
2076 			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2077 
2078 			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
2079 										 InvalidOid, is_internal);
2080 		}
2081 
2082 		heap_freetuple(indexTuple);
2083 		table_close(pg_index, RowExclusiveLock);
2084 	}
2085 
2086 	return myself;
2087 }
2088 
2089 /*
2090  *		index_drop
2091  *
2092  * NOTE: this routine should now only be called through performDeletion(),
2093  * else associated dependencies won't be cleaned up.
2094  *
2095  * If concurrent is true, do a DROP INDEX CONCURRENTLY.  If concurrent is
2096  * false but concurrent_lock_mode is true, then do a normal DROP INDEX but
2097  * take a lock for CONCURRENTLY processing.  That is used as part of REINDEX
2098  * CONCURRENTLY.
2099  */
2100 void
index_drop(Oid indexId,bool concurrent,bool concurrent_lock_mode)2101 index_drop(Oid indexId, bool concurrent, bool concurrent_lock_mode)
2102 {
2103 	Oid			heapId;
2104 	Relation	userHeapRelation;
2105 	Relation	userIndexRelation;
2106 	Relation	indexRelation;
2107 	HeapTuple	tuple;
2108 	bool		hasexprs;
2109 	LockRelId	heaprelid,
2110 				indexrelid;
2111 	LOCKTAG		heaplocktag;
2112 	LOCKMODE	lockmode;
2113 
2114 	/*
2115 	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
2116 	 * access a temporary relation, so there's no harm in grabbing a stronger
2117 	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
2118 	 * more efficient.
2119 	 */
2120 	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
2121 		   (!concurrent && !concurrent_lock_mode));
2122 
2123 	/*
2124 	 * To drop an index safely, we must grab exclusive lock on its parent
2125 	 * table.  Exclusive lock on the index alone is insufficient because
2126 	 * another backend might be about to execute a query on the parent table.
2127 	 * If it relies on a previously cached list of index OIDs, then it could
2128 	 * attempt to access the just-dropped index.  We must therefore take a
2129 	 * table lock strong enough to prevent all queries on the table from
2130 	 * proceeding until we commit and send out a shared-cache-inval notice
2131 	 * that will make them update their index lists.
2132 	 *
2133 	 * In the concurrent case we avoid this requirement by disabling index use
2134 	 * in multiple steps and waiting out any transactions that might be using
2135 	 * the index, so we don't need exclusive lock on the parent table. Instead
2136 	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
2137 	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
2138 	 * AccessExclusiveLock on the index below, once we're sure nobody else is
2139 	 * using it.)
2140 	 */
2141 	heapId = IndexGetRelation(indexId, false);
2142 	lockmode = (concurrent || concurrent_lock_mode) ? ShareUpdateExclusiveLock : AccessExclusiveLock;
2143 	userHeapRelation = table_open(heapId, lockmode);
2144 	userIndexRelation = index_open(indexId, lockmode);
2145 
2146 	/*
2147 	 * We might still have open queries using it in our own session, which the
2148 	 * above locking won't prevent, so test explicitly.
2149 	 */
2150 	CheckTableNotInUse(userIndexRelation, "DROP INDEX");
2151 
2152 	/*
2153 	 * Drop Index Concurrently is more or less the reverse process of Create
2154 	 * Index Concurrently.
2155 	 *
2156 	 * First we unset indisvalid so queries starting afterwards don't use the
2157 	 * index to answer queries anymore.  We have to keep indisready = true so
2158 	 * transactions that are still scanning the index can continue to see
2159 	 * valid index contents.  For instance, if they are using READ COMMITTED
2160 	 * mode, and another transaction makes changes and commits, they need to
2161 	 * see those new tuples in the index.
2162 	 *
2163 	 * After all transactions that could possibly have used the index for
2164 	 * queries end, we can unset indisready and indislive, then wait till
2165 	 * nobody could be touching it anymore.  (Note: we need indislive because
2166 	 * this state must be distinct from the initial state during CREATE INDEX
2167 	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
2168 	 * are false.  That's because in that state, transactions must examine the
2169 	 * index for HOT-safety decisions, while in this state we don't want them
2170 	 * to open it at all.)
2171 	 *
2172 	 * Since all predicate locks on the index are about to be made invalid, we
2173 	 * must promote them to predicate locks on the heap.  In the
2174 	 * non-concurrent case we can just do that now.  In the concurrent case
2175 	 * it's a bit trickier.  The predicate locks must be moved when there are
2176 	 * no index scans in progress on the index and no more can subsequently
2177 	 * start, so that no new predicate locks can be made on the index.  Also,
2178 	 * they must be moved before heap inserts stop maintaining the index, else
2179 	 * the conflict with the predicate lock on the index gap could be missed
2180 	 * before the lock on the heap relation is in place to detect a conflict
2181 	 * based on the heap tuple insert.
2182 	 */
2183 	if (concurrent)
2184 	{
2185 		/*
2186 		 * We must commit our transaction in order to make the first pg_index
2187 		 * state update visible to other sessions.  If the DROP machinery has
2188 		 * already performed any other actions (removal of other objects,
2189 		 * pg_depend entries, etc), the commit would make those actions
2190 		 * permanent, which would leave us with inconsistent catalog state if
2191 		 * we fail partway through the following sequence.  Since DROP INDEX
2192 		 * CONCURRENTLY is restricted to dropping just one index that has no
2193 		 * dependencies, we should get here before anything's been done ---
2194 		 * but let's check that to be sure.  We can verify that the current
2195 		 * transaction has not executed any transactional updates by checking
2196 		 * that no XID has been assigned.
2197 		 */
2198 		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
2199 			ereport(ERROR,
2200 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2201 					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
2202 
2203 		/*
2204 		 * Mark index invalid by updating its pg_index entry
2205 		 */
2206 		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
2207 
2208 		/*
2209 		 * Invalidate the relcache for the table, so that after this commit
2210 		 * all sessions will refresh any cached plans that might reference the
2211 		 * index.
2212 		 */
2213 		CacheInvalidateRelcache(userHeapRelation);
2214 
2215 		/* save lockrelid and locktag for below, then close but keep locks */
2216 		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
2217 		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
2218 		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
2219 
2220 		table_close(userHeapRelation, NoLock);
2221 		index_close(userIndexRelation, NoLock);
2222 
2223 		/*
2224 		 * We must commit our current transaction so that the indisvalid
2225 		 * update becomes visible to other transactions; then start another.
2226 		 * Note that any previously-built data structures are lost in the
2227 		 * commit.  The only data we keep past here are the relation IDs.
2228 		 *
2229 		 * Before committing, get a session-level lock on the table, to ensure
2230 		 * that neither it nor the index can be dropped before we finish. This
2231 		 * cannot block, even if someone else is waiting for access, because
2232 		 * we already have the same lock within our transaction.
2233 		 */
2234 		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2235 		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2236 
2237 		PopActiveSnapshot();
2238 		CommitTransactionCommand();
2239 		StartTransactionCommand();
2240 
2241 		/*
2242 		 * Now we must wait until no running transaction could be using the
2243 		 * index for a query.  Use AccessExclusiveLock here to check for
2244 		 * running transactions that hold locks of any kind on the table. Note
2245 		 * we do not need to worry about xacts that open the table for reading
2246 		 * after this point; they will see the index as invalid when they open
2247 		 * the relation.
2248 		 *
2249 		 * Note: the reason we use actual lock acquisition here, rather than
2250 		 * just checking the ProcArray and sleeping, is that deadlock is
2251 		 * possible if one of the transactions in question is blocked trying
2252 		 * to acquire an exclusive lock on our table.  The lock code will
2253 		 * detect deadlock and error out properly.
2254 		 *
2255 		 * Note: we report progress through WaitForLockers() unconditionally
2256 		 * here, even though it will only be used when we're called by REINDEX
2257 		 * CONCURRENTLY and not when called by DROP INDEX CONCURRENTLY.
2258 		 */
2259 		WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2260 
2261 		/* Finish invalidation of index and mark it as dead */
2262 		index_concurrently_set_dead(heapId, indexId);
2263 
2264 		/*
2265 		 * Again, commit the transaction to make the pg_index update visible
2266 		 * to other sessions.
2267 		 */
2268 		CommitTransactionCommand();
2269 		StartTransactionCommand();
2270 
2271 		/*
2272 		 * Wait till every transaction that saw the old index state has
2273 		 * finished.  See above about progress reporting.
2274 		 */
2275 		WaitForLockers(heaplocktag, AccessExclusiveLock, true);
2276 
2277 		/*
2278 		 * Re-open relations to allow us to complete our actions.
2279 		 *
2280 		 * At this point, nothing should be accessing the index, but lets
2281 		 * leave nothing to chance and grab AccessExclusiveLock on the index
2282 		 * before the physical deletion.
2283 		 */
2284 		userHeapRelation = table_open(heapId, ShareUpdateExclusiveLock);
2285 		userIndexRelation = index_open(indexId, AccessExclusiveLock);
2286 	}
2287 	else
2288 	{
2289 		/* Not concurrent, so just transfer predicate locks and we're good */
2290 		TransferPredicateLocksToHeapRelation(userIndexRelation);
2291 	}
2292 
2293 	/*
2294 	 * Schedule physical removal of the files (if any)
2295 	 */
2296 	if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
2297 		RelationDropStorage(userIndexRelation);
2298 
2299 	/*
2300 	 * Close and flush the index's relcache entry, to ensure relcache doesn't
2301 	 * try to rebuild it while we're deleting catalog entries. We keep the
2302 	 * lock though.
2303 	 */
2304 	index_close(userIndexRelation, NoLock);
2305 
2306 	RelationForgetRelation(indexId);
2307 
2308 	/*
2309 	 * fix INDEX relation, and check for expressional index
2310 	 */
2311 	indexRelation = table_open(IndexRelationId, RowExclusiveLock);
2312 
2313 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
2314 	if (!HeapTupleIsValid(tuple))
2315 		elog(ERROR, "cache lookup failed for index %u", indexId);
2316 
2317 	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
2318 							   RelationGetDescr(indexRelation));
2319 
2320 	CatalogTupleDelete(indexRelation, &tuple->t_self);
2321 
2322 	ReleaseSysCache(tuple);
2323 	table_close(indexRelation, RowExclusiveLock);
2324 
2325 	/*
2326 	 * if it has any expression columns, we might have stored statistics about
2327 	 * them.
2328 	 */
2329 	if (hasexprs)
2330 		RemoveStatistics(indexId, 0);
2331 
2332 	/*
2333 	 * fix ATTRIBUTE relation
2334 	 */
2335 	DeleteAttributeTuples(indexId);
2336 
2337 	/*
2338 	 * fix RELATION relation
2339 	 */
2340 	DeleteRelationTuple(indexId);
2341 
2342 	/*
2343 	 * fix INHERITS relation
2344 	 */
2345 	DeleteInheritsTuple(indexId, InvalidOid, false, NULL);
2346 
2347 	/*
2348 	 * We are presently too lazy to attempt to compute the new correct value
2349 	 * of relhasindex (the next VACUUM will fix it if necessary). So there is
2350 	 * no need to update the pg_class tuple for the owning relation. But we
2351 	 * must send out a shared-cache-inval notice on the owning relation to
2352 	 * ensure other backends update their relcache lists of indexes.  (In the
2353 	 * concurrent case, this is redundant but harmless.)
2354 	 */
2355 	CacheInvalidateRelcache(userHeapRelation);
2356 
2357 	/*
2358 	 * Close owning rel, but keep lock
2359 	 */
2360 	table_close(userHeapRelation, NoLock);
2361 
2362 	/*
2363 	 * Release the session locks before we go.
2364 	 */
2365 	if (concurrent)
2366 	{
2367 		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
2368 		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
2369 	}
2370 }
2371 
2372 /* ----------------------------------------------------------------
2373  *						index_build support
2374  * ----------------------------------------------------------------
2375  */
2376 
2377 /* ----------------
2378  *		BuildIndexInfo
2379  *			Construct an IndexInfo record for an open index
2380  *
2381  * IndexInfo stores the information about the index that's needed by
2382  * FormIndexDatum, which is used for both index_build() and later insertion
2383  * of individual index tuples.  Normally we build an IndexInfo for an index
2384  * just once per command, and then use it for (potentially) many tuples.
2385  * ----------------
2386  */
2387 IndexInfo *
BuildIndexInfo(Relation index)2388 BuildIndexInfo(Relation index)
2389 {
2390 	IndexInfo  *ii;
2391 	Form_pg_index indexStruct = index->rd_index;
2392 	int			i;
2393 	int			numAtts;
2394 
2395 	/* check the number of keys, and copy attr numbers into the IndexInfo */
2396 	numAtts = indexStruct->indnatts;
2397 	if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2398 		elog(ERROR, "invalid indnatts %d for index %u",
2399 			 numAtts, RelationGetRelid(index));
2400 
2401 	/*
2402 	 * Create the node, fetching any expressions needed for expressional
2403 	 * indexes and index predicate if any.
2404 	 */
2405 	ii = makeIndexInfo(indexStruct->indnatts,
2406 					   indexStruct->indnkeyatts,
2407 					   index->rd_rel->relam,
2408 					   RelationGetIndexExpressions(index),
2409 					   RelationGetIndexPredicate(index),
2410 					   indexStruct->indisunique,
2411 					   indexStruct->indisready,
2412 					   false);
2413 
2414 	/* fill in attribute numbers */
2415 	for (i = 0; i < numAtts; i++)
2416 		ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2417 
2418 	/* fetch exclusion constraint info if any */
2419 	if (indexStruct->indisexclusion)
2420 	{
2421 		RelationGetExclusionInfo(index,
2422 								 &ii->ii_ExclusionOps,
2423 								 &ii->ii_ExclusionProcs,
2424 								 &ii->ii_ExclusionStrats);
2425 	}
2426 
2427 	ii->ii_OpclassOptions = RelationGetIndexRawAttOptions(index);
2428 
2429 	return ii;
2430 }
2431 
2432 /* ----------------
2433  *		BuildDummyIndexInfo
2434  *			Construct a dummy IndexInfo record for an open index
2435  *
2436  * This differs from the real BuildIndexInfo in that it will never run any
2437  * user-defined code that might exist in index expressions or predicates.
2438  * Instead of the real index expressions, we return null constants that have
2439  * the right types/typmods/collations.  Predicates and exclusion clauses are
2440  * just ignored.  This is sufficient for the purpose of truncating an index,
2441  * since we will not need to actually evaluate the expressions or predicates;
2442  * the only thing that's likely to be done with the data is construction of
2443  * a tupdesc describing the index's rowtype.
2444  * ----------------
2445  */
2446 IndexInfo *
BuildDummyIndexInfo(Relation index)2447 BuildDummyIndexInfo(Relation index)
2448 {
2449 	IndexInfo  *ii;
2450 	Form_pg_index indexStruct = index->rd_index;
2451 	int			i;
2452 	int			numAtts;
2453 
2454 	/* check the number of keys, and copy attr numbers into the IndexInfo */
2455 	numAtts = indexStruct->indnatts;
2456 	if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
2457 		elog(ERROR, "invalid indnatts %d for index %u",
2458 			 numAtts, RelationGetRelid(index));
2459 
2460 	/*
2461 	 * Create the node, using dummy index expressions, and pretending there is
2462 	 * no predicate.
2463 	 */
2464 	ii = makeIndexInfo(indexStruct->indnatts,
2465 					   indexStruct->indnkeyatts,
2466 					   index->rd_rel->relam,
2467 					   RelationGetDummyIndexExpressions(index),
2468 					   NIL,
2469 					   indexStruct->indisunique,
2470 					   indexStruct->indisready,
2471 					   false);
2472 
2473 	/* fill in attribute numbers */
2474 	for (i = 0; i < numAtts; i++)
2475 		ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
2476 
2477 	/* We ignore the exclusion constraint if any */
2478 
2479 	return ii;
2480 }
2481 
2482 /*
2483  * CompareIndexInfo
2484  *		Return whether the properties of two indexes (in different tables)
2485  *		indicate that they have the "same" definitions.
2486  *
2487  * Note: passing collations and opfamilies separately is a kludge.  Adding
2488  * them to IndexInfo may result in better coding here and elsewhere.
2489  *
2490  * Use build_attrmap_by_name(index2, index1) to build the attmap.
2491  */
2492 bool
CompareIndexInfo(IndexInfo * info1,IndexInfo * info2,Oid * collations1,Oid * collations2,Oid * opfamilies1,Oid * opfamilies2,AttrMap * attmap)2493 CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
2494 				 Oid *collations1, Oid *collations2,
2495 				 Oid *opfamilies1, Oid *opfamilies2,
2496 				 AttrMap *attmap)
2497 {
2498 	int			i;
2499 
2500 	if (info1->ii_Unique != info2->ii_Unique)
2501 		return false;
2502 
2503 	/* indexes are only equivalent if they have the same access method */
2504 	if (info1->ii_Am != info2->ii_Am)
2505 		return false;
2506 
2507 	/* and same number of attributes */
2508 	if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
2509 		return false;
2510 
2511 	/* and same number of key attributes */
2512 	if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
2513 		return false;
2514 
2515 	/*
2516 	 * and columns match through the attribute map (actual attribute numbers
2517 	 * might differ!)  Note that this implies that index columns that are
2518 	 * expressions appear in the same positions.  We will next compare the
2519 	 * expressions themselves.
2520 	 */
2521 	for (i = 0; i < info1->ii_NumIndexAttrs; i++)
2522 	{
2523 		if (attmap->maplen < info2->ii_IndexAttrNumbers[i])
2524 			elog(ERROR, "incorrect attribute map");
2525 
2526 		/* ignore expressions at this stage */
2527 		if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
2528 			(attmap->attnums[info2->ii_IndexAttrNumbers[i] - 1] !=
2529 			 info1->ii_IndexAttrNumbers[i]))
2530 			return false;
2531 
2532 		/* collation and opfamily is not valid for including columns */
2533 		if (i >= info1->ii_NumIndexKeyAttrs)
2534 			continue;
2535 
2536 		if (collations1[i] != collations2[i])
2537 			return false;
2538 		if (opfamilies1[i] != opfamilies2[i])
2539 			return false;
2540 	}
2541 
2542 	/*
2543 	 * For expression indexes: either both are expression indexes, or neither
2544 	 * is; if they are, make sure the expressions match.
2545 	 */
2546 	if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
2547 		return false;
2548 	if (info1->ii_Expressions != NIL)
2549 	{
2550 		bool		found_whole_row;
2551 		Node	   *mapped;
2552 
2553 		mapped = map_variable_attnos((Node *) info2->ii_Expressions,
2554 									 1, 0, attmap,
2555 									 InvalidOid, &found_whole_row);
2556 		if (found_whole_row)
2557 		{
2558 			/*
2559 			 * we could throw an error here, but seems out of scope for this
2560 			 * routine.
2561 			 */
2562 			return false;
2563 		}
2564 
2565 		if (!equal(info1->ii_Expressions, mapped))
2566 			return false;
2567 	}
2568 
2569 	/* Partial index predicates must be identical, if they exist */
2570 	if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
2571 		return false;
2572 	if (info1->ii_Predicate != NULL)
2573 	{
2574 		bool		found_whole_row;
2575 		Node	   *mapped;
2576 
2577 		mapped = map_variable_attnos((Node *) info2->ii_Predicate,
2578 									 1, 0, attmap,
2579 									 InvalidOid, &found_whole_row);
2580 		if (found_whole_row)
2581 		{
2582 			/*
2583 			 * we could throw an error here, but seems out of scope for this
2584 			 * routine.
2585 			 */
2586 			return false;
2587 		}
2588 		if (!equal(info1->ii_Predicate, mapped))
2589 			return false;
2590 	}
2591 
2592 	/* No support currently for comparing exclusion indexes. */
2593 	if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
2594 		return false;
2595 
2596 	return true;
2597 }
2598 
2599 /* ----------------
2600  *		BuildSpeculativeIndexInfo
2601  *			Add extra state to IndexInfo record
2602  *
2603  * For unique indexes, we usually don't want to add info to the IndexInfo for
2604  * checking uniqueness, since the B-Tree AM handles that directly.  However,
2605  * in the case of speculative insertion, additional support is required.
2606  *
2607  * Do this processing here rather than in BuildIndexInfo() to not incur the
2608  * overhead in the common non-speculative cases.
2609  * ----------------
2610  */
2611 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)2612 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2613 {
2614 	int			indnkeyatts;
2615 	int			i;
2616 
2617 	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2618 
2619 	/*
2620 	 * fetch info for checking unique indexes
2621 	 */
2622 	Assert(ii->ii_Unique);
2623 
2624 	if (index->rd_rel->relam != BTREE_AM_OID)
2625 		elog(ERROR, "unexpected non-btree speculative unique index");
2626 
2627 	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2628 	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2629 	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2630 
2631 	/*
2632 	 * We have to look up the operator's strategy number.  This provides a
2633 	 * cross-check that the operator does match the index.
2634 	 */
2635 	/* We need the func OIDs and strategy numbers too */
2636 	for (i = 0; i < indnkeyatts; i++)
2637 	{
2638 		ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2639 		ii->ii_UniqueOps[i] =
2640 			get_opfamily_member(index->rd_opfamily[i],
2641 								index->rd_opcintype[i],
2642 								index->rd_opcintype[i],
2643 								ii->ii_UniqueStrats[i]);
2644 		if (!OidIsValid(ii->ii_UniqueOps[i]))
2645 			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2646 				 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2647 				 index->rd_opcintype[i], index->rd_opfamily[i]);
2648 		ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2649 	}
2650 }
2651 
2652 /* ----------------
2653  *		FormIndexDatum
2654  *			Construct values[] and isnull[] arrays for a new index tuple.
2655  *
2656  *	indexInfo		Info about the index
2657  *	slot			Heap tuple for which we must prepare an index entry
2658  *	estate			executor state for evaluating any index expressions
2659  *	values			Array of index Datums (output area)
2660  *	isnull			Array of is-null indicators (output area)
2661  *
2662  * When there are no index expressions, estate may be NULL.  Otherwise it
2663  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
2664  * context must point to the heap tuple passed in.
2665  *
2666  * Notice we don't actually call index_form_tuple() here; we just prepare
2667  * its input arrays values[] and isnull[].  This is because the index AM
2668  * may wish to alter the data before storage.
2669  * ----------------
2670  */
2671 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)2672 FormIndexDatum(IndexInfo *indexInfo,
2673 			   TupleTableSlot *slot,
2674 			   EState *estate,
2675 			   Datum *values,
2676 			   bool *isnull)
2677 {
2678 	ListCell   *indexpr_item;
2679 	int			i;
2680 
2681 	if (indexInfo->ii_Expressions != NIL &&
2682 		indexInfo->ii_ExpressionsState == NIL)
2683 	{
2684 		/* First time through, set up expression evaluation state */
2685 		indexInfo->ii_ExpressionsState =
2686 			ExecPrepareExprList(indexInfo->ii_Expressions, estate);
2687 		/* Check caller has set up context correctly */
2688 		Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
2689 	}
2690 	indexpr_item = list_head(indexInfo->ii_ExpressionsState);
2691 
2692 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
2693 	{
2694 		int			keycol = indexInfo->ii_IndexAttrNumbers[i];
2695 		Datum		iDatum;
2696 		bool		isNull;
2697 
2698 		if (keycol < 0)
2699 			iDatum = slot_getsysattr(slot, keycol, &isNull);
2700 		else if (keycol != 0)
2701 		{
2702 			/*
2703 			 * Plain index column; get the value we need directly from the
2704 			 * heap tuple.
2705 			 */
2706 			iDatum = slot_getattr(slot, keycol, &isNull);
2707 		}
2708 		else
2709 		{
2710 			/*
2711 			 * Index expression --- need to evaluate it.
2712 			 */
2713 			if (indexpr_item == NULL)
2714 				elog(ERROR, "wrong number of index expressions");
2715 			iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
2716 											   GetPerTupleExprContext(estate),
2717 											   &isNull);
2718 			indexpr_item = lnext(indexInfo->ii_ExpressionsState, indexpr_item);
2719 		}
2720 		values[i] = iDatum;
2721 		isnull[i] = isNull;
2722 	}
2723 
2724 	if (indexpr_item != NULL)
2725 		elog(ERROR, "wrong number of index expressions");
2726 }
2727 
2728 
2729 /*
2730  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
2731  *
2732  * This routine updates the pg_class row of either an index or its parent
2733  * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
2734  * to ensure we can do all the necessary work in just one update.
2735  *
2736  * hasindex: set relhasindex to this value
2737  * reltuples: if >= 0, set reltuples to this value; else no change
2738  *
2739  * If reltuples >= 0, relpages and relallvisible are also updated (using
2740  * RelationGetNumberOfBlocks() and visibilitymap_count()).
2741  *
2742  * NOTE: an important side-effect of this operation is that an SI invalidation
2743  * message is sent out to all backends --- including me --- causing relcache
2744  * entries to be flushed or updated with the new data.  This must happen even
2745  * if we find that no change is needed in the pg_class row.  When updating
2746  * a heap entry, this ensures that other backends find out about the new
2747  * index.  When updating an index, it's important because some index AMs
2748  * expect a relcache flush to occur after REINDEX.
2749  */
2750 static void
index_update_stats(Relation rel,bool hasindex,double reltuples)2751 index_update_stats(Relation rel,
2752 				   bool hasindex,
2753 				   double reltuples)
2754 {
2755 	Oid			relid = RelationGetRelid(rel);
2756 	Relation	pg_class;
2757 	HeapTuple	tuple;
2758 	Form_pg_class rd_rel;
2759 	bool		dirty;
2760 
2761 	/*
2762 	 * We always update the pg_class row using a non-transactional,
2763 	 * overwrite-in-place update.  There are several reasons for this:
2764 	 *
2765 	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
2766 	 *
2767 	 * 2. We could be reindexing pg_class itself, in which case we can't move
2768 	 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
2769 	 * not know about all the indexes yet (see reindex_relation).
2770 	 *
2771 	 * 3. Because we execute CREATE INDEX with just share lock on the parent
2772 	 * rel (to allow concurrent index creations), an ordinary update could
2773 	 * suffer a tuple-concurrently-updated failure against another CREATE
2774 	 * INDEX committing at about the same time.  We can avoid that by having
2775 	 * them both do nontransactional updates (we assume they will both be
2776 	 * trying to change the pg_class row to the same thing, so it doesn't
2777 	 * matter which goes first).
2778 	 *
2779 	 * It is safe to use a non-transactional update even though our
2780 	 * transaction could still fail before committing.  Setting relhasindex
2781 	 * true is safe even if there are no indexes (VACUUM will eventually fix
2782 	 * it).  And of course the new relpages and reltuples counts are correct
2783 	 * regardless.  However, we don't want to change relpages (or
2784 	 * relallvisible) if the caller isn't providing an updated reltuples
2785 	 * count, because that would bollix the reltuples/relpages ratio which is
2786 	 * what's really important.
2787 	 */
2788 
2789 	pg_class = table_open(RelationRelationId, RowExclusiveLock);
2790 
2791 	/*
2792 	 * Make a copy of the tuple to update.  Normally we use the syscache, but
2793 	 * we can't rely on that during bootstrap or while reindexing pg_class
2794 	 * itself.
2795 	 */
2796 	if (IsBootstrapProcessingMode() ||
2797 		ReindexIsProcessingHeap(RelationRelationId))
2798 	{
2799 		/* don't assume syscache will work */
2800 		TableScanDesc pg_class_scan;
2801 		ScanKeyData key[1];
2802 
2803 		ScanKeyInit(&key[0],
2804 					Anum_pg_class_oid,
2805 					BTEqualStrategyNumber, F_OIDEQ,
2806 					ObjectIdGetDatum(relid));
2807 
2808 		pg_class_scan = table_beginscan_catalog(pg_class, 1, key);
2809 		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
2810 		tuple = heap_copytuple(tuple);
2811 		table_endscan(pg_class_scan);
2812 	}
2813 	else
2814 	{
2815 		/* normal case, use syscache */
2816 		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2817 	}
2818 
2819 	if (!HeapTupleIsValid(tuple))
2820 		elog(ERROR, "could not find tuple for relation %u", relid);
2821 	rd_rel = (Form_pg_class) GETSTRUCT(tuple);
2822 
2823 	/* Should this be a more comprehensive test? */
2824 	Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);
2825 
2826 	/*
2827 	 * As a special hack, if we are dealing with an empty table and the
2828 	 * existing reltuples is -1, we leave that alone.  This ensures that
2829 	 * creating an index as part of CREATE TABLE doesn't cause the table to
2830 	 * prematurely look like it's been vacuumed.
2831 	 */
2832 	if (reltuples == 0 && rd_rel->reltuples < 0)
2833 		reltuples = -1;
2834 
2835 	/* Apply required updates, if any, to copied tuple */
2836 
2837 	dirty = false;
2838 	if (rd_rel->relhasindex != hasindex)
2839 	{
2840 		rd_rel->relhasindex = hasindex;
2841 		dirty = true;
2842 	}
2843 
2844 	if (reltuples >= 0)
2845 	{
2846 		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2847 		BlockNumber relallvisible;
2848 
2849 		if (rd_rel->relkind != RELKIND_INDEX)
2850 			visibilitymap_count(rel, &relallvisible, NULL);
2851 		else					/* don't bother for indexes */
2852 			relallvisible = 0;
2853 
2854 		if (rd_rel->relpages != (int32) relpages)
2855 		{
2856 			rd_rel->relpages = (int32) relpages;
2857 			dirty = true;
2858 		}
2859 		if (rd_rel->reltuples != (float4) reltuples)
2860 		{
2861 			rd_rel->reltuples = (float4) reltuples;
2862 			dirty = true;
2863 		}
2864 		if (rd_rel->relallvisible != (int32) relallvisible)
2865 		{
2866 			rd_rel->relallvisible = (int32) relallvisible;
2867 			dirty = true;
2868 		}
2869 	}
2870 
2871 	/*
2872 	 * If anything changed, write out the tuple
2873 	 */
2874 	if (dirty)
2875 	{
2876 		heap_inplace_update(pg_class, tuple);
2877 		/* the above sends a cache inval message */
2878 	}
2879 	else
2880 	{
2881 		/* no need to change tuple, but force relcache inval anyway */
2882 		CacheInvalidateRelcacheByTuple(tuple);
2883 	}
2884 
2885 	heap_freetuple(tuple);
2886 
2887 	table_close(pg_class, RowExclusiveLock);
2888 }
2889 
2890 
2891 /*
2892  * index_build - invoke access-method-specific index build procedure
2893  *
2894  * On entry, the index's catalog entries are valid, and its physical disk
2895  * file has been created but is empty.  We call the AM-specific build
2896  * procedure to fill in the index contents.  We then update the pg_class
2897  * entries of the index and heap relation as needed, using statistics
2898  * returned by ambuild as well as data passed by the caller.
2899  *
2900  * isreindex indicates we are recreating a previously-existing index.
2901  * parallel indicates if parallelism may be useful.
2902  *
2903  * Note: before Postgres 8.2, the passed-in heap and index Relations
2904  * were automatically closed by this routine.  This is no longer the case.
2905  * The caller opened 'em, and the caller should close 'em.
2906  */
2907 void
index_build(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool isreindex,bool parallel)2908 index_build(Relation heapRelation,
2909 			Relation indexRelation,
2910 			IndexInfo *indexInfo,
2911 			bool isreindex,
2912 			bool parallel)
2913 {
2914 	IndexBuildResult *stats;
2915 	Oid			save_userid;
2916 	int			save_sec_context;
2917 	int			save_nestlevel;
2918 
2919 	/*
2920 	 * sanity checks
2921 	 */
2922 	Assert(RelationIsValid(indexRelation));
2923 	Assert(PointerIsValid(indexRelation->rd_indam));
2924 	Assert(PointerIsValid(indexRelation->rd_indam->ambuild));
2925 	Assert(PointerIsValid(indexRelation->rd_indam->ambuildempty));
2926 
2927 	/*
2928 	 * Determine worker process details for parallel CREATE INDEX.  Currently,
2929 	 * only btree has support for parallel builds.
2930 	 *
2931 	 * Note that planner considers parallel safety for us.
2932 	 */
2933 	if (parallel && IsNormalProcessingMode() &&
2934 		indexRelation->rd_rel->relam == BTREE_AM_OID)
2935 		indexInfo->ii_ParallelWorkers =
2936 			plan_create_index_workers(RelationGetRelid(heapRelation),
2937 									  RelationGetRelid(indexRelation));
2938 
2939 	if (indexInfo->ii_ParallelWorkers == 0)
2940 		ereport(DEBUG1,
2941 				(errmsg_internal("building index \"%s\" on table \"%s\" serially",
2942 								 RelationGetRelationName(indexRelation),
2943 								 RelationGetRelationName(heapRelation))));
2944 	else
2945 		ereport(DEBUG1,
2946 				(errmsg_internal("building index \"%s\" on table \"%s\" with request for %d parallel workers",
2947 								 RelationGetRelationName(indexRelation),
2948 								 RelationGetRelationName(heapRelation),
2949 								 indexInfo->ii_ParallelWorkers)));
2950 
2951 	/*
2952 	 * Switch to the table owner's userid, so that any index functions are run
2953 	 * as that user.  Also lock down security-restricted operations and
2954 	 * arrange to make GUC variable changes local to this command.
2955 	 */
2956 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
2957 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2958 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
2959 	save_nestlevel = NewGUCNestLevel();
2960 
2961 	/* Set up initial progress report status */
2962 	{
2963 		const int	progress_index[] = {
2964 			PROGRESS_CREATEIDX_PHASE,
2965 			PROGRESS_CREATEIDX_SUBPHASE,
2966 			PROGRESS_CREATEIDX_TUPLES_DONE,
2967 			PROGRESS_CREATEIDX_TUPLES_TOTAL,
2968 			PROGRESS_SCAN_BLOCKS_DONE,
2969 			PROGRESS_SCAN_BLOCKS_TOTAL
2970 		};
2971 		const int64 progress_vals[] = {
2972 			PROGRESS_CREATEIDX_PHASE_BUILD,
2973 			PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE,
2974 			0, 0, 0, 0
2975 		};
2976 
2977 		pgstat_progress_update_multi_param(6, progress_index, progress_vals);
2978 	}
2979 
2980 	/*
2981 	 * Call the access method's build procedure
2982 	 */
2983 	stats = indexRelation->rd_indam->ambuild(heapRelation, indexRelation,
2984 											 indexInfo);
2985 	Assert(PointerIsValid(stats));
2986 
2987 	/*
2988 	 * If this is an unlogged index, we may need to write out an init fork for
2989 	 * it -- but we must first check whether one already exists.  If, for
2990 	 * example, an unlogged relation is truncated in the transaction that
2991 	 * created it, or truncated twice in a subsequent transaction, the
2992 	 * relfilenode won't change, and nothing needs to be done here.
2993 	 */
2994 	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2995 		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2996 	{
2997 		RelationOpenSmgr(indexRelation);
2998 		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2999 		indexRelation->rd_indam->ambuildempty(indexRelation);
3000 	}
3001 
3002 	/*
3003 	 * If we found any potentially broken HOT chains, mark the index as not
3004 	 * being usable until the current transaction is below the event horizon.
3005 	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
3006 	 * if early pruning/vacuuming is enabled for the heap relation.  While it
3007 	 * might become safe to use the index earlier based on actual cleanup
3008 	 * activity and other active transactions, the test for that would be much
3009 	 * more complex and would require some form of blocking, so keep it simple
3010 	 * and fast by just using the current transaction.
3011 	 *
3012 	 * However, when reindexing an existing index, we should do nothing here.
3013 	 * Any HOT chains that are broken with respect to the index must predate
3014 	 * the index's original creation, so there is no need to change the
3015 	 * index's usability horizon.  Moreover, we *must not* try to change the
3016 	 * index's pg_index entry while reindexing pg_index itself, and this
3017 	 * optimization nicely prevents that.  The more complex rules needed for a
3018 	 * reindex are handled separately after this function returns.
3019 	 *
3020 	 * We also need not set indcheckxmin during a concurrent index build,
3021 	 * because we won't set indisvalid true until all transactions that care
3022 	 * about the broken HOT chains or early pruning/vacuuming are gone.
3023 	 *
3024 	 * Therefore, this code path can only be taken during non-concurrent
3025 	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
3026 	 * tuple's xmin doesn't matter, because that tuple was created in the
3027 	 * current transaction anyway.  That also means we don't need to worry
3028 	 * about any concurrent readers of the tuple; no other transaction can see
3029 	 * it yet.
3030 	 */
3031 	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
3032 		!isreindex &&
3033 		!indexInfo->ii_Concurrent)
3034 	{
3035 		Oid			indexId = RelationGetRelid(indexRelation);
3036 		Relation	pg_index;
3037 		HeapTuple	indexTuple;
3038 		Form_pg_index indexForm;
3039 
3040 		pg_index = table_open(IndexRelationId, RowExclusiveLock);
3041 
3042 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
3043 										 ObjectIdGetDatum(indexId));
3044 		if (!HeapTupleIsValid(indexTuple))
3045 			elog(ERROR, "cache lookup failed for index %u", indexId);
3046 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3047 
3048 		/* If it's a new index, indcheckxmin shouldn't be set ... */
3049 		Assert(!indexForm->indcheckxmin);
3050 
3051 		indexForm->indcheckxmin = true;
3052 		CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3053 
3054 		heap_freetuple(indexTuple);
3055 		table_close(pg_index, RowExclusiveLock);
3056 	}
3057 
3058 	/*
3059 	 * Update heap and index pg_class rows
3060 	 */
3061 	index_update_stats(heapRelation,
3062 					   true,
3063 					   stats->heap_tuples);
3064 
3065 	index_update_stats(indexRelation,
3066 					   false,
3067 					   stats->index_tuples);
3068 
3069 	/* Make the updated catalog row versions visible */
3070 	CommandCounterIncrement();
3071 
3072 	/*
3073 	 * If it's for an exclusion constraint, make a second pass over the heap
3074 	 * to verify that the constraint is satisfied.  We must not do this until
3075 	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
3076 	 * see comments for IndexCheckExclusion.)
3077 	 */
3078 	if (indexInfo->ii_ExclusionOps != NULL)
3079 		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
3080 
3081 	/* Roll back any GUC changes executed by index functions */
3082 	AtEOXact_GUC(false, save_nestlevel);
3083 
3084 	/* Restore userid and security context */
3085 	SetUserIdAndSecContext(save_userid, save_sec_context);
3086 }
3087 
3088 /*
3089  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
3090  *
3091  * When creating an exclusion constraint, we first build the index normally
3092  * and then rescan the heap to check for conflicts.  We assume that we only
3093  * need to validate tuples that are live according to an up-to-date snapshot,
3094  * and that these were correctly indexed even in the presence of broken HOT
3095  * chains.  This should be OK since we are holding at least ShareLock on the
3096  * table, meaning there can be no uncommitted updates from other transactions.
3097  * (Note: that wouldn't necessarily work for system catalogs, since many
3098  * operations release write lock early on the system catalogs.)
3099  */
3100 static void
IndexCheckExclusion(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo)3101 IndexCheckExclusion(Relation heapRelation,
3102 					Relation indexRelation,
3103 					IndexInfo *indexInfo)
3104 {
3105 	TableScanDesc scan;
3106 	Datum		values[INDEX_MAX_KEYS];
3107 	bool		isnull[INDEX_MAX_KEYS];
3108 	ExprState  *predicate;
3109 	TupleTableSlot *slot;
3110 	EState	   *estate;
3111 	ExprContext *econtext;
3112 	Snapshot	snapshot;
3113 
3114 	/*
3115 	 * If we are reindexing the target index, mark it as no longer being
3116 	 * reindexed, to forestall an Assert in index_beginscan when we try to use
3117 	 * the index for probes.  This is OK because the index is now fully valid.
3118 	 */
3119 	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
3120 		ResetReindexProcessing();
3121 
3122 	/*
3123 	 * Need an EState for evaluation of index expressions and partial-index
3124 	 * predicates.  Also a slot to hold the current tuple.
3125 	 */
3126 	estate = CreateExecutorState();
3127 	econtext = GetPerTupleExprContext(estate);
3128 	slot = table_slot_create(heapRelation, NULL);
3129 
3130 	/* Arrange for econtext's scan tuple to be the tuple under test */
3131 	econtext->ecxt_scantuple = slot;
3132 
3133 	/* Set up execution state for predicate, if any. */
3134 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3135 
3136 	/*
3137 	 * Scan all live tuples in the base relation.
3138 	 */
3139 	snapshot = RegisterSnapshot(GetLatestSnapshot());
3140 	scan = table_beginscan_strat(heapRelation,	/* relation */
3141 								 snapshot,	/* snapshot */
3142 								 0, /* number of keys */
3143 								 NULL,	/* scan key */
3144 								 true,	/* buffer access strategy OK */
3145 								 true); /* syncscan OK */
3146 
3147 	while (table_scan_getnextslot(scan, ForwardScanDirection, slot))
3148 	{
3149 		CHECK_FOR_INTERRUPTS();
3150 
3151 		/*
3152 		 * In a partial index, ignore tuples that don't satisfy the predicate.
3153 		 */
3154 		if (predicate != NULL)
3155 		{
3156 			if (!ExecQual(predicate, econtext))
3157 				continue;
3158 		}
3159 
3160 		/*
3161 		 * Extract index column values, including computing expressions.
3162 		 */
3163 		FormIndexDatum(indexInfo,
3164 					   slot,
3165 					   estate,
3166 					   values,
3167 					   isnull);
3168 
3169 		/*
3170 		 * Check that this tuple has no conflicts.
3171 		 */
3172 		check_exclusion_constraint(heapRelation,
3173 								   indexRelation, indexInfo,
3174 								   &(slot->tts_tid), values, isnull,
3175 								   estate, true);
3176 
3177 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
3178 	}
3179 
3180 	table_endscan(scan);
3181 	UnregisterSnapshot(snapshot);
3182 
3183 	ExecDropSingleTupleTableSlot(slot);
3184 
3185 	FreeExecutorState(estate);
3186 
3187 	/* These may have been pointing to the now-gone estate */
3188 	indexInfo->ii_ExpressionsState = NIL;
3189 	indexInfo->ii_PredicateState = NULL;
3190 }
3191 
3192 
3193 /*
3194  * validate_index - support code for concurrent index builds
3195  *
3196  * We do a concurrent index build by first inserting the catalog entry for the
3197  * index via index_create(), marking it not indisready and not indisvalid.
3198  * Then we commit our transaction and start a new one, then we wait for all
3199  * transactions that could have been modifying the table to terminate.  Now
3200  * we know that any subsequently-started transactions will see the index and
3201  * honor its constraints on HOT updates; so while existing HOT-chains might
3202  * be broken with respect to the index, no currently live tuple will have an
3203  * incompatible HOT update done to it.  We now build the index normally via
3204  * index_build(), while holding a weak lock that allows concurrent
3205  * insert/update/delete.  Also, we index only tuples that are valid
3206  * as of the start of the scan (see table_index_build_scan), whereas a normal
3207  * build takes care to include recently-dead tuples.  This is OK because
3208  * we won't mark the index valid until all transactions that might be able
3209  * to see those tuples are gone.  The reason for doing that is to avoid
3210  * bogus unique-index failures due to concurrent UPDATEs (we might see
3211  * different versions of the same row as being valid when we pass over them,
3212  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
3213  * does not contain any tuples added to the table while we built the index.
3214  *
3215  * Next, we mark the index "indisready" (but still not "indisvalid") and
3216  * commit the second transaction and start a third.  Again we wait for all
3217  * transactions that could have been modifying the table to terminate.  Now
3218  * we know that any subsequently-started transactions will see the index and
3219  * insert their new tuples into it.  We then take a new reference snapshot
3220  * which is passed to validate_index().  Any tuples that are valid according
3221  * to this snap, but are not in the index, must be added to the index.
3222  * (Any tuples committed live after the snap will be inserted into the
3223  * index by their originating transaction.  Any tuples committed dead before
3224  * the snap need not be indexed, because we will wait out all transactions
3225  * that might care about them before we mark the index valid.)
3226  *
3227  * validate_index() works by first gathering all the TIDs currently in the
3228  * index, using a bulkdelete callback that just stores the TIDs and doesn't
3229  * ever say "delete it".  (This should be faster than a plain indexscan;
3230  * also, not all index AMs support full-index indexscan.)  Then we sort the
3231  * TIDs, and finally scan the table doing a "merge join" against the TID list
3232  * to see which tuples are missing from the index.  Thus we will ensure that
3233  * all tuples valid according to the reference snapshot are in the index.
3234  *
3235  * Building a unique index this way is tricky: we might try to insert a
3236  * tuple that is already dead or is in process of being deleted, and we
3237  * mustn't have a uniqueness failure against an updated version of the same
3238  * row.  We could try to check the tuple to see if it's already dead and tell
3239  * index_insert() not to do the uniqueness check, but that still leaves us
3240  * with a race condition against an in-progress update.  To handle that,
3241  * we expect the index AM to recheck liveness of the to-be-inserted tuple
3242  * before it declares a uniqueness error.
3243  *
3244  * After completing validate_index(), we wait until all transactions that
3245  * were alive at the time of the reference snapshot are gone; this is
3246  * necessary to be sure there are none left with a transaction snapshot
3247  * older than the reference (and hence possibly able to see tuples we did
3248  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
3249  * transactions will be able to use it for queries.
3250  *
3251  * Doing two full table scans is a brute-force strategy.  We could try to be
3252  * cleverer, eg storing new tuples in a special area of the table (perhaps
3253  * making the table append-only by setting use_fsm).  However that would
3254  * add yet more locking issues.
3255  */
3256 void
validate_index(Oid heapId,Oid indexId,Snapshot snapshot)3257 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
3258 {
3259 	Relation	heapRelation,
3260 				indexRelation;
3261 	IndexInfo  *indexInfo;
3262 	IndexVacuumInfo ivinfo;
3263 	ValidateIndexState state;
3264 	Oid			save_userid;
3265 	int			save_sec_context;
3266 	int			save_nestlevel;
3267 
3268 	{
3269 		const int	progress_index[] = {
3270 			PROGRESS_CREATEIDX_PHASE,
3271 			PROGRESS_CREATEIDX_TUPLES_DONE,
3272 			PROGRESS_CREATEIDX_TUPLES_TOTAL,
3273 			PROGRESS_SCAN_BLOCKS_DONE,
3274 			PROGRESS_SCAN_BLOCKS_TOTAL
3275 		};
3276 		const int64 progress_vals[] = {
3277 			PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN,
3278 			0, 0, 0, 0
3279 		};
3280 
3281 		pgstat_progress_update_multi_param(5, progress_index, progress_vals);
3282 	}
3283 
3284 	/* Open and lock the parent heap relation */
3285 	heapRelation = table_open(heapId, ShareUpdateExclusiveLock);
3286 	/* And the target index relation */
3287 	indexRelation = index_open(indexId, RowExclusiveLock);
3288 
3289 	/*
3290 	 * Fetch info needed for index_insert.  (You might think this should be
3291 	 * passed in from DefineIndex, but its copy is long gone due to having
3292 	 * been built in a previous transaction.)
3293 	 */
3294 	indexInfo = BuildIndexInfo(indexRelation);
3295 
3296 	/* mark build is concurrent just for consistency */
3297 	indexInfo->ii_Concurrent = true;
3298 
3299 	/*
3300 	 * Switch to the table owner's userid, so that any index functions are run
3301 	 * as that user.  Also lock down security-restricted operations and
3302 	 * arrange to make GUC variable changes local to this command.
3303 	 */
3304 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
3305 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
3306 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
3307 	save_nestlevel = NewGUCNestLevel();
3308 
3309 	/*
3310 	 * Scan the index and gather up all the TIDs into a tuplesort object.
3311 	 */
3312 	ivinfo.index = indexRelation;
3313 	ivinfo.analyze_only = false;
3314 	ivinfo.report_progress = true;
3315 	ivinfo.estimated_count = true;
3316 	ivinfo.message_level = DEBUG2;
3317 	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
3318 	ivinfo.strategy = NULL;
3319 
3320 	/*
3321 	 * Encode TIDs as int8 values for the sort, rather than directly sorting
3322 	 * item pointers.  This can be significantly faster, primarily because TID
3323 	 * is a pass-by-reference type on all platforms, whereas int8 is
3324 	 * pass-by-value on most platforms.
3325 	 */
3326 	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
3327 											InvalidOid, false,
3328 											maintenance_work_mem,
3329 											NULL, false);
3330 	state.htups = state.itups = state.tups_inserted = 0;
3331 
3332 	/* ambulkdelete updates progress metrics */
3333 	(void) index_bulk_delete(&ivinfo, NULL,
3334 							 validate_index_callback, (void *) &state);
3335 
3336 	/* Execute the sort */
3337 	{
3338 		const int	progress_index[] = {
3339 			PROGRESS_CREATEIDX_PHASE,
3340 			PROGRESS_SCAN_BLOCKS_DONE,
3341 			PROGRESS_SCAN_BLOCKS_TOTAL
3342 		};
3343 		const int64 progress_vals[] = {
3344 			PROGRESS_CREATEIDX_PHASE_VALIDATE_SORT,
3345 			0, 0
3346 		};
3347 
3348 		pgstat_progress_update_multi_param(3, progress_index, progress_vals);
3349 	}
3350 	tuplesort_performsort(state.tuplesort);
3351 
3352 	/*
3353 	 * Now scan the heap and "merge" it with the index
3354 	 */
3355 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3356 								 PROGRESS_CREATEIDX_PHASE_VALIDATE_TABLESCAN);
3357 	table_index_validate_scan(heapRelation,
3358 							  indexRelation,
3359 							  indexInfo,
3360 							  snapshot,
3361 							  &state);
3362 
3363 	/* Done with tuplesort object */
3364 	tuplesort_end(state.tuplesort);
3365 
3366 	elog(DEBUG2,
3367 		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
3368 		 state.htups, state.itups, state.tups_inserted);
3369 
3370 	/* Roll back any GUC changes executed by index functions */
3371 	AtEOXact_GUC(false, save_nestlevel);
3372 
3373 	/* Restore userid and security context */
3374 	SetUserIdAndSecContext(save_userid, save_sec_context);
3375 
3376 	/* Close rels, but keep locks */
3377 	index_close(indexRelation, NoLock);
3378 	table_close(heapRelation, NoLock);
3379 }
3380 
3381 /*
3382  * validate_index_callback - bulkdelete callback to collect the index TIDs
3383  */
3384 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3385 validate_index_callback(ItemPointer itemptr, void *opaque)
3386 {
3387 	ValidateIndexState *state = (ValidateIndexState *) opaque;
3388 	int64		encoded = itemptr_encode(itemptr);
3389 
3390 	tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3391 	state->itups += 1;
3392 	return false;				/* never actually delete anything */
3393 }
3394 
3395 /*
3396  * index_set_state_flags - adjust pg_index state flags
3397  *
3398  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3399  * flags that denote the index's state.
3400  *
3401  * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3402  * tuple, so other sessions will hear about the update as soon as we commit.
3403  */
3404 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3405 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3406 {
3407 	Relation	pg_index;
3408 	HeapTuple	indexTuple;
3409 	Form_pg_index indexForm;
3410 
3411 	/* Open pg_index and fetch a writable copy of the index's tuple */
3412 	pg_index = table_open(IndexRelationId, RowExclusiveLock);
3413 
3414 	indexTuple = SearchSysCacheCopy1(INDEXRELID,
3415 									 ObjectIdGetDatum(indexId));
3416 	if (!HeapTupleIsValid(indexTuple))
3417 		elog(ERROR, "cache lookup failed for index %u", indexId);
3418 	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3419 
3420 	/* Perform the requested state change on the copy */
3421 	switch (action)
3422 	{
3423 		case INDEX_CREATE_SET_READY:
3424 			/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3425 			Assert(indexForm->indislive);
3426 			Assert(!indexForm->indisready);
3427 			Assert(!indexForm->indisvalid);
3428 			indexForm->indisready = true;
3429 			break;
3430 		case INDEX_CREATE_SET_VALID:
3431 			/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3432 			Assert(indexForm->indislive);
3433 			Assert(indexForm->indisready);
3434 			Assert(!indexForm->indisvalid);
3435 			indexForm->indisvalid = true;
3436 			break;
3437 		case INDEX_DROP_CLEAR_VALID:
3438 
3439 			/*
3440 			 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3441 			 *
3442 			 * If indisready == true we leave it set so the index still gets
3443 			 * maintained by active transactions.  We only need to ensure that
3444 			 * indisvalid is false.  (We don't assert that either is initially
3445 			 * true, though, since we want to be able to retry a DROP INDEX
3446 			 * CONCURRENTLY that failed partway through.)
3447 			 *
3448 			 * Note: the CLUSTER logic assumes that indisclustered cannot be
3449 			 * set on any invalid index, so clear that flag too.  Similarly,
3450 			 * ALTER TABLE assumes that indisreplident cannot be set for
3451 			 * invalid indexes.
3452 			 */
3453 			indexForm->indisvalid = false;
3454 			indexForm->indisclustered = false;
3455 			indexForm->indisreplident = false;
3456 			break;
3457 		case INDEX_DROP_SET_DEAD:
3458 
3459 			/*
3460 			 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3461 			 *
3462 			 * We clear both indisready and indislive, because we not only
3463 			 * want to stop updates, we want to prevent sessions from touching
3464 			 * the index at all.
3465 			 */
3466 			Assert(!indexForm->indisvalid);
3467 			Assert(!indexForm->indisclustered);
3468 			Assert(!indexForm->indisreplident);
3469 			indexForm->indisready = false;
3470 			indexForm->indislive = false;
3471 			break;
3472 	}
3473 
3474 	/* ... and update it */
3475 	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3476 
3477 	table_close(pg_index, RowExclusiveLock);
3478 }
3479 
3480 
3481 /*
3482  * IndexGetRelation: given an index's relation OID, get the OID of the
3483  * relation it is an index on.  Uses the system cache.
3484  */
3485 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3486 IndexGetRelation(Oid indexId, bool missing_ok)
3487 {
3488 	HeapTuple	tuple;
3489 	Form_pg_index index;
3490 	Oid			result;
3491 
3492 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3493 	if (!HeapTupleIsValid(tuple))
3494 	{
3495 		if (missing_ok)
3496 			return InvalidOid;
3497 		elog(ERROR, "cache lookup failed for index %u", indexId);
3498 	}
3499 	index = (Form_pg_index) GETSTRUCT(tuple);
3500 	Assert(index->indexrelid == indexId);
3501 
3502 	result = index->indrelid;
3503 	ReleaseSysCache(tuple);
3504 	return result;
3505 }
3506 
/*
 * reindex_index - This routine is used to recreate a single index
 *
 * indexId: OID of the index to rebuild.
 * skip_constraint_checks: if true, the rebuild is done with the uniqueness
 *		and exclusion-constraint information removed from the IndexInfo.  If
 *		the index actually had such a constraint, the pg_index flag update
 *		at the end of this function is skipped as well.
 * persistence: passed to RelationSetNewRelfilenode() when the new physical
 *		storage for the index is created.
 * params: REINDEX parameters.  The option bits examined here are
 *		REINDEXOPT_REPORT_PROGRESS, REINDEXOPT_MISSING_OK and
 *		REINDEXOPT_VERBOSE; tablespaceOid, when valid, is the tablespace the
 *		index is to be moved to.
 *
 * With REINDEXOPT_MISSING_OK the function returns without doing anything
 * if the index's pg_index entry or its parent table cannot be found.
 *
 * On return both relations have been closed, but the locks acquired here
 * (ShareLock on the table, AccessExclusiveLock on the index) are kept.
 */
void
reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
			  ReindexParams *params)
{
	Relation	iRel,
				heapRelation;
	Oid			heapId;
	IndexInfo  *indexInfo;
	/* NOTE(review): volatile looks vestigial, no PG_TRY block is visible here -- confirm */
	volatile bool skipped_constraint = false;
	PGRUsage	ru0;
	bool		progress = ((params->options & REINDEXOPT_REPORT_PROGRESS) != 0);
	bool		set_tablespace = false;

	/* Start resource-usage measurement; reported below if VERBOSE */
	pg_rusage_init(&ru0);

	/*
	 * Open and lock the parent heap relation.  ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId,
							  (params->options & REINDEXOPT_MISSING_OK) != 0);
	/* if relation is missing, leave */
	if (!OidIsValid(heapId))
		return;

	if ((params->options & REINDEXOPT_MISSING_OK) != 0)
		heapRelation = try_table_open(heapId, ShareLock);
	else
		heapRelation = table_open(heapId, ShareLock);

	/* if relation is gone, leave */
	if (!heapRelation)
		return;

	/*
	 * Progress reporting starts only now, after the early exits above, so
	 * that no progress entry is left behind by them.
	 */
	if (progress)
	{
		const int	progress_cols[] = {
			PROGRESS_CREATEIDX_COMMAND,
			PROGRESS_CREATEIDX_INDEX_OID
		};
		const int64 progress_vals[] = {
			PROGRESS_CREATEIDX_COMMAND_REINDEX,
			indexId
		};

		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
									  heapId);
		pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
	}

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	if (progress)
		pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
									 iRel->rd_rel->relam);

	/*
	 * Partitioned indexes should never get processed here, as they have no
	 * physical storage.
	 */
	if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
		elog(ERROR, "cannot reindex partitioned index \"%s.%s\"",
			 get_namespace_name(RelationGetNamespace(iRel)),
			 RelationGetRelationName(iRel));

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (RELATION_IS_OTHER_TEMP(iRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Don't allow reindex of an invalid index on TOAST table.  This is a
	 * leftover from a failed REINDEX CONCURRENTLY, and if rebuilt it would
	 * not be possible to drop it anymore.
	 */
	if (IsToastNamespace(RelationGetNamespace(iRel)) &&
		!get_index_isvalid(indexId))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot reindex invalid index on TOAST table")));

	/*
	 * System relations cannot be moved even if allow_system_table_mods is
	 * enabled to keep things consistent with the concurrent case where all
	 * the indexes of a relation are processed in series, including indexes of
	 * toast relations.
	 *
	 * Note that this check is not part of CheckRelationTableSpaceMove() as it
	 * gets used for ALTER TABLE SET TABLESPACE that could cascade across
	 * toast relations.
	 */
	if (OidIsValid(params->tablespaceOid) &&
		IsSystemRelation(iRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot move system relation \"%s\"",
						RelationGetRelationName(iRel))));

	/* Check if the tablespace of this index needs to be changed */
	if (OidIsValid(params->tablespaceOid) &&
		CheckRelationTableSpaceMove(iRel, params->tablespaceOid))
		set_tablespace = true;

	/*
	 * Also check for active uses of the index in the current transaction; we
	 * don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/* Set new tablespace, if requested */
	if (set_tablespace)
	{
		/* Update its pg_class row */
		SetRelationTableSpace(iRel, params->tablespaceOid, InvalidOid);

		/*
		 * Schedule unlinking of the old index storage at transaction commit.
		 */
		RelationDropStorage(iRel);
		RelationAssumeNewRelfilenode(iRel);

		/* Make sure the reltablespace change is visible */
		CommandCounterIncrement();
	}

	/*
	 * All predicate locks on the index are about to be made invalid. Promote
	 * them to relation locks on the heap.
	 */
	TransferPredicateLocksToHeapRelation(iRel);

	/* Fetch info needed for index_build */
	indexInfo = BuildIndexInfo(iRel);

	/* If requested, skip checking uniqueness/exclusion constraints */
	if (skip_constraint_checks)
	{
		/* Remember whether there really was a constraint we are not checking */
		if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
			skipped_constraint = true;
		indexInfo->ii_Unique = false;
		indexInfo->ii_ExclusionOps = NULL;
		indexInfo->ii_ExclusionProcs = NULL;
		indexInfo->ii_ExclusionStrats = NULL;
	}

	/* Suppress use of the target index while rebuilding it */
	SetReindexProcessing(heapId, indexId);

	/* Create a new physical relation for the index */
	RelationSetNewRelfilenode(iRel, persistence);

	/* Initialize the index and rebuild */
	/* Note: we do not need to re-establish pkey setting */
	index_build(heapRelation, iRel, indexInfo, true, true);

	/* Re-allow use of target index */
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
	 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
	 * and we didn't skip a uniqueness check, we can now mark it valid.  This
	 * allows REINDEX to be used to clean up in such cases.
	 *
	 * We can also reset indcheckxmin, because we have now done a
	 * non-concurrent index build, *except* in the case where index_build
	 * found some still-broken HOT chains. If it did, and we don't have to
	 * change any of the other flags, we just leave indcheckxmin alone (note
	 * that index_build won't have changed it, because this is a reindex).
	 * This is okay and desirable because not updating the tuple leaves the
	 * index's usability horizon (recorded as the tuple's xmin value) the same
	 * as it was.
	 *
	 * But, if the index was invalid/not-ready/dead and there were broken HOT
	 * chains, we had better force indcheckxmin true, because the normal
	 * argument that the HOT chains couldn't conflict with the index is
	 * suspect for an invalid index.  (A conflict is definitely possible if
	 * the index was dead.  It probably shouldn't happen otherwise, but let's
	 * be conservative.)  In this case advancing the usability horizon is
	 * appropriate.
	 *
	 * Another reason for avoiding unnecessary updates here is that while
	 * reindexing pg_index itself, we must not try to update tuples in it.
	 * pg_index's indexes should always have these flags in their clean state,
	 * so that won't happen.
	 *
	 * If early pruning/vacuuming is enabled for the heap relation, the
	 * usability horizon must be advanced to the current transaction on every
	 * build or rebuild.  pg_index is OK in this regard because catalog tables
	 * are not subject to early cleanup.
	 */
	if (!skipped_constraint)
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		index_bad;
		bool		early_pruning_enabled = EarlyPruningEnabled(heapRelation);

		pg_index = table_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		/* "bad" means any of the three state flags is not in its clean state */
		index_bad = (!indexForm->indisvalid ||
					 !indexForm->indisready ||
					 !indexForm->indislive);
		if (index_bad ||
			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
			early_pruning_enabled)
		{
			/*
			 * If neither branch below fires (broken HOT chains on an index
			 * that was not bad, without early pruning), indcheckxmin keeps
			 * its current value.
			 */
			if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
				indexForm->indcheckxmin = false;
			else if (index_bad || early_pruning_enabled)
				indexForm->indcheckxmin = true;
			indexForm->indisvalid = true;
			indexForm->indisready = true;
			indexForm->indislive = true;
			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's index list.  This ensures
			 * that if anyone misses seeing the pg_index row during this
			 * update, they'll refresh their list before attempting any update
			 * on the table.
			 */
			CacheInvalidateRelcache(heapRelation);
		}

		table_close(pg_index, RowExclusiveLock);
	}

	/* Log what we did */
	if ((params->options & REINDEXOPT_VERBOSE) != 0)
		ereport(INFO,
				(errmsg("index \"%s\" was reindexed",
						get_rel_name(indexId)),
				 errdetail_internal("%s",
									pg_rusage_show(&ru0))));

	if (progress)
		pgstat_progress_end_command();

	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	table_close(heapRelation, NoLock);
}
3769 
3770 /*
3771  * reindex_relation - This routine is used to recreate all indexes
3772  * of a relation (and optionally its toast relation too, if any).
3773  *
3774  * "flags" is a bitmask that can include any combination of these bits:
3775  *
3776  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3777  *
3778  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3779  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3780  * indexes are inconsistent with it.  This makes things tricky if the relation
3781  * is a system catalog that we might consult during the reindexing.  To deal
3782  * with that case, we mark all of the indexes as pending rebuild so that they
3783  * won't be trusted until rebuilt.  The caller is required to call us *without*
3784  * having made the rebuilt table visible by doing CommandCounterIncrement;
3785  * we'll do CCI after having collected the index list.  (This way we can still
3786  * use catalog indexes while collecting the list.)
3787  *
3788  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3789  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
3790  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
3791  * rebuild an index if constraint inconsistency is suspected.  For optimal
3792  * performance, other callers should include the flag only after transforming
3793  * the data in a manner that risks a change in constraint validity.
3794  *
3795  * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3796  * rebuilt indexes to unlogged.
3797  *
3798  * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3799  * rebuilt indexes to permanent.
3800  *
3801  * Returns true if any indexes were rebuilt (including toast table's index
3802  * when relevant).  Note that a CommandCounterIncrement will occur after each
3803  * index rebuild.
3804  */
3805 bool
reindex_relation(Oid relid,int flags,ReindexParams * params)3806 reindex_relation(Oid relid, int flags, ReindexParams *params)
3807 {
3808 	Relation	rel;
3809 	Oid			toast_relid;
3810 	List	   *indexIds;
3811 	char		persistence;
3812 	bool		result;
3813 	ListCell   *indexId;
3814 	int			i;
3815 
3816 	/*
3817 	 * Open and lock the relation.  ShareLock is sufficient since we only need
3818 	 * to prevent schema and data changes in it.  The lock level used here
3819 	 * should match ReindexTable().
3820 	 */
3821 	if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3822 		rel = try_table_open(relid, ShareLock);
3823 	else
3824 		rel = table_open(relid, ShareLock);
3825 
3826 	/* if relation is gone, leave */
3827 	if (!rel)
3828 		return false;
3829 
3830 	/*
3831 	 * Partitioned tables should never get processed here, as they have no
3832 	 * physical storage.
3833 	 */
3834 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3835 		elog(ERROR, "cannot reindex partitioned table \"%s.%s\"",
3836 			 get_namespace_name(RelationGetNamespace(rel)),
3837 			 RelationGetRelationName(rel));
3838 
3839 	toast_relid = rel->rd_rel->reltoastrelid;
3840 
3841 	/*
3842 	 * Get the list of index OIDs for this relation.  (We trust to the
3843 	 * relcache to get this with a sequential scan if ignoring system
3844 	 * indexes.)
3845 	 */
3846 	indexIds = RelationGetIndexList(rel);
3847 
3848 	if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3849 	{
3850 		/* Suppress use of all the indexes until they are rebuilt */
3851 		SetReindexPending(indexIds);
3852 
3853 		/*
3854 		 * Make the new heap contents visible --- now things might be
3855 		 * inconsistent!
3856 		 */
3857 		CommandCounterIncrement();
3858 	}
3859 
3860 	/*
3861 	 * Compute persistence of indexes: same as that of owning rel, unless
3862 	 * caller specified otherwise.
3863 	 */
3864 	if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3865 		persistence = RELPERSISTENCE_UNLOGGED;
3866 	else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3867 		persistence = RELPERSISTENCE_PERMANENT;
3868 	else
3869 		persistence = rel->rd_rel->relpersistence;
3870 
3871 	/* Reindex all the indexes. */
3872 	i = 1;
3873 	foreach(indexId, indexIds)
3874 	{
3875 		Oid			indexOid = lfirst_oid(indexId);
3876 		Oid			indexNamespaceId = get_rel_namespace(indexOid);
3877 
3878 		/*
3879 		 * Skip any invalid indexes on a TOAST table.  These can only be
3880 		 * duplicate leftovers from a failed REINDEX CONCURRENTLY, and if
3881 		 * rebuilt it would not be possible to drop them anymore.
3882 		 */
3883 		if (IsToastNamespace(indexNamespaceId) &&
3884 			!get_index_isvalid(indexOid))
3885 		{
3886 			ereport(WARNING,
3887 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3888 					 errmsg("cannot reindex invalid index \"%s.%s\" on TOAST table, skipping",
3889 							get_namespace_name(indexNamespaceId),
3890 							get_rel_name(indexOid))));
3891 			continue;
3892 		}
3893 
3894 		reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3895 					  persistence, params);
3896 
3897 		CommandCounterIncrement();
3898 
3899 		/* Index should no longer be in the pending list */
3900 		Assert(!ReindexIsProcessingIndex(indexOid));
3901 
3902 		/* Set index rebuild count */
3903 		pgstat_progress_update_param(PROGRESS_CLUSTER_INDEX_REBUILD_COUNT,
3904 									 i);
3905 		i++;
3906 	}
3907 
3908 	/*
3909 	 * Close rel, but continue to hold the lock.
3910 	 */
3911 	table_close(rel, NoLock);
3912 
3913 	result = (indexIds != NIL);
3914 
3915 	/*
3916 	 * If the relation has a secondary toast rel, reindex that too while we
3917 	 * still hold the lock on the main table.
3918 	 */
3919 	if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3920 	{
3921 		/*
3922 		 * Note that this should fail if the toast relation is missing, so
3923 		 * reset REINDEXOPT_MISSING_OK.  Even if a new tablespace is set for
3924 		 * the parent relation, the indexes on its toast table are not moved.
3925 		 * This rule is enforced by setting tablespaceOid to InvalidOid.
3926 		 */
3927 		ReindexParams newparams = *params;
3928 
3929 		newparams.options &= ~(REINDEXOPT_MISSING_OK);
3930 		newparams.tablespaceOid = InvalidOid;
3931 		result |= reindex_relation(toast_relid, flags, &newparams);
3932 	}
3933 
3934 	return result;
3935 }
3936 
3937 
3938 /* ----------------------------------------------------------------
3939  *		System index reindexing support
3940  *
3941  * When we are busy reindexing a system index, this code provides support
3942  * for preventing catalog lookups from using that index.  We also make use
3943  * of this to catch attempted uses of user indexes during reindexing of
3944  * those indexes.  This information is propagated to parallel workers;
3945  * attempting to change it during a parallel operation is not permitted.
3946  * ----------------------------------------------------------------
3947  */
3948 
/* Heap whose index is being rebuilt right now; InvalidOid when there is none */
static Oid	currentlyReindexedHeap = InvalidOid;
/* Index being rebuilt right now; InvalidOid when there is none */
static Oid	currentlyReindexedIndex = InvalidOid;
/* OIDs of indexes registered by SetReindexPending() and not yet removed */
static List *pendingReindexedIndexes = NIL;
/* Transaction nest level noted when reindex state was set; zeroed by ResetReindexState() */
static int	reindexingNestLevel = 0;
3953 
3954 /*
3955  * ReindexIsProcessingHeap
3956  *		True if heap specified by OID is currently being reindexed.
3957  */
3958 bool
ReindexIsProcessingHeap(Oid heapOid)3959 ReindexIsProcessingHeap(Oid heapOid)
3960 {
3961 	return heapOid == currentlyReindexedHeap;
3962 }
3963 
3964 /*
3965  * ReindexIsCurrentlyProcessingIndex
3966  *		True if index specified by OID is currently being reindexed.
3967  */
3968 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3969 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3970 {
3971 	return indexOid == currentlyReindexedIndex;
3972 }
3973 
3974 /*
3975  * ReindexIsProcessingIndex
3976  *		True if index specified by OID is currently being reindexed,
3977  *		or should be treated as invalid because it is awaiting reindex.
3978  */
3979 bool
ReindexIsProcessingIndex(Oid indexOid)3980 ReindexIsProcessingIndex(Oid indexOid)
3981 {
3982 	return indexOid == currentlyReindexedIndex ||
3983 		list_member_oid(pendingReindexedIndexes, indexOid);
3984 }
3985 
3986 /*
3987  * SetReindexProcessing
3988  *		Set flag that specified heap/index are being reindexed.
3989  */
3990 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3991 SetReindexProcessing(Oid heapOid, Oid indexOid)
3992 {
3993 	Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3994 	/* Reindexing is not re-entrant. */
3995 	if (OidIsValid(currentlyReindexedHeap))
3996 		elog(ERROR, "cannot reindex while reindexing");
3997 	currentlyReindexedHeap = heapOid;
3998 	currentlyReindexedIndex = indexOid;
3999 	/* Index is no longer "pending" reindex. */
4000 	RemoveReindexPending(indexOid);
4001 	/* This may have been set already, but in case it isn't, do so now. */
4002 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4003 }
4004 
4005 /*
4006  * ResetReindexProcessing
4007  *		Unset reindexing status.
4008  */
4009 static void
ResetReindexProcessing(void)4010 ResetReindexProcessing(void)
4011 {
4012 	currentlyReindexedHeap = InvalidOid;
4013 	currentlyReindexedIndex = InvalidOid;
4014 	/* reindexingNestLevel remains set till end of (sub)transaction */
4015 }
4016 
4017 /*
4018  * SetReindexPending
4019  *		Mark the given indexes as pending reindex.
4020  *
4021  * NB: we assume that the current memory context stays valid throughout.
4022  */
4023 static void
SetReindexPending(List * indexes)4024 SetReindexPending(List *indexes)
4025 {
4026 	/* Reindexing is not re-entrant. */
4027 	if (pendingReindexedIndexes)
4028 		elog(ERROR, "cannot reindex while reindexing");
4029 	if (IsInParallelMode())
4030 		elog(ERROR, "cannot modify reindex state during a parallel operation");
4031 	pendingReindexedIndexes = list_copy(indexes);
4032 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4033 }
4034 
4035 /*
4036  * RemoveReindexPending
4037  *		Remove the given index from the pending list.
4038  */
4039 static void
RemoveReindexPending(Oid indexOid)4040 RemoveReindexPending(Oid indexOid)
4041 {
4042 	if (IsInParallelMode())
4043 		elog(ERROR, "cannot modify reindex state during a parallel operation");
4044 	pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
4045 											  indexOid);
4046 }
4047 
4048 /*
4049  * ResetReindexState
4050  *		Clear all reindexing state during (sub)transaction abort.
4051  */
4052 void
ResetReindexState(int nestLevel)4053 ResetReindexState(int nestLevel)
4054 {
4055 	/*
4056 	 * Because reindexing is not re-entrant, we don't need to cope with nested
4057 	 * reindexing states.  We just need to avoid messing up the outer-level
4058 	 * state in case a subtransaction fails within a REINDEX.  So checking the
4059 	 * current nest level against that of the reindex operation is sufficient.
4060 	 */
4061 	if (reindexingNestLevel >= nestLevel)
4062 	{
4063 		currentlyReindexedHeap = InvalidOid;
4064 		currentlyReindexedIndex = InvalidOid;
4065 
4066 		/*
4067 		 * We needn't try to release the contents of pendingReindexedIndexes;
4068 		 * that list should be in a transaction-lifespan context, so it will
4069 		 * go away automatically.
4070 		 */
4071 		pendingReindexedIndexes = NIL;
4072 
4073 		reindexingNestLevel = 0;
4074 	}
4075 }
4076 
4077 /*
4078  * EstimateReindexStateSpace
4079  *		Estimate space needed to pass reindex state to parallel workers.
4080  */
4081 Size
EstimateReindexStateSpace(void)4082 EstimateReindexStateSpace(void)
4083 {
4084 	return offsetof(SerializedReindexState, pendingReindexedIndexes)
4085 		+ mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
4086 }
4087 
4088 /*
4089  * SerializeReindexState
4090  *		Serialize reindex state for parallel workers.
4091  */
4092 void
SerializeReindexState(Size maxsize,char * start_address)4093 SerializeReindexState(Size maxsize, char *start_address)
4094 {
4095 	SerializedReindexState *sistate = (SerializedReindexState *) start_address;
4096 	int			c = 0;
4097 	ListCell   *lc;
4098 
4099 	sistate->currentlyReindexedHeap = currentlyReindexedHeap;
4100 	sistate->currentlyReindexedIndex = currentlyReindexedIndex;
4101 	sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
4102 	foreach(lc, pendingReindexedIndexes)
4103 		sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
4104 }
4105 
4106 /*
4107  * RestoreReindexState
4108  *		Restore reindex state in a parallel worker.
4109  */
4110 void
RestoreReindexState(void * reindexstate)4111 RestoreReindexState(void *reindexstate)
4112 {
4113 	SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
4114 	int			c = 0;
4115 	MemoryContext oldcontext;
4116 
4117 	currentlyReindexedHeap = sistate->currentlyReindexedHeap;
4118 	currentlyReindexedIndex = sistate->currentlyReindexedIndex;
4119 
4120 	Assert(pendingReindexedIndexes == NIL);
4121 	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
4122 	for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
4123 		pendingReindexedIndexes =
4124 			lappend_oid(pendingReindexedIndexes,
4125 						sistate->pendingReindexedIndexes[c]);
4126 	MemoryContextSwitchTo(oldcontext);
4127 
4128 	/* Note the worker has its own transaction nesting level */
4129 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4130 }
4131