1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *	  code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/catalog/index.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *		index_create()			- Create a cataloged index relation
16  *		index_drop()			- Removes index relation from catalogs
17  *		BuildIndexInfo()		- Prepare to insert index tuples
18  *		FormIndexDatum()		- Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23 
24 #include <unistd.h>
25 
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/event_trigger.h"
52 #include "commands/trigger.h"
53 #include "executor/executor.h"
54 #include "miscadmin.h"
55 #include "nodes/makefuncs.h"
56 #include "nodes/nodeFuncs.h"
57 #include "optimizer/clauses.h"
58 #include "parser/parser.h"
59 #include "storage/bufmgr.h"
60 #include "storage/lmgr.h"
61 #include "storage/predicate.h"
62 #include "storage/procarray.h"
63 #include "storage/smgr.h"
64 #include "utils/builtins.h"
65 #include "utils/fmgroids.h"
66 #include "utils/guc.h"
67 #include "utils/inval.h"
68 #include "utils/lsyscache.h"
69 #include "utils/memutils.h"
70 #include "utils/pg_rusage.h"
71 #include "utils/syscache.h"
72 #include "utils/tuplesort.h"
73 #include "utils/snapmgr.h"
74 #include "utils/tqual.h"
75 
76 
/*
 * Potentially set by pg_upgrade_support functions: forces the next index
 * created to use this specific pg_class OID (binary-upgrade mode only).
 */
Oid			binary_upgrade_next_index_pg_class_oid = InvalidOid;

/* state info for validate_index bulkdelete callback */
typedef struct
{
	Tuplesortstate *tuplesort;	/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* presumably # heap tuples scanned — maintained by validate_index code not visible here */
				itups,			/* presumably # index tuples scanned */
				tups_inserted;	/* presumably # tuples inserted during validation */
} v_i_state;
89 
90 /* non-export function prototypes */
91 static bool relationHasPrimaryKey(Relation rel);
92 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
93 						 IndexInfo *indexInfo,
94 						 List *indexColNames,
95 						 Oid accessMethodObjectId,
96 						 Oid *collationObjectId,
97 						 Oid *classObjectId);
98 static void InitializeAttributeOids(Relation indexRelation,
99 						int numatts, Oid indexoid);
100 static void AppendAttributeTuples(Relation indexRelation, int numatts);
101 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
102 					IndexInfo *indexInfo,
103 					Oid *collationOids,
104 					Oid *classOids,
105 					int16 *coloptions,
106 					bool primary,
107 					bool isexclusion,
108 					bool immediate,
109 					bool isvalid);
110 static void index_update_stats(Relation rel,
111 				   bool hasindex, bool isprimary,
112 				   double reltuples);
113 static void IndexCheckExclusion(Relation heapRelation,
114 					Relation indexRelation,
115 					IndexInfo *indexInfo);
116 static inline int64 itemptr_encode(ItemPointer itemptr);
117 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
118 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
119 static void validate_index_heapscan(Relation heapRelation,
120 						Relation indexRelation,
121 						IndexInfo *indexInfo,
122 						Snapshot snapshot,
123 						v_i_state *state);
124 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
125 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
126 static void ResetReindexProcessing(void);
127 static void SetReindexPending(List *indexes);
128 static void RemoveReindexPending(Oid indexOid);
129 
130 
131 /*
132  * relationHasPrimaryKey
133  *		See whether an existing relation has a primary key.
134  *
135  * Caller must have suitable lock on the relation.
136  *
137  * Note: we intentionally do not check IndexIsValid here; that's because this
138  * is used to enforce the rule that there can be only one indisprimary index,
139  * and we want that to be true even if said index is invalid.
140  */
141 static bool
relationHasPrimaryKey(Relation rel)142 relationHasPrimaryKey(Relation rel)
143 {
144 	bool		result = false;
145 	List	   *indexoidlist;
146 	ListCell   *indexoidscan;
147 
148 	/*
149 	 * Get the list of index OIDs for the table from the relcache, and look up
150 	 * each one in the pg_index syscache until we find one marked primary key
151 	 * (hopefully there isn't more than one such).
152 	 */
153 	indexoidlist = RelationGetIndexList(rel);
154 
155 	foreach(indexoidscan, indexoidlist)
156 	{
157 		Oid			indexoid = lfirst_oid(indexoidscan);
158 		HeapTuple	indexTuple;
159 
160 		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161 		if (!HeapTupleIsValid(indexTuple))		/* should not happen */
162 			elog(ERROR, "cache lookup failed for index %u", indexoid);
163 		result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164 		ReleaseSysCache(indexTuple);
165 		if (result)
166 			break;
167 	}
168 
169 	list_free(indexoidlist);
170 
171 	return result;
172 }
173 
174 /*
175  * index_check_primary_key
176  *		Apply special checks needed before creating a PRIMARY KEY index
177  *
178  * This processing used to be in DefineIndex(), but has been split out
179  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
180  *
181  * We check for a pre-existing primary key, and that all columns of the index
182  * are simple column references (not expressions), and that all those
183  * columns are marked NOT NULL.  If they aren't (which can only happen during
184  * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185  * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186  * them so --- or fail if they are not in fact nonnull.
187  *
188  * Caller had better have at least ShareLock on the table, else the not-null
189  * checking isn't trustworthy.
190  */
191 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)192 index_check_primary_key(Relation heapRel,
193 						IndexInfo *indexInfo,
194 						bool is_alter_table,
195 						IndexStmt *stmt)
196 {
197 	List	   *cmds;
198 	int			i;
199 
200 	/*
201 	 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
202 	 * TABLE, we have faith that the parser rejected multiple pkey clauses;
203 	 * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
204 	 * problem either.
205 	 */
206 	if (is_alter_table &&
207 		relationHasPrimaryKey(heapRel))
208 	{
209 		ereport(ERROR,
210 				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
211 			 errmsg("multiple primary keys for table \"%s\" are not allowed",
212 					RelationGetRelationName(heapRel))));
213 	}
214 
215 	/*
216 	 * Check that all of the attributes in a primary key are marked as not
217 	 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
218 	 */
219 	cmds = NIL;
220 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
221 	{
222 		AttrNumber	attnum = indexInfo->ii_KeyAttrNumbers[i];
223 		HeapTuple	atttuple;
224 		Form_pg_attribute attform;
225 
226 		if (attnum == 0)
227 			ereport(ERROR,
228 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
229 					 errmsg("primary keys cannot be expressions")));
230 
231 		/* System attributes are never null, so no need to check */
232 		if (attnum < 0)
233 			continue;
234 
235 		atttuple = SearchSysCache2(ATTNUM,
236 								 ObjectIdGetDatum(RelationGetRelid(heapRel)),
237 								   Int16GetDatum(attnum));
238 		if (!HeapTupleIsValid(atttuple))
239 			elog(ERROR, "cache lookup failed for attribute %d of relation %u",
240 				 attnum, RelationGetRelid(heapRel));
241 		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
242 
243 		if (!attform->attnotnull)
244 		{
245 			/* Add a subcommand to make this one NOT NULL */
246 			AlterTableCmd *cmd = makeNode(AlterTableCmd);
247 
248 			cmd->subtype = AT_SetNotNull;
249 			cmd->name = pstrdup(NameStr(attform->attname));
250 			cmds = lappend(cmds, cmd);
251 		}
252 
253 		ReleaseSysCache(atttuple);
254 	}
255 
256 	/*
257 	 * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child tables?
258 	 * Currently, since the PRIMARY KEY itself doesn't cascade, we don't
259 	 * cascade the notnull constraint(s) either; but this is pretty debatable.
260 	 *
261 	 * XXX: possible future improvement: when being called from ALTER TABLE,
262 	 * it would be more efficient to merge this with the outer ALTER TABLE, so
263 	 * as to avoid two scans.  But that seems to complicate DefineIndex's API
264 	 * unduly.
265 	 */
266 	if (cmds)
267 	{
268 		EventTriggerAlterTableStart((Node *) stmt);
269 		AlterTableInternal(RelationGetRelid(heapRel), cmds, false);
270 		EventTriggerAlterTableEnd();
271 	}
272 }
273 
/*
 *		ConstructTupleDescriptor
 *
 * Build an index tuple descriptor for a new index
 *
 * heapRelation: table the index is being defined on
 * indexInfo: index column/expression info (ii_NumIndexAttrs entries)
 * indexColNames: list of char *, one name per index column
 * accessMethodObjectId: index AM's OID (consulted for amkeytype)
 * collationObjectId, classObjectId: per-column collation and opclass OIDs
 */
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 List *indexColNames,
						 Oid accessMethodObjectId,
						 Oid *collationObjectId,
						 Oid *classObjectId)
{
	int			numatts = indexInfo->ii_NumIndexAttrs;
	ListCell   *colnames_item = list_head(indexColNames);
	ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
	IndexAmRoutine *amroutine;
	TupleDesc	heapTupDesc;
	TupleDesc	indexTupDesc;
	int			natts;			/* #atts in heap rel --- for error checks */
	int			i;

	/* We need access to the index AM's API struct */
	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);

	/* ... and to the table's tuple descriptor */
	heapTupDesc = RelationGetDescr(heapRelation);
	natts = RelationGetForm(heapRelation)->relnatts;

	/*
	 * allocate the new tuple descriptor
	 */
	indexTupDesc = CreateTemplateTupleDesc(numatts, false);

	/*
	 * For simple index columns, we copy the pg_attribute row from the parent
	 * relation and modify it as necessary.  For expressions we have to cons
	 * up a pg_attribute row the hard way.
	 */
	for (i = 0; i < numatts; i++)
	{
		AttrNumber	atnum = indexInfo->ii_KeyAttrNumbers[i];
		Form_pg_attribute to = indexTupDesc->attrs[i];
		HeapTuple	tuple;
		Form_pg_type typeTup;
		Form_pg_opclass opclassTup;
		Oid			keyType;

		if (atnum != 0)
		{
			/* Simple index column */
			Form_pg_attribute from;

			if (atnum < 0)
			{
				/*
				 * here we are indexing on a system attribute (-1...-n)
				 */
				from = SystemAttributeDefinition(atnum,
										   heapRelation->rd_rel->relhasoids);
			}
			else
			{
				/*
				 * here we are indexing on a normal attribute (1...n)
				 */
				if (atnum > natts)		/* safety check */
					elog(ERROR, "invalid column number %d", atnum);
				from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
			}

			/*
			 * now that we've determined the "from", let's copy the tuple desc
			 * data...
			 */
			memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)		/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			/*
			 * Fix the stuff that should not be the same as the underlying
			 * attr: attnum becomes the column's position within the index,
			 * and heap-level constraint/inheritance fields are reset since
			 * they don't apply to an index column.
			 */
			to->attnum = i + 1;

			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->attnotnull = false;
			to->atthasdef = false;
			to->attislocal = true;
			to->attinhcount = 0;
			to->attcollation = collationObjectId[i];
		}
		else
		{
			/* Expressional index */
			Node	   *indexkey;

			MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)		/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			if (indexpr_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in indexprs list");
			indexkey = (Node *) lfirst(indexpr_item);
			indexpr_item = lnext(indexpr_item);

			/*
			 * Lookup the expression type in pg_type for the type length etc.
			 */
			keyType = exprType(indexkey);
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			/*
			 * Assign some of the attributes values. Leave the rest as 0.
			 */
			to->attnum = i + 1;
			to->atttypid = keyType;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attstorage = typeTup->typstorage;
			to->attalign = typeTup->typalign;
			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->atttypmod = exprTypmod(indexkey);
			to->attislocal = true;
			to->attcollation = collationObjectId[i];

			ReleaseSysCache(tuple);

			/*
			 * Make sure the expression yields a type that's safe to store in
			 * an index.  We need this defense because we have index opclasses
			 * for pseudo-types such as "record", and the actually stored type
			 * had better be safe; eg, a named composite type is okay, an
			 * anonymous record type is not.  The test is the same as for
			 * whether a table column is of a safe type (which is why we
			 * needn't check for the non-expression case).
			 */
			CheckAttributeType(NameStr(to->attname),
							   to->atttypid, to->attcollation,
							   NIL, false);
		}

		/*
		 * We do not yet have the correct relation OID for the index, so just
		 * set it invalid for now.  InitializeAttributeOids() will fix it
		 * later.
		 */
		to->attrelid = InvalidOid;

		/*
		 * Check the opclass and index AM to see if either provides a keytype
		 * (overriding the attribute type).  Opclass takes precedence.
		 */
		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
		if (!HeapTupleIsValid(tuple))
			elog(ERROR, "cache lookup failed for opclass %u",
				 classObjectId[i]);
		opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
		if (OidIsValid(opclassTup->opckeytype))
			keyType = opclassTup->opckeytype;
		else
			keyType = amroutine->amkeytype;
		ReleaseSysCache(tuple);

		if (OidIsValid(keyType) && keyType != to->atttypid)
		{
			/* index value and heap value have different types */
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			to->atttypid = keyType;
			to->atttypmod = -1;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attalign = typeTup->typalign;
			to->attstorage = typeTup->typstorage;

			ReleaseSysCache(tuple);
		}
	}

	pfree(amroutine);

	return indexTupDesc;
}
478 
479 /* ----------------------------------------------------------------
480  *		InitializeAttributeOids
481  * ----------------------------------------------------------------
482  */
483 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)484 InitializeAttributeOids(Relation indexRelation,
485 						int numatts,
486 						Oid indexoid)
487 {
488 	TupleDesc	tupleDescriptor;
489 	int			i;
490 
491 	tupleDescriptor = RelationGetDescr(indexRelation);
492 
493 	for (i = 0; i < numatts; i += 1)
494 		tupleDescriptor->attrs[i]->attrelid = indexoid;
495 }
496 
497 /* ----------------------------------------------------------------
498  *		AppendAttributeTuples
499  * ----------------------------------------------------------------
500  */
501 static void
AppendAttributeTuples(Relation indexRelation,int numatts)502 AppendAttributeTuples(Relation indexRelation, int numatts)
503 {
504 	Relation	pg_attribute;
505 	CatalogIndexState indstate;
506 	TupleDesc	indexTupDesc;
507 	int			i;
508 
509 	/*
510 	 * open the attribute relation and its indexes
511 	 */
512 	pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
513 
514 	indstate = CatalogOpenIndexes(pg_attribute);
515 
516 	/*
517 	 * insert data from new index's tupdesc into pg_attribute
518 	 */
519 	indexTupDesc = RelationGetDescr(indexRelation);
520 
521 	for (i = 0; i < numatts; i++)
522 	{
523 		/*
524 		 * There used to be very grotty code here to set these fields, but I
525 		 * think it's unnecessary.  They should be set already.
526 		 */
527 		Assert(indexTupDesc->attrs[i]->attnum == i + 1);
528 		Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
529 
530 		InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
531 	}
532 
533 	CatalogCloseIndexes(indstate);
534 
535 	heap_close(pg_attribute, RowExclusiveLock);
536 }
537 
538 /* ----------------------------------------------------------------
539  *		UpdateIndexRelation
540  *
541  * Construct and insert a new entry in the pg_index catalog
542  * ----------------------------------------------------------------
543  */
544 static void
UpdateIndexRelation(Oid indexoid,Oid heapoid,IndexInfo * indexInfo,Oid * collationOids,Oid * classOids,int16 * coloptions,bool primary,bool isexclusion,bool immediate,bool isvalid)545 UpdateIndexRelation(Oid indexoid,
546 					Oid heapoid,
547 					IndexInfo *indexInfo,
548 					Oid *collationOids,
549 					Oid *classOids,
550 					int16 *coloptions,
551 					bool primary,
552 					bool isexclusion,
553 					bool immediate,
554 					bool isvalid)
555 {
556 	int2vector *indkey;
557 	oidvector  *indcollation;
558 	oidvector  *indclass;
559 	int2vector *indoption;
560 	Datum		exprsDatum;
561 	Datum		predDatum;
562 	Datum		values[Natts_pg_index];
563 	bool		nulls[Natts_pg_index];
564 	Relation	pg_index;
565 	HeapTuple	tuple;
566 	int			i;
567 
568 	/*
569 	 * Copy the index key, opclass, and indoption info into arrays (should we
570 	 * make the caller pass them like this to start with?)
571 	 */
572 	indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
573 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
574 		indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
575 	indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
576 	indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
577 	indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
578 
579 	/*
580 	 * Convert the index expressions (if any) to a text datum
581 	 */
582 	if (indexInfo->ii_Expressions != NIL)
583 	{
584 		char	   *exprsString;
585 
586 		exprsString = nodeToString(indexInfo->ii_Expressions);
587 		exprsDatum = CStringGetTextDatum(exprsString);
588 		pfree(exprsString);
589 	}
590 	else
591 		exprsDatum = (Datum) 0;
592 
593 	/*
594 	 * Convert the index predicate (if any) to a text datum.  Note we convert
595 	 * implicit-AND format to normal explicit-AND for storage.
596 	 */
597 	if (indexInfo->ii_Predicate != NIL)
598 	{
599 		char	   *predString;
600 
601 		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
602 		predDatum = CStringGetTextDatum(predString);
603 		pfree(predString);
604 	}
605 	else
606 		predDatum = (Datum) 0;
607 
608 	/*
609 	 * open the system catalog index relation
610 	 */
611 	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
612 
613 	/*
614 	 * Build a pg_index tuple
615 	 */
616 	MemSet(nulls, false, sizeof(nulls));
617 
618 	values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
619 	values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
620 	values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
621 	values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
622 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
623 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
624 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
625 	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
626 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
627 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
628 	/* we set isvalid and isready the same way */
629 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
630 	values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
631 	values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
632 	values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
633 	values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
634 	values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
635 	values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
636 	values[Anum_pg_index_indexprs - 1] = exprsDatum;
637 	if (exprsDatum == (Datum) 0)
638 		nulls[Anum_pg_index_indexprs - 1] = true;
639 	values[Anum_pg_index_indpred - 1] = predDatum;
640 	if (predDatum == (Datum) 0)
641 		nulls[Anum_pg_index_indpred - 1] = true;
642 
643 	tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
644 
645 	/*
646 	 * insert the tuple into the pg_index catalog
647 	 */
648 	simple_heap_insert(pg_index, tuple);
649 
650 	/* update the indexes on pg_index */
651 	CatalogUpdateIndexes(pg_index, tuple);
652 
653 	/*
654 	 * close the relation and free the tuple
655 	 */
656 	heap_close(pg_index, RowExclusiveLock);
657 	heap_freetuple(tuple);
658 }
659 
660 
661 /*
662  * index_create
663  *
664  * heapRelation: table to build index on (suitably locked by caller)
 * indexRelationName: what it says
666  * indexRelationId: normally, pass InvalidOid to let this routine
667  *		generate an OID for the index.  During bootstrap this may be
668  *		nonzero to specify a preselected OID.
669  * relFileNode: normally, pass InvalidOid to get new storage.  May be
670  *		nonzero to attach an existing valid build.
671  * indexInfo: same info executor uses to insert into the index
672  * indexColNames: column names to use for index (List of char *)
673  * accessMethodObjectId: OID of index AM to use
674  * tableSpaceId: OID of tablespace to use
675  * collationObjectId: array of collation OIDs, one per index column
676  * classObjectId: array of index opclass OIDs, one per index column
677  * coloptions: array of per-index-column indoption settings
678  * reloptions: AM-specific options
679  * isprimary: index is a PRIMARY KEY
680  * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
681  * deferrable: constraint is DEFERRABLE
682  * initdeferred: constraint is INITIALLY DEFERRED
683  * allow_system_table_mods: allow table to be a system catalog
684  * skip_build: true to skip the index_build() step for the moment; caller
685  *		must do it later (typically via reindex_index())
686  * concurrent: if true, do not lock the table against writers.  The index
687  *		will be marked "invalid" and the caller must take additional steps
688  *		to fix it up.
689  * is_internal: if true, post creation hook for new index
690  * if_not_exists: if true, do not throw an error if a relation with
691  *		the same name already exists.
692  *
693  * Returns the OID of the created index.
694  */
695 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bool isprimary,bool isconstraint,bool deferrable,bool initdeferred,bool allow_system_table_mods,bool skip_build,bool concurrent,bool is_internal,bool if_not_exists)696 index_create(Relation heapRelation,
697 			 const char *indexRelationName,
698 			 Oid indexRelationId,
699 			 Oid relFileNode,
700 			 IndexInfo *indexInfo,
701 			 List *indexColNames,
702 			 Oid accessMethodObjectId,
703 			 Oid tableSpaceId,
704 			 Oid *collationObjectId,
705 			 Oid *classObjectId,
706 			 int16 *coloptions,
707 			 Datum reloptions,
708 			 bool isprimary,
709 			 bool isconstraint,
710 			 bool deferrable,
711 			 bool initdeferred,
712 			 bool allow_system_table_mods,
713 			 bool skip_build,
714 			 bool concurrent,
715 			 bool is_internal,
716 			 bool if_not_exists)
717 {
718 	Oid			heapRelationId = RelationGetRelid(heapRelation);
719 	Relation	pg_class;
720 	Relation	indexRelation;
721 	TupleDesc	indexTupDesc;
722 	bool		shared_relation;
723 	bool		mapped_relation;
724 	bool		is_exclusion;
725 	Oid			namespaceId;
726 	int			i;
727 	char		relpersistence;
728 
729 	is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
730 
731 	pg_class = heap_open(RelationRelationId, RowExclusiveLock);
732 
733 	/*
734 	 * The index will be in the same namespace as its parent table, and is
735 	 * shared across databases if and only if the parent is.  Likewise, it
736 	 * will use the relfilenode map if and only if the parent does; and it
737 	 * inherits the parent's relpersistence.
738 	 */
739 	namespaceId = RelationGetNamespace(heapRelation);
740 	shared_relation = heapRelation->rd_rel->relisshared;
741 	mapped_relation = RelationIsMapped(heapRelation);
742 	relpersistence = heapRelation->rd_rel->relpersistence;
743 
744 	/*
745 	 * check parameters
746 	 */
747 	if (indexInfo->ii_NumIndexAttrs < 1)
748 		elog(ERROR, "must index at least one column");
749 
750 	if (!allow_system_table_mods &&
751 		IsSystemRelation(heapRelation) &&
752 		IsNormalProcessingMode())
753 		ereport(ERROR,
754 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
755 				 errmsg("user-defined indexes on system catalog tables are not supported")));
756 
757 	/*
758 	 * concurrent index build on a system catalog is unsafe because we tend to
759 	 * release locks before committing in catalogs
760 	 */
761 	if (concurrent &&
762 		IsSystemRelation(heapRelation))
763 		ereport(ERROR,
764 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
765 				 errmsg("concurrent index creation on system catalog tables is not supported")));
766 
767 	/*
768 	 * This case is currently not supported, but there's no way to ask for it
769 	 * in the grammar anyway, so it can't happen.
770 	 */
771 	if (concurrent && is_exclusion)
772 		ereport(ERROR,
773 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
774 				 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
775 
776 	/*
777 	 * We cannot allow indexing a shared relation after initdb (because
778 	 * there's no way to make the entry in other databases' pg_class).
779 	 */
780 	if (shared_relation && !IsBootstrapProcessingMode())
781 		ereport(ERROR,
782 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
783 				 errmsg("shared indexes cannot be created after initdb")));
784 
785 	/*
786 	 * Shared relations must be in pg_global, too (last-ditch check)
787 	 */
788 	if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
789 		elog(ERROR, "shared relations must be placed in pg_global tablespace");
790 
791 	if (get_relname_relid(indexRelationName, namespaceId))
792 	{
793 		if (if_not_exists)
794 		{
795 			ereport(NOTICE,
796 					(errcode(ERRCODE_DUPLICATE_TABLE),
797 					 errmsg("relation \"%s\" already exists, skipping",
798 							indexRelationName)));
799 			heap_close(pg_class, RowExclusiveLock);
800 			return InvalidOid;
801 		}
802 
803 		ereport(ERROR,
804 				(errcode(ERRCODE_DUPLICATE_TABLE),
805 				 errmsg("relation \"%s\" already exists",
806 						indexRelationName)));
807 	}
808 
809 	/*
810 	 * construct tuple descriptor for index tuples
811 	 */
812 	indexTupDesc = ConstructTupleDescriptor(heapRelation,
813 											indexInfo,
814 											indexColNames,
815 											accessMethodObjectId,
816 											collationObjectId,
817 											classObjectId);
818 
819 	/*
820 	 * Allocate an OID for the index, unless we were told what to use.
821 	 *
822 	 * The OID will be the relfilenode as well, so make sure it doesn't
823 	 * collide with either pg_class OIDs or existing physical files.
824 	 */
825 	if (!OidIsValid(indexRelationId))
826 	{
827 		/* Use binary-upgrade override for pg_class.oid/relfilenode? */
828 		if (IsBinaryUpgrade)
829 		{
830 			if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
831 				ereport(ERROR,
832 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
833 						 errmsg("pg_class index OID value not set when in binary upgrade mode")));
834 
835 			indexRelationId = binary_upgrade_next_index_pg_class_oid;
836 			binary_upgrade_next_index_pg_class_oid = InvalidOid;
837 		}
838 		else
839 		{
840 			indexRelationId =
841 				GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
842 		}
843 	}
844 
845 	/*
846 	 * create the index relation's relcache entry and physical disk file. (If
847 	 * we fail further down, it's the smgr's responsibility to remove the disk
848 	 * file again.)
849 	 */
850 	indexRelation = heap_create(indexRelationName,
851 								namespaceId,
852 								tableSpaceId,
853 								indexRelationId,
854 								relFileNode,
855 								indexTupDesc,
856 								RELKIND_INDEX,
857 								relpersistence,
858 								shared_relation,
859 								mapped_relation,
860 								allow_system_table_mods);
861 
862 	Assert(indexRelationId == RelationGetRelid(indexRelation));
863 
864 	/*
865 	 * Obtain exclusive lock on it.  Although no other backends can see it
866 	 * until we commit, this prevents deadlock-risk complaints from lock
867 	 * manager in cases such as CLUSTER.
868 	 */
869 	LockRelation(indexRelation, AccessExclusiveLock);
870 
871 	/*
872 	 * Fill in fields of the index's pg_class entry that are not set correctly
873 	 * by heap_create.
874 	 *
875 	 * XXX should have a cleaner way to create cataloged indexes
876 	 */
877 	indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
878 	indexRelation->rd_rel->relam = accessMethodObjectId;
879 	indexRelation->rd_rel->relhasoids = false;
880 
881 	/*
882 	 * store index's pg_class entry
883 	 */
884 	InsertPgClassTuple(pg_class, indexRelation,
885 					   RelationGetRelid(indexRelation),
886 					   (Datum) 0,
887 					   reloptions);
888 
889 	/* done with pg_class */
890 	heap_close(pg_class, RowExclusiveLock);
891 
892 	/*
893 	 * now update the object id's of all the attribute tuple forms in the
894 	 * index relation's tuple descriptor
895 	 */
896 	InitializeAttributeOids(indexRelation,
897 							indexInfo->ii_NumIndexAttrs,
898 							indexRelationId);
899 
900 	/*
901 	 * append ATTRIBUTE tuples for the index
902 	 */
903 	AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
904 
905 	/* ----------------
906 	 *	  update pg_index
907 	 *	  (append INDEX tuple)
908 	 *
909 	 *	  Note that this stows away a representation of "predicate".
910 	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
911 	 * ----------------
912 	 */
913 	UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
914 						collationObjectId, classObjectId, coloptions,
915 						isprimary, is_exclusion,
916 						!deferrable,
917 						!concurrent);
918 
919 	/*
920 	 * Register relcache invalidation on the indexes' heap relation, to
921 	 * maintain consistency of its index list
922 	 */
923 	CacheInvalidateRelcache(heapRelation);
924 
925 	/*
926 	 * Register constraint and dependencies for the index.
927 	 *
928 	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
929 	 * entry.  The index will be linked to the constraint, which in turn is
930 	 * linked to the table.  If it's not a CONSTRAINT, we need to make a
931 	 * dependency directly on the table.
932 	 *
933 	 * We don't need a dependency on the namespace, because there'll be an
934 	 * indirect dependency via our parent table.
935 	 *
936 	 * During bootstrap we can't register any dependencies, and we don't try
937 	 * to make a constraint either.
938 	 */
939 	if (!IsBootstrapProcessingMode())
940 	{
941 		ObjectAddress myself,
942 					referenced;
943 
944 		myself.classId = RelationRelationId;
945 		myself.objectId = indexRelationId;
946 		myself.objectSubId = 0;
947 
948 		if (isconstraint)
949 		{
950 			char		constraintType;
951 
952 			if (isprimary)
953 				constraintType = CONSTRAINT_PRIMARY;
954 			else if (indexInfo->ii_Unique)
955 				constraintType = CONSTRAINT_UNIQUE;
956 			else if (is_exclusion)
957 				constraintType = CONSTRAINT_EXCLUSION;
958 			else
959 			{
960 				elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
961 				constraintType = 0;		/* keep compiler quiet */
962 			}
963 
964 			index_constraint_create(heapRelation,
965 									indexRelationId,
966 									indexInfo,
967 									indexRelationName,
968 									constraintType,
969 									deferrable,
970 									initdeferred,
971 									false,		/* already marked primary */
972 									false,		/* pg_index entry is OK */
973 									false,		/* no old dependencies */
974 									allow_system_table_mods,
975 									is_internal);
976 		}
977 		else
978 		{
979 			bool		have_simple_col = false;
980 
981 			/* Create auto dependencies on simply-referenced columns */
982 			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
983 			{
984 				if (indexInfo->ii_KeyAttrNumbers[i] != 0)
985 				{
986 					referenced.classId = RelationRelationId;
987 					referenced.objectId = heapRelationId;
988 					referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
989 
990 					recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
991 
992 					have_simple_col = true;
993 				}
994 			}
995 
996 			/*
997 			 * If there are no simply-referenced columns, give the index an
998 			 * auto dependency on the whole table.  In most cases, this will
999 			 * be redundant, but it might not be if the index expressions and
1000 			 * predicate contain no Vars or only whole-row Vars.
1001 			 */
1002 			if (!have_simple_col)
1003 			{
1004 				referenced.classId = RelationRelationId;
1005 				referenced.objectId = heapRelationId;
1006 				referenced.objectSubId = 0;
1007 
1008 				recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1009 			}
1010 
1011 			/* Non-constraint indexes can't be deferrable */
1012 			Assert(!deferrable);
1013 			Assert(!initdeferred);
1014 		}
1015 
1016 		/* Store dependency on collations */
1017 		/* The default collation is pinned, so don't bother recording it */
1018 		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1019 		{
1020 			if (OidIsValid(collationObjectId[i]) &&
1021 				collationObjectId[i] != DEFAULT_COLLATION_OID)
1022 			{
1023 				referenced.classId = CollationRelationId;
1024 				referenced.objectId = collationObjectId[i];
1025 				referenced.objectSubId = 0;
1026 
1027 				recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1028 			}
1029 		}
1030 
1031 		/* Store dependency on operator classes */
1032 		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1033 		{
1034 			referenced.classId = OperatorClassRelationId;
1035 			referenced.objectId = classObjectId[i];
1036 			referenced.objectSubId = 0;
1037 
1038 			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1039 		}
1040 
1041 		/* Store dependencies on anything mentioned in index expressions */
1042 		if (indexInfo->ii_Expressions)
1043 		{
1044 			recordDependencyOnSingleRelExpr(&myself,
1045 										  (Node *) indexInfo->ii_Expressions,
1046 											heapRelationId,
1047 											DEPENDENCY_NORMAL,
1048 											DEPENDENCY_AUTO);
1049 		}
1050 
1051 		/* Store dependencies on anything mentioned in predicate */
1052 		if (indexInfo->ii_Predicate)
1053 		{
1054 			recordDependencyOnSingleRelExpr(&myself,
1055 											(Node *) indexInfo->ii_Predicate,
1056 											heapRelationId,
1057 											DEPENDENCY_NORMAL,
1058 											DEPENDENCY_AUTO);
1059 		}
1060 	}
1061 	else
1062 	{
1063 		/* Bootstrap mode - assert we weren't asked for constraint support */
1064 		Assert(!isconstraint);
1065 		Assert(!deferrable);
1066 		Assert(!initdeferred);
1067 	}
1068 
1069 	/* Post creation hook for new index */
1070 	InvokeObjectPostCreateHookArg(RelationRelationId,
1071 								  indexRelationId, 0, is_internal);
1072 
1073 	/*
1074 	 * Advance the command counter so that we can see the newly-entered
1075 	 * catalog tuples for the index.
1076 	 */
1077 	CommandCounterIncrement();
1078 
1079 	/*
1080 	 * In bootstrap mode, we have to fill in the index strategy structure with
1081 	 * information from the catalogs.  If we aren't bootstrapping, then the
1082 	 * relcache entry has already been rebuilt thanks to sinval update during
1083 	 * CommandCounterIncrement.
1084 	 */
1085 	if (IsBootstrapProcessingMode())
1086 		RelationInitIndexAccessInfo(indexRelation);
1087 	else
1088 		Assert(indexRelation->rd_indexcxt != NULL);
1089 
1090 	/*
1091 	 * If this is bootstrap (initdb) time, then we don't actually fill in the
1092 	 * index yet.  We'll be creating more indexes and classes later, so we
1093 	 * delay filling them in until just before we're done with bootstrapping.
1094 	 * Similarly, if the caller specified skip_build then filling the index is
1095 	 * delayed till later (ALTER TABLE can save work in some cases with this).
1096 	 * Otherwise, we call the AM routine that constructs the index.
1097 	 */
1098 	if (IsBootstrapProcessingMode())
1099 	{
1100 		index_register(heapRelationId, indexRelationId, indexInfo);
1101 	}
1102 	else if (skip_build)
1103 	{
1104 		/*
1105 		 * Caller is responsible for filling the index later on.  However,
1106 		 * we'd better make sure that the heap relation is correctly marked as
1107 		 * having an index.
1108 		 */
1109 		index_update_stats(heapRelation,
1110 						   true,
1111 						   isprimary,
1112 						   -1.0);
1113 		/* Make the above update visible */
1114 		CommandCounterIncrement();
1115 	}
1116 	else
1117 	{
1118 		index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1119 	}
1120 
1121 	/*
1122 	 * Close the index; but we keep the lock that we acquired above until end
1123 	 * of transaction.  Closing the heap is caller's responsibility.
1124 	 */
1125 	index_close(indexRelation, NoLock);
1126 
1127 	return indexRelationId;
1128 }
1129 
1130 /*
1131  * index_constraint_create
1132  *
1133  * Set up a constraint associated with an index.  Return the new constraint's
1134  * address.
1135  *
1136  * heapRelation: table owning the index (must be suitably locked by caller)
1137  * indexRelationId: OID of the index
1138  * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
1140  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1141  *		CONSTRAINT_EXCLUSION
1142  * deferrable: constraint is DEFERRABLE
1143  * initdeferred: constraint is INITIALLY DEFERRED
1144  * mark_as_primary: if true, set flags to mark index as primary key
1145  * update_pgindex: if true, update pg_index row (else caller's done that)
1146  * remove_old_dependencies: if true, remove existing dependencies of index
1147  *		on table's columns
1148  * allow_system_table_mods: allow table to be a system catalog
1149  * is_internal: index is constructed due to internal process
1150  */
ObjectAddress
index_constraint_create(Relation heapRelation,
						Oid indexRelationId,
						IndexInfo *indexInfo,
						const char *constraintName,
						char constraintType,
						bool deferrable,
						bool initdeferred,
						bool mark_as_primary,
						bool update_pgindex,
						bool remove_old_dependencies,
						bool allow_system_table_mods,
						bool is_internal)
{
	Oid			namespaceId = RelationGetNamespace(heapRelation);
	ObjectAddress myself,
				referenced;
	Oid			conOid;

	/* constraint creation support doesn't work while bootstrapping */
	Assert(!IsBootstrapProcessingMode());

	/* enforce system-table restriction */
	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/* primary/unique constraints shouldn't have any expressions */
	if (indexInfo->ii_Expressions &&
		constraintType != CONSTRAINT_EXCLUSION)
		elog(ERROR, "constraints cannot have index expressions");

	/*
	 * If we're manufacturing a constraint for a pre-existing index, we need
	 * to get rid of the existing auto dependencies for the index (the ones
	 * that index_create() would have made instead of calling this function).
	 *
	 * Note: this code would not necessarily do the right thing if the index
	 * has any expressions or predicate, but we'd never be turning such an
	 * index into a UNIQUE or PRIMARY KEY constraint.
	 */
	if (remove_old_dependencies)
		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
										RelationRelationId, DEPENDENCY_AUTO);

	/*
	 * Construct a pg_constraint entry.  The NULL/0/' ' arguments below are
	 * fields that apply only to foreign-key and CHECK constraints, which an
	 * index-backed constraint never is.
	 */
	conOid = CreateConstraintEntry(constraintName,
								   namespaceId,
								   constraintType,
								   deferrable,
								   initdeferred,
								   true,		/* is validated */
								   RelationGetRelid(heapRelation),
								   indexInfo->ii_KeyAttrNumbers,
								   indexInfo->ii_NumIndexAttrs,
								   InvalidOid,	/* no domain */
								   indexRelationId,		/* index OID */
								   InvalidOid,	/* no foreign key */
								   NULL,		/* FK-only fields from here... */
								   NULL,
								   NULL,
								   NULL,
								   0,
								   ' ',
								   ' ',
								   ' ',			/* ...through here */
								   indexInfo->ii_ExclusionOps,
								   NULL,		/* no check constraint */
								   NULL,
								   NULL,
								   true,		/* islocal */
								   0,	/* inhcount */
								   true,		/* noinherit */
								   is_internal);

	/*
	 * Register the index as internally dependent on the constraint.
	 *
	 * Note that the constraint has a dependency on the table, so we don't
	 * need (or want) any direct dependency from the index to the table.
	 */
	myself.classId = RelationRelationId;
	myself.objectId = indexRelationId;
	myself.objectSubId = 0;

	referenced.classId = ConstraintRelationId;
	referenced.objectId = conOid;
	referenced.objectSubId = 0;

	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * If the constraint is deferrable, create the deferred uniqueness
	 * checking trigger.  (The trigger will be given an internal dependency on
	 * the constraint by CreateTrigger.)
	 */
	if (deferrable)
	{
		CreateTrigStmt *trigger;

		trigger = makeNode(CreateTrigStmt);
		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
			"PK_ConstraintTrigger" :
			"Unique_ConstraintTrigger";
		trigger->relation = NULL;	/* heap relation is passed to CreateTrigger by OID below */
		trigger->funcname = SystemFuncName("unique_key_recheck");
		trigger->args = NIL;
		trigger->row = true;
		trigger->timing = TRIGGER_TYPE_AFTER;
		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
		trigger->columns = NIL;
		trigger->whenClause = NULL;
		trigger->isconstraint = true;
		trigger->deferrable = true;
		trigger->initdeferred = initdeferred;
		trigger->constrrel = NULL;

		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
							 InvalidOid, conOid, indexRelationId, true);
	}

	/*
	 * If needed, mark the table as having a primary key.  We assume it can't
	 * have been so marked already, so no need to clear the flag in the other
	 * case.
	 *
	 * Note: this might better be done by callers.  We do it here to avoid
	 * exposing index_update_stats() globally, but that wouldn't be necessary
	 * if relhaspkey went away.
	 */
	if (mark_as_primary)
		index_update_stats(heapRelation,
						   true,
						   true,
						   -1.0);

	/*
	 * If needed, mark the index as primary and/or deferred in pg_index.
	 *
	 * Note: When making an existing index into a constraint, caller must have
	 * a table lock that prevents concurrent table updates; otherwise, there
	 * is a risk that concurrent readers of the table will miss seeing this
	 * index at all.
	 */
	if (update_pgindex && (mark_as_primary || deferrable))
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		dirty = false;	/* did we actually change the tuple? */

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexRelationId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		if (mark_as_primary && !indexForm->indisprimary)
		{
			indexForm->indisprimary = true;
			dirty = true;
		}

		if (deferrable && indexForm->indimmediate)
		{
			indexForm->indimmediate = false;
			dirty = true;
		}

		/* write back (and re-index) only if something changed */
		if (dirty)
		{
			simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
			CatalogUpdateIndexes(pg_index, indexTuple);

			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
										 InvalidOid, is_internal);
		}

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/* return the address of the new pg_constraint entry */
	return referenced;
}
1342 
1343 /*
1344  *		index_drop
1345  *
1346  * NOTE: this routine should now only be called through performDeletion(),
1347  * else associated dependencies won't be cleaned up.
1348  */
void
index_drop(Oid indexId, bool concurrent)
{
	Oid			heapId;
	Relation	userHeapRelation;
	Relation	userIndexRelation;
	Relation	indexRelation;
	HeapTuple	tuple;
	bool		hasexprs;		/* does the index have expression columns? */
	LockRelId	heaprelid,		/* set/used only in the concurrent path */
				indexrelid;		/* ditto */
	LOCKTAG		heaplocktag;	/* ditto */
	LOCKMODE	lockmode;

	/*
	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
	 * access a temporary relation, so there's no harm in grabbing a stronger
	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
	 * more efficient.
	 */
	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
		   !concurrent);

	/*
	 * To drop an index safely, we must grab exclusive lock on its parent
	 * table.  Exclusive lock on the index alone is insufficient because
	 * another backend might be about to execute a query on the parent table.
	 * If it relies on a previously cached list of index OIDs, then it could
	 * attempt to access the just-dropped index.  We must therefore take a
	 * table lock strong enough to prevent all queries on the table from
	 * proceeding until we commit and send out a shared-cache-inval notice
	 * that will make them update their index lists.
	 *
	 * In the concurrent case we avoid this requirement by disabling index use
	 * in multiple steps and waiting out any transactions that might be using
	 * the index, so we don't need exclusive lock on the parent table. Instead
	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
	 * AccessExclusiveLock on the index below, once we're sure nobody else is
	 * using it.)
	 */
	heapId = IndexGetRelation(indexId, false);
	lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
	userHeapRelation = heap_open(heapId, lockmode);
	userIndexRelation = index_open(indexId, lockmode);

	/*
	 * We might still have open queries using it in our own session, which the
	 * above locking won't prevent, so test explicitly.
	 */
	CheckTableNotInUse(userIndexRelation, "DROP INDEX");

	/*
	 * Drop Index Concurrently is more or less the reverse process of Create
	 * Index Concurrently.
	 *
	 * First we unset indisvalid so queries starting afterwards don't use the
	 * index to answer queries anymore.  We have to keep indisready = true so
	 * transactions that are still scanning the index can continue to see
	 * valid index contents.  For instance, if they are using READ COMMITTED
	 * mode, and another transaction makes changes and commits, they need to
	 * see those new tuples in the index.
	 *
	 * After all transactions that could possibly have used the index for
	 * queries end, we can unset indisready and indislive, then wait till
	 * nobody could be touching it anymore.  (Note: we need indislive because
	 * this state must be distinct from the initial state during CREATE INDEX
	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
	 * are false.  That's because in that state, transactions must examine the
	 * index for HOT-safety decisions, while in this state we don't want them
	 * to open it at all.)
	 *
	 * Since all predicate locks on the index are about to be made invalid, we
	 * must promote them to predicate locks on the heap.  In the
	 * non-concurrent case we can just do that now.  In the concurrent case
	 * it's a bit trickier.  The predicate locks must be moved when there are
	 * no index scans in progress on the index and no more can subsequently
	 * start, so that no new predicate locks can be made on the index.  Also,
	 * they must be moved before heap inserts stop maintaining the index, else
	 * the conflict with the predicate lock on the index gap could be missed
	 * before the lock on the heap relation is in place to detect a conflict
	 * based on the heap tuple insert.
	 */
	if (concurrent)
	{
		/*
		 * We must commit our transaction in order to make the first pg_index
		 * state update visible to other sessions.  If the DROP machinery has
		 * already performed any other actions (removal of other objects,
		 * pg_depend entries, etc), the commit would make those actions
		 * permanent, which would leave us with inconsistent catalog state if
		 * we fail partway through the following sequence.  Since DROP INDEX
		 * CONCURRENTLY is restricted to dropping just one index that has no
		 * dependencies, we should get here before anything's been done ---
		 * but let's check that to be sure.  We can verify that the current
		 * transaction has not executed any transactional updates by checking
		 * that no XID has been assigned.
		 */
		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));

		/*
		 * Mark index invalid by updating its pg_index entry
		 */
		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh any cached plans that might reference the
		 * index.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/* save lockrelid and locktag for below, then close but keep locks */
		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;

		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * We must commit our current transaction so that the indisvalid
		 * update becomes visible to other transactions; then start another.
		 * Note that any previously-built data structures are lost in the
		 * commit.  The only data we keep past here are the relation IDs.
		 *
		 * Before committing, get a session-level lock on the table, to ensure
		 * that neither it nor the index can be dropped before we finish. This
		 * cannot block, even if someone else is waiting for access, because
		 * we already have the same lock within our transaction.
		 */
		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);

		PopActiveSnapshot();
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Now we must wait until no running transaction could be using the
		 * index for a query.  Use AccessExclusiveLock here to check for
		 * running transactions that hold locks of any kind on the table. Note
		 * we do not need to worry about xacts that open the table for reading
		 * after this point; they will see the index as invalid when they open
		 * the relation.
		 *
		 * Note: the reason we use actual lock acquisition here, rather than
		 * just checking the ProcArray and sleeping, is that deadlock is
		 * possible if one of the transactions in question is blocked trying
		 * to acquire an exclusive lock on our table.  The lock code will
		 * detect deadlock and error out properly.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * No more predicate locks will be acquired on this index, and we're
		 * about to stop doing inserts into the index which could show
		 * conflicts with existing predicate locks, so now is the time to move
		 * them to the heap relation.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
		TransferPredicateLocksToHeapRelation(userIndexRelation);

		/*
		 * Now we are sure that nobody uses the index for queries; they just
		 * might have it open for updating it.  So now we can unset indisready
		 * and indislive, then wait till nobody could be using it at all
		 * anymore.
		 */
		index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh the table's index list.  Forgetting just
		 * the index's relcache entry is not enough.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/*
		 * Close the relations again, though still holding session lock.
		 */
		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * Again, commit the transaction to make the pg_index update visible
		 * to other sessions.
		 */
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Wait till every transaction that saw the old index state has
		 * finished.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * Re-open relations to allow us to complete our actions.
		 *
		 * At this point, nothing should be accessing the index, but let's
		 * leave nothing to chance and grab AccessExclusiveLock on the index
		 * before the physical deletion.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, AccessExclusiveLock);
	}
	else
	{
		/* Not concurrent, so just transfer predicate locks and we're good */
		TransferPredicateLocksToHeapRelation(userIndexRelation);
	}

	/*
	 * Schedule physical removal of the files
	 */
	RelationDropStorage(userIndexRelation);

	/*
	 * Close and flush the index's relcache entry, to ensure relcache doesn't
	 * try to rebuild it while we're deleting catalog entries. We keep the
	 * lock though.
	 */
	index_close(userIndexRelation, NoLock);

	RelationForgetRelation(indexId);

	/*
	 * fix INDEX relation, and check for expressional index
	 */
	indexRelation = heap_open(IndexRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);

	/* remember whether there are expression columns, before deleting the row */
	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);

	simple_heap_delete(indexRelation, &tuple->t_self);

	ReleaseSysCache(tuple);
	heap_close(indexRelation, RowExclusiveLock);

	/*
	 * if it has any expression columns, we might have stored statistics about
	 * them.
	 */
	if (hasexprs)
		RemoveStatistics(indexId, 0);

	/*
	 * fix ATTRIBUTE relation
	 */
	DeleteAttributeTuples(indexId);

	/*
	 * fix RELATION relation
	 */
	DeleteRelationTuple(indexId);

	/*
	 * We are presently too lazy to attempt to compute the new correct value
	 * of relhasindex (the next VACUUM will fix it if necessary). So there is
	 * no need to update the pg_class tuple for the owning relation. But we
	 * must send out a shared-cache-inval notice on the owning relation to
	 * ensure other backends update their relcache lists of indexes.  (In the
	 * concurrent case, this is redundant but harmless.)
	 */
	CacheInvalidateRelcache(userHeapRelation);

	/*
	 * Close owning rel, but keep lock
	 */
	heap_close(userHeapRelation, NoLock);

	/*
	 * Release the session locks before we go.
	 */
	if (concurrent)
	{
		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
	}
}
1637 
1638 /* ----------------------------------------------------------------
1639  *						index_build support
1640  * ----------------------------------------------------------------
1641  */
1642 
1643 /* ----------------
1644  *		BuildIndexInfo
1645  *			Construct an IndexInfo record for an open index
1646  *
1647  * IndexInfo stores the information about the index that's needed by
1648  * FormIndexDatum, which is used for both index_build() and later insertion
1649  * of individual index tuples.  Normally we build an IndexInfo for an index
1650  * just once per command, and then use it for (potentially) many tuples.
1651  * ----------------
1652  */
1653 IndexInfo *
BuildIndexInfo(Relation index)1654 BuildIndexInfo(Relation index)
1655 {
1656 	IndexInfo  *ii = makeNode(IndexInfo);
1657 	Form_pg_index indexStruct = index->rd_index;
1658 	int			i;
1659 	int			numKeys;
1660 
1661 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1662 	numKeys = indexStruct->indnatts;
1663 	if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1664 		elog(ERROR, "invalid indnatts %d for index %u",
1665 			 numKeys, RelationGetRelid(index));
1666 	ii->ii_NumIndexAttrs = numKeys;
1667 	for (i = 0; i < numKeys; i++)
1668 		ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1669 
1670 	/* fetch any expressions needed for expressional indexes */
1671 	ii->ii_Expressions = RelationGetIndexExpressions(index);
1672 	ii->ii_ExpressionsState = NIL;
1673 
1674 	/* fetch index predicate if any */
1675 	ii->ii_Predicate = RelationGetIndexPredicate(index);
1676 	ii->ii_PredicateState = NIL;
1677 
1678 	/* fetch exclusion constraint info if any */
1679 	if (indexStruct->indisexclusion)
1680 	{
1681 		RelationGetExclusionInfo(index,
1682 								 &ii->ii_ExclusionOps,
1683 								 &ii->ii_ExclusionProcs,
1684 								 &ii->ii_ExclusionStrats);
1685 	}
1686 	else
1687 	{
1688 		ii->ii_ExclusionOps = NULL;
1689 		ii->ii_ExclusionProcs = NULL;
1690 		ii->ii_ExclusionStrats = NULL;
1691 	}
1692 
1693 	/* other info */
1694 	ii->ii_Unique = indexStruct->indisunique;
1695 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1696 	/* assume not doing speculative insertion for now */
1697 	ii->ii_UniqueOps = NULL;
1698 	ii->ii_UniqueProcs = NULL;
1699 	ii->ii_UniqueStrats = NULL;
1700 
1701 	/* initialize index-build state to default */
1702 	ii->ii_Concurrent = false;
1703 	ii->ii_BrokenHotChain = false;
1704 
1705 	return ii;
1706 }
1707 
1708 /* ----------------
1709  *		BuildDummyIndexInfo
1710  *			Construct a dummy IndexInfo record for an open index
1711  *
1712  * This differs from the real BuildIndexInfo in that it will never run any
1713  * user-defined code that might exist in index expressions or predicates.
1714  * Instead of the real index expressions, we return null constants that have
1715  * the right types/typmods/collations.  Predicates and exclusion clauses are
1716  * just ignored.  This is sufficient for the purpose of truncating an index,
1717  * since we will not need to actually evaluate the expressions or predicates;
1718  * the only thing that's likely to be done with the data is construction of
1719  * a tupdesc describing the index's rowtype.
1720  * ----------------
1721  */
1722 IndexInfo *
BuildDummyIndexInfo(Relation index)1723 BuildDummyIndexInfo(Relation index)
1724 {
1725 	IndexInfo  *ii = makeNode(IndexInfo);
1726 	Form_pg_index indexStruct = index->rd_index;
1727 	int			i;
1728 	int			numKeys;
1729 
1730 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1731 	numKeys = indexStruct->indnatts;
1732 	if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1733 		elog(ERROR, "invalid indnatts %d for index %u",
1734 			 numKeys, RelationGetRelid(index));
1735 	ii->ii_NumIndexAttrs = numKeys;
1736 	for (i = 0; i < numKeys; i++)
1737 		ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1738 
1739 	/* fetch dummy expressions for expressional indexes */
1740 	ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1741 	ii->ii_ExpressionsState = NIL;
1742 
1743 	/* pretend there is no predicate */
1744 	ii->ii_Predicate = NIL;
1745 	ii->ii_PredicateState = NULL;
1746 
1747 	/* We ignore the exclusion constraint if any */
1748 	ii->ii_ExclusionOps = NULL;
1749 	ii->ii_ExclusionProcs = NULL;
1750 	ii->ii_ExclusionStrats = NULL;
1751 
1752 	/* other info */
1753 	ii->ii_Unique = indexStruct->indisunique;
1754 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1755 	/* assume not doing speculative insertion for now */
1756 	ii->ii_UniqueOps = NULL;
1757 	ii->ii_UniqueProcs = NULL;
1758 	ii->ii_UniqueStrats = NULL;
1759 
1760 	/* initialize index-build state to default */
1761 	ii->ii_Concurrent = false;
1762 	ii->ii_BrokenHotChain = false;
1763 
1764 	return ii;
1765 }
1766 
1767 /* ----------------
1768  *		BuildSpeculativeIndexInfo
1769  *			Add extra state to IndexInfo record
1770  *
1771  * For unique indexes, we usually don't want to add info to the IndexInfo for
1772  * checking uniqueness, since the B-Tree AM handles that directly.  However,
1773  * in the case of speculative insertion, additional support is required.
1774  *
1775  * Do this processing here rather than in BuildIndexInfo() to not incur the
1776  * overhead in the common non-speculative cases.
1777  * ----------------
1778  */
1779 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)1780 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
1781 {
1782 	int			ncols = index->rd_rel->relnatts;
1783 	int			i;
1784 
1785 	/*
1786 	 * fetch info for checking unique indexes
1787 	 */
1788 	Assert(ii->ii_Unique);
1789 
1790 	if (index->rd_rel->relam != BTREE_AM_OID)
1791 		elog(ERROR, "unexpected non-btree speculative unique index");
1792 
1793 	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1794 	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1795 	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1796 
1797 	/*
1798 	 * We have to look up the operator's strategy number.  This provides a
1799 	 * cross-check that the operator does match the index.
1800 	 */
1801 	/* We need the func OIDs and strategy numbers too */
1802 	for (i = 0; i < ncols; i++)
1803 	{
1804 		ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1805 		ii->ii_UniqueOps[i] =
1806 			get_opfamily_member(index->rd_opfamily[i],
1807 								index->rd_opcintype[i],
1808 								index->rd_opcintype[i],
1809 								ii->ii_UniqueStrats[i]);
1810 		ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1811 	}
1812 }
1813 
1814 /* ----------------
1815  *		FormIndexDatum
1816  *			Construct values[] and isnull[] arrays for a new index tuple.
1817  *
1818  *	indexInfo		Info about the index
1819  *	slot			Heap tuple for which we must prepare an index entry
1820  *	estate			executor state for evaluating any index expressions
1821  *	values			Array of index Datums (output area)
1822  *	isnull			Array of is-null indicators (output area)
1823  *
1824  * When there are no index expressions, estate may be NULL.  Otherwise it
1825  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1826  * context must point to the heap tuple passed in.
1827  *
1828  * Notice we don't actually call index_form_tuple() here; we just prepare
1829  * its input arrays values[] and isnull[].  This is because the index AM
1830  * may wish to alter the data before storage.
1831  * ----------------
1832  */
1833 void
FormIndexDatum(IndexInfo * indexInfo,TupleTableSlot * slot,EState * estate,Datum * values,bool * isnull)1834 FormIndexDatum(IndexInfo *indexInfo,
1835 			   TupleTableSlot *slot,
1836 			   EState *estate,
1837 			   Datum *values,
1838 			   bool *isnull)
1839 {
1840 	ListCell   *indexpr_item;
1841 	int			i;
1842 
1843 	if (indexInfo->ii_Expressions != NIL &&
1844 		indexInfo->ii_ExpressionsState == NIL)
1845 	{
1846 		/* First time through, set up expression evaluation state */
1847 		indexInfo->ii_ExpressionsState = (List *)
1848 			ExecPrepareExpr((Expr *) indexInfo->ii_Expressions,
1849 							estate);
1850 		/* Check caller has set up context correctly */
1851 		Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1852 	}
1853 	indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1854 
1855 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1856 	{
1857 		int			keycol = indexInfo->ii_KeyAttrNumbers[i];
1858 		Datum		iDatum;
1859 		bool		isNull;
1860 
1861 		if (keycol != 0)
1862 		{
1863 			/*
1864 			 * Plain index column; get the value we need directly from the
1865 			 * heap tuple.
1866 			 */
1867 			iDatum = slot_getattr(slot, keycol, &isNull);
1868 		}
1869 		else
1870 		{
1871 			/*
1872 			 * Index expression --- need to evaluate it.
1873 			 */
1874 			if (indexpr_item == NULL)
1875 				elog(ERROR, "wrong number of index expressions");
1876 			iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1877 											   GetPerTupleExprContext(estate),
1878 											   &isNull,
1879 											   NULL);
1880 			indexpr_item = lnext(indexpr_item);
1881 		}
1882 		values[i] = iDatum;
1883 		isnull[i] = isNull;
1884 	}
1885 
1886 	if (indexpr_item != NULL)
1887 		elog(ERROR, "wrong number of index expressions");
1888 }
1889 
1890 
1891 /*
1892  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1893  *
1894  * This routine updates the pg_class row of either an index or its parent
1895  * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
1896  * to ensure we can do all the necessary work in just one update.
1897  *
1898  * hasindex: set relhasindex to this value
1899  * isprimary: if true, set relhaspkey true; else no change
1900  * reltuples: if >= 0, set reltuples to this value; else no change
1901  *
1902  * If reltuples >= 0, relpages and relallvisible are also updated (using
1903  * RelationGetNumberOfBlocks() and visibilitymap_count()).
1904  *
1905  * NOTE: an important side-effect of this operation is that an SI invalidation
1906  * message is sent out to all backends --- including me --- causing relcache
1907  * entries to be flushed or updated with the new data.  This must happen even
1908  * if we find that no change is needed in the pg_class row.  When updating
1909  * a heap entry, this ensures that other backends find out about the new
1910  * index.  When updating an index, it's important because some index AMs
1911  * expect a relcache flush to occur after REINDEX.
1912  */
1913 static void
index_update_stats(Relation rel,bool hasindex,bool isprimary,double reltuples)1914 index_update_stats(Relation rel,
1915 				   bool hasindex,
1916 				   bool isprimary,
1917 				   double reltuples)
1918 {
1919 	Oid			relid = RelationGetRelid(rel);
1920 	Relation	pg_class;
1921 	HeapTuple	tuple;
1922 	Form_pg_class rd_rel;
1923 	bool		dirty;
1924 
1925 	/*
1926 	 * We always update the pg_class row using a non-transactional,
1927 	 * overwrite-in-place update.  There are several reasons for this:
1928 	 *
1929 	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1930 	 *
1931 	 * 2. We could be reindexing pg_class itself, in which case we can't move
1932 	 * its pg_class row because CatalogUpdateIndexes might not know about all
1933 	 * the indexes yet (see reindex_relation).
1934 	 *
1935 	 * 3. Because we execute CREATE INDEX with just share lock on the parent
1936 	 * rel (to allow concurrent index creations), an ordinary update could
1937 	 * suffer a tuple-concurrently-updated failure against another CREATE
1938 	 * INDEX committing at about the same time.  We can avoid that by having
1939 	 * them both do nontransactional updates (we assume they will both be
1940 	 * trying to change the pg_class row to the same thing, so it doesn't
1941 	 * matter which goes first).
1942 	 *
1943 	 * It is safe to use a non-transactional update even though our
1944 	 * transaction could still fail before committing.  Setting relhasindex
1945 	 * true is safe even if there are no indexes (VACUUM will eventually fix
1946 	 * it), likewise for relhaspkey.  And of course the new relpages and
1947 	 * reltuples counts are correct regardless.  However, we don't want to
1948 	 * change relpages (or relallvisible) if the caller isn't providing an
1949 	 * updated reltuples count, because that would bollix the
1950 	 * reltuples/relpages ratio which is what's really important.
1951 	 */
1952 
1953 	pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1954 
1955 	/*
1956 	 * Make a copy of the tuple to update.  Normally we use the syscache, but
1957 	 * we can't rely on that during bootstrap or while reindexing pg_class
1958 	 * itself.
1959 	 */
1960 	if (IsBootstrapProcessingMode() ||
1961 		ReindexIsProcessingHeap(RelationRelationId))
1962 	{
1963 		/* don't assume syscache will work */
1964 		HeapScanDesc pg_class_scan;
1965 		ScanKeyData key[1];
1966 
1967 		ScanKeyInit(&key[0],
1968 					ObjectIdAttributeNumber,
1969 					BTEqualStrategyNumber, F_OIDEQ,
1970 					ObjectIdGetDatum(relid));
1971 
1972 		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
1973 		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1974 		tuple = heap_copytuple(tuple);
1975 		heap_endscan(pg_class_scan);
1976 	}
1977 	else
1978 	{
1979 		/* normal case, use syscache */
1980 		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
1981 	}
1982 
1983 	if (!HeapTupleIsValid(tuple))
1984 		elog(ERROR, "could not find tuple for relation %u", relid);
1985 	rd_rel = (Form_pg_class) GETSTRUCT(tuple);
1986 
1987 	/* Apply required updates, if any, to copied tuple */
1988 
1989 	dirty = false;
1990 	if (rd_rel->relhasindex != hasindex)
1991 	{
1992 		rd_rel->relhasindex = hasindex;
1993 		dirty = true;
1994 	}
1995 	if (isprimary)
1996 	{
1997 		if (!rd_rel->relhaspkey)
1998 		{
1999 			rd_rel->relhaspkey = true;
2000 			dirty = true;
2001 		}
2002 	}
2003 
2004 	if (reltuples >= 0)
2005 	{
2006 		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2007 		BlockNumber relallvisible;
2008 
2009 		if (rd_rel->relkind != RELKIND_INDEX)
2010 			visibilitymap_count(rel, &relallvisible, NULL);
2011 		else	/* don't bother for indexes */
2012 			relallvisible = 0;
2013 
2014 		if (rd_rel->relpages != (int32) relpages)
2015 		{
2016 			rd_rel->relpages = (int32) relpages;
2017 			dirty = true;
2018 		}
2019 		if (rd_rel->reltuples != (float4) reltuples)
2020 		{
2021 			rd_rel->reltuples = (float4) reltuples;
2022 			dirty = true;
2023 		}
2024 		if (rd_rel->relallvisible != (int32) relallvisible)
2025 		{
2026 			rd_rel->relallvisible = (int32) relallvisible;
2027 			dirty = true;
2028 		}
2029 	}
2030 
2031 	/*
2032 	 * If anything changed, write out the tuple
2033 	 */
2034 	if (dirty)
2035 	{
2036 		heap_inplace_update(pg_class, tuple);
2037 		/* the above sends a cache inval message */
2038 	}
2039 	else
2040 	{
2041 		/* no need to change tuple, but force relcache inval anyway */
2042 		CacheInvalidateRelcacheByTuple(tuple);
2043 	}
2044 
2045 	heap_freetuple(tuple);
2046 
2047 	heap_close(pg_class, RowExclusiveLock);
2048 }
2049 
2050 
2051 /*
2052  * index_build - invoke access-method-specific index build procedure
2053  *
2054  * On entry, the index's catalog entries are valid, and its physical disk
2055  * file has been created but is empty.  We call the AM-specific build
2056  * procedure to fill in the index contents.  We then update the pg_class
2057  * entries of the index and heap relation as needed, using statistics
2058  * returned by ambuild as well as data passed by the caller.
2059  *
2060  * isprimary tells whether to mark the index as a primary-key index.
2061  * isreindex indicates we are recreating a previously-existing index.
2062  *
2063  * Note: when reindexing an existing index, isprimary can be false even if
2064  * the index is a PK; it's already properly marked and need not be re-marked.
2065  *
2066  * Note: before Postgres 8.2, the passed-in heap and index Relations
2067  * were automatically closed by this routine.  This is no longer the case.
2068  * The caller opened 'em, and the caller should close 'em.
2069  */
2070 void
index_build(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool isprimary,bool isreindex)2071 index_build(Relation heapRelation,
2072 			Relation indexRelation,
2073 			IndexInfo *indexInfo,
2074 			bool isprimary,
2075 			bool isreindex)
2076 {
2077 	IndexBuildResult *stats;
2078 	Oid			save_userid;
2079 	int			save_sec_context;
2080 	int			save_nestlevel;
2081 
2082 	/*
2083 	 * sanity checks
2084 	 */
2085 	Assert(RelationIsValid(indexRelation));
2086 	Assert(PointerIsValid(indexRelation->rd_amroutine));
2087 	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
2088 	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));
2089 
2090 	ereport(DEBUG1,
2091 			(errmsg("building index \"%s\" on table \"%s\"",
2092 					RelationGetRelationName(indexRelation),
2093 					RelationGetRelationName(heapRelation))));
2094 
2095 	/*
2096 	 * Switch to the table owner's userid, so that any index functions are run
2097 	 * as that user.  Also lock down security-restricted operations and
2098 	 * arrange to make GUC variable changes local to this command.
2099 	 */
2100 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
2101 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2102 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
2103 	save_nestlevel = NewGUCNestLevel();
2104 
2105 	/*
2106 	 * Call the access method's build procedure
2107 	 */
2108 	stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
2109 												 indexInfo);
2110 	Assert(PointerIsValid(stats));
2111 
2112 	/*
2113 	 * If this is an unlogged index, we may need to write out an init fork for
2114 	 * it -- but we must first check whether one already exists.  If, for
2115 	 * example, an unlogged relation is truncated in the transaction that
2116 	 * created it, or truncated twice in a subsequent transaction, the
2117 	 * relfilenode won't change, and nothing needs to be done here.
2118 	 */
2119 	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2120 		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2121 	{
2122 		RelationOpenSmgr(indexRelation);
2123 		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2124 		indexRelation->rd_amroutine->ambuildempty(indexRelation);
2125 	}
2126 
2127 	/*
2128 	 * If we found any potentially broken HOT chains, mark the index as not
2129 	 * being usable until the current transaction is below the event horizon.
2130 	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
2131 	 * if early pruning/vacuuming is enabled for the heap relation.  While it
2132 	 * might become safe to use the index earlier based on actual cleanup
2133 	 * activity and other active transactions, the test for that would be much
2134 	 * more complex and would require some form of blocking, so keep it simple
2135 	 * and fast by just using the current transaction.
2136 	 *
2137 	 * However, when reindexing an existing index, we should do nothing here.
2138 	 * Any HOT chains that are broken with respect to the index must predate
2139 	 * the index's original creation, so there is no need to change the
2140 	 * index's usability horizon.  Moreover, we *must not* try to change the
2141 	 * index's pg_index entry while reindexing pg_index itself, and this
2142 	 * optimization nicely prevents that.  The more complex rules needed for a
2143 	 * reindex are handled separately after this function returns.
2144 	 *
2145 	 * We also need not set indcheckxmin during a concurrent index build,
2146 	 * because we won't set indisvalid true until all transactions that care
2147 	 * about the broken HOT chains or early pruning/vacuuming are gone.
2148 	 *
2149 	 * Therefore, this code path can only be taken during non-concurrent
2150 	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
2151 	 * tuple's xmin doesn't matter, because that tuple was created in the
2152 	 * current transaction anyway.  That also means we don't need to worry
2153 	 * about any concurrent readers of the tuple; no other transaction can see
2154 	 * it yet.
2155 	 */
2156 	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
2157 		!isreindex &&
2158 		!indexInfo->ii_Concurrent)
2159 	{
2160 		Oid			indexId = RelationGetRelid(indexRelation);
2161 		Relation	pg_index;
2162 		HeapTuple	indexTuple;
2163 		Form_pg_index indexForm;
2164 
2165 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2166 
2167 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
2168 										 ObjectIdGetDatum(indexId));
2169 		if (!HeapTupleIsValid(indexTuple))
2170 			elog(ERROR, "cache lookup failed for index %u", indexId);
2171 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2172 
2173 		/* If it's a new index, indcheckxmin shouldn't be set ... */
2174 		Assert(!indexForm->indcheckxmin);
2175 
2176 		indexForm->indcheckxmin = true;
2177 		simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
2178 		CatalogUpdateIndexes(pg_index, indexTuple);
2179 
2180 		heap_freetuple(indexTuple);
2181 		heap_close(pg_index, RowExclusiveLock);
2182 	}
2183 
2184 	/*
2185 	 * Update heap and index pg_class rows
2186 	 */
2187 	index_update_stats(heapRelation,
2188 					   true,
2189 					   isprimary,
2190 					   stats->heap_tuples);
2191 
2192 	index_update_stats(indexRelation,
2193 					   false,
2194 					   false,
2195 					   stats->index_tuples);
2196 
2197 	/* Make the updated catalog row versions visible */
2198 	CommandCounterIncrement();
2199 
2200 	/*
2201 	 * If it's for an exclusion constraint, make a second pass over the heap
2202 	 * to verify that the constraint is satisfied.  We must not do this until
2203 	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
2204 	 * see comments for IndexCheckExclusion.)
2205 	 */
2206 	if (indexInfo->ii_ExclusionOps != NULL)
2207 		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2208 
2209 	/* Roll back any GUC changes executed by index functions */
2210 	AtEOXact_GUC(false, save_nestlevel);
2211 
2212 	/* Restore userid and security context */
2213 	SetUserIdAndSecContext(save_userid, save_sec_context);
2214 }
2215 
2216 
2217 /*
2218  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2219  *
2220  * This is called back from an access-method-specific index build procedure
2221  * after the AM has done whatever setup it needs.  The parent heap relation
2222  * is scanned to find tuples that should be entered into the index.  Each
2223  * such tuple is passed to the AM's callback routine, which does the right
2224  * things to add it to the new index.  After we return, the AM's index
2225  * build procedure does whatever cleanup it needs.
2226  *
2227  * The total count of heap tuples is returned.  This is for updating pg_class
2228  * statistics.  (It's annoying not to be able to do that here, but we want
2229  * to merge that update with others; see index_update_stats.)  Note that the
2230  * index AM itself must keep track of the number of index tuples; we don't do
2231  * so here because the AM might reject some of the tuples for its own reasons,
2232  * such as being unable to store NULLs.
2233  *
2234  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2235  * any potentially broken HOT chains.  Currently, we set this if there are
2236  * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2237  * trying very hard to detect whether they're really incompatible with the
2238  * chain tip.
2239  */
2240 double
IndexBuildHeapScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,IndexBuildCallback callback,void * callback_state)2241 IndexBuildHeapScan(Relation heapRelation,
2242 				   Relation indexRelation,
2243 				   IndexInfo *indexInfo,
2244 				   bool allow_sync,
2245 				   IndexBuildCallback callback,
2246 				   void *callback_state)
2247 {
2248 	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2249 								   indexInfo, allow_sync,
2250 								   false,
2251 								   0, InvalidBlockNumber,
2252 								   callback, callback_state);
2253 }
2254 
2255 /*
2256  * As above, except that instead of scanning the complete heap, only the given
2257  * number of blocks are scanned.  Scan to end-of-rel can be signalled by
2258  * passing InvalidBlockNumber as numblocks.  Note that restricting the range
2259  * to scan cannot be done when requesting syncscan.
2260  *
2261  * When "anyvisible" mode is requested, all tuples visible to any transaction
2262  * are considered, including those inserted or deleted by transactions that are
2263  * still in progress.
2264  */
2265 double
IndexBuildHeapRangeScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,bool anyvisible,BlockNumber start_blockno,BlockNumber numblocks,IndexBuildCallback callback,void * callback_state)2266 IndexBuildHeapRangeScan(Relation heapRelation,
2267 						Relation indexRelation,
2268 						IndexInfo *indexInfo,
2269 						bool allow_sync,
2270 						bool anyvisible,
2271 						BlockNumber start_blockno,
2272 						BlockNumber numblocks,
2273 						IndexBuildCallback callback,
2274 						void *callback_state)
2275 {
2276 	bool		is_system_catalog;
2277 	bool		checking_uniqueness;
2278 	HeapScanDesc scan;
2279 	HeapTuple	heapTuple;
2280 	Datum		values[INDEX_MAX_KEYS];
2281 	bool		isnull[INDEX_MAX_KEYS];
2282 	double		reltuples;
2283 	List	   *predicate;
2284 	TupleTableSlot *slot;
2285 	EState	   *estate;
2286 	ExprContext *econtext;
2287 	Snapshot	snapshot;
2288 	TransactionId OldestXmin;
2289 	BlockNumber root_blkno = InvalidBlockNumber;
2290 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2291 
2292 	/*
2293 	 * sanity checks
2294 	 */
2295 	Assert(OidIsValid(indexRelation->rd_rel->relam));
2296 
2297 	/* Remember if it's a system catalog */
2298 	is_system_catalog = IsSystemRelation(heapRelation);
2299 
2300 	/* See whether we're verifying uniqueness/exclusion properties */
2301 	checking_uniqueness = (indexInfo->ii_Unique ||
2302 						   indexInfo->ii_ExclusionOps != NULL);
2303 
2304 	/*
2305 	 * "Any visible" mode is not compatible with uniqueness checks; make sure
2306 	 * only one of those is requested.
2307 	 */
2308 	Assert(!(anyvisible && checking_uniqueness));
2309 
2310 	/*
2311 	 * Need an EState for evaluation of index expressions and partial-index
2312 	 * predicates.  Also a slot to hold the current tuple.
2313 	 */
2314 	estate = CreateExecutorState();
2315 	econtext = GetPerTupleExprContext(estate);
2316 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2317 
2318 	/* Arrange for econtext's scan tuple to be the tuple under test */
2319 	econtext->ecxt_scantuple = slot;
2320 
2321 	/* Set up execution state for predicate, if any. */
2322 	predicate = (List *)
2323 		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
2324 						estate);
2325 
2326 	/*
2327 	 * Prepare for scan of the base relation.  In a normal index build, we use
2328 	 * SnapshotAny because we must retrieve all tuples and do our own time
2329 	 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2330 	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
2331 	 * and index whatever's live according to that.
2332 	 */
2333 	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2334 	{
2335 		snapshot = RegisterSnapshot(GetTransactionSnapshot());
2336 		OldestXmin = InvalidTransactionId;		/* not used */
2337 
2338 		/* "any visible" mode is not compatible with this */
2339 		Assert(!anyvisible);
2340 	}
2341 	else
2342 	{
2343 		snapshot = SnapshotAny;
2344 		/* okay to ignore lazy VACUUMs here */
2345 		OldestXmin = GetOldestXmin(heapRelation, true);
2346 	}
2347 
2348 	scan = heap_beginscan_strat(heapRelation,	/* relation */
2349 								snapshot,		/* snapshot */
2350 								0,		/* number of keys */
2351 								NULL,	/* scan key */
2352 								true,	/* buffer access strategy OK */
2353 								allow_sync);	/* syncscan OK? */
2354 
2355 	/* set our scan endpoints */
2356 	if (!allow_sync)
2357 		heap_setscanlimits(scan, start_blockno, numblocks);
2358 	else
2359 	{
2360 		/* syncscan can only be requested on whole relation */
2361 		Assert(start_blockno == 0);
2362 		Assert(numblocks == InvalidBlockNumber);
2363 	}
2364 
2365 	reltuples = 0;
2366 
2367 	/*
2368 	 * Scan all tuples in the base relation.
2369 	 */
2370 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2371 	{
2372 		bool		tupleIsAlive;
2373 
2374 		CHECK_FOR_INTERRUPTS();
2375 
2376 		/*
2377 		 * When dealing with a HOT-chain of updated tuples, we want to index
2378 		 * the values of the live tuple (if any), but index it under the TID
2379 		 * of the chain's root tuple.  This approach is necessary to preserve
2380 		 * the HOT-chain structure in the heap. So we need to be able to find
2381 		 * the root item offset for every tuple that's in a HOT-chain.  When
2382 		 * first reaching a new page of the relation, call
2383 		 * heap_get_root_tuples() to build a map of root item offsets on the
2384 		 * page.
2385 		 *
2386 		 * It might look unsafe to use this information across buffer
2387 		 * lock/unlock.  However, we hold ShareLock on the table so no
2388 		 * ordinary insert/update/delete should occur; and we hold pin on the
2389 		 * buffer continuously while visiting the page, so no pruning
2390 		 * operation can occur either.
2391 		 *
2392 		 * In cases with only ShareUpdateExclusiveLock on the table, it's
2393 		 * possible for some HOT tuples to appear that we didn't know about
2394 		 * when we first read the page.  To handle that case, we re-obtain the
2395 		 * list of root offsets when a HOT tuple points to a root item that we
2396 		 * don't know about.
2397 		 *
2398 		 * Also, although our opinions about tuple liveness could change while
2399 		 * we scan the page (due to concurrent transaction commits/aborts),
2400 		 * the chain root locations won't, so this info doesn't need to be
2401 		 * rebuilt after waiting for another transaction.
2402 		 *
2403 		 * Note the implied assumption that there is no more than one live
2404 		 * tuple per HOT-chain --- else we could create more than one index
2405 		 * entry pointing to the same root tuple.
2406 		 */
2407 		if (scan->rs_cblock != root_blkno)
2408 		{
2409 			Page		page = BufferGetPage(scan->rs_cbuf);
2410 
2411 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2412 			heap_get_root_tuples(page, root_offsets);
2413 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2414 
2415 			root_blkno = scan->rs_cblock;
2416 		}
2417 
2418 		if (snapshot == SnapshotAny)
2419 		{
2420 			/* do our own time qual check */
2421 			bool		indexIt;
2422 			TransactionId xwait;
2423 
2424 	recheck:
2425 
2426 			/*
2427 			 * We could possibly get away with not locking the buffer here,
2428 			 * since caller should hold ShareLock on the relation, but let's
2429 			 * be conservative about it.  (This remark is still correct even
2430 			 * with HOT-pruning: our pin on the buffer prevents pruning.)
2431 			 */
2432 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2433 
2434 			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
2435 											 scan->rs_cbuf))
2436 			{
2437 				case HEAPTUPLE_DEAD:
2438 					/* Definitely dead, we can ignore it */
2439 					indexIt = false;
2440 					tupleIsAlive = false;
2441 					break;
2442 				case HEAPTUPLE_LIVE:
2443 					/* Normal case, index and unique-check it */
2444 					indexIt = true;
2445 					tupleIsAlive = true;
2446 					break;
2447 				case HEAPTUPLE_RECENTLY_DEAD:
2448 
2449 					/*
2450 					 * If tuple is recently deleted then we must index it
2451 					 * anyway to preserve MVCC semantics.  (Pre-existing
2452 					 * transactions could try to use the index after we finish
2453 					 * building it, and may need to see such tuples.)
2454 					 *
2455 					 * However, if it was HOT-updated then we must only index
2456 					 * the live tuple at the end of the HOT-chain.  Since this
2457 					 * breaks semantics for pre-existing snapshots, mark the
2458 					 * index as unusable for them.
2459 					 */
2460 					if (HeapTupleIsHotUpdated(heapTuple))
2461 					{
2462 						indexIt = false;
2463 						/* mark the index as unsafe for old snapshots */
2464 						indexInfo->ii_BrokenHotChain = true;
2465 					}
2466 					else
2467 						indexIt = true;
2468 					/* In any case, exclude the tuple from unique-checking */
2469 					tupleIsAlive = false;
2470 					break;
2471 				case HEAPTUPLE_INSERT_IN_PROGRESS:
2472 
2473 					/*
2474 					 * In "anyvisible" mode, this tuple is visible and we
2475 					 * don't need any further checks.
2476 					 */
2477 					if (anyvisible)
2478 					{
2479 						indexIt = true;
2480 						tupleIsAlive = true;
2481 						break;
2482 					}
2483 
2484 					/*
2485 					 * Since caller should hold ShareLock or better, normally
2486 					 * the only way to see this is if it was inserted earlier
2487 					 * in our own transaction.  However, it can happen in
2488 					 * system catalogs, since we tend to release write lock
2489 					 * before commit there.  Give a warning if neither case
2490 					 * applies.
2491 					 */
2492 					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
2493 					if (!TransactionIdIsCurrentTransactionId(xwait))
2494 					{
2495 						if (!is_system_catalog)
2496 							elog(WARNING, "concurrent insert in progress within table \"%s\"",
2497 								 RelationGetRelationName(heapRelation));
2498 
2499 						/*
2500 						 * If we are performing uniqueness checks, indexing
2501 						 * such a tuple could lead to a bogus uniqueness
2502 						 * failure.  In that case we wait for the inserting
2503 						 * transaction to finish and check again.
2504 						 */
2505 						if (checking_uniqueness)
2506 						{
2507 							/*
2508 							 * Must drop the lock on the buffer before we wait
2509 							 */
2510 							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2511 							XactLockTableWait(xwait, heapRelation,
2512 											  &heapTuple->t_self,
2513 											  XLTW_InsertIndexUnique);
2514 							CHECK_FOR_INTERRUPTS();
2515 							goto recheck;
2516 						}
2517 					}
2518 
2519 					/*
2520 					 * We must index such tuples, since if the index build
2521 					 * commits then they're good.
2522 					 */
2523 					indexIt = true;
2524 					tupleIsAlive = true;
2525 					break;
2526 				case HEAPTUPLE_DELETE_IN_PROGRESS:
2527 
2528 					/*
2529 					 * As with INSERT_IN_PROGRESS case, this is unexpected
2530 					 * unless it's our own deletion or a system catalog; but
2531 					 * in anyvisible mode, this tuple is visible.
2532 					 */
2533 					if (anyvisible)
2534 					{
2535 						indexIt = true;
2536 						tupleIsAlive = false;
2537 						break;
2538 					}
2539 
2540 					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
2541 					if (!TransactionIdIsCurrentTransactionId(xwait))
2542 					{
2543 						if (!is_system_catalog)
2544 							elog(WARNING, "concurrent delete in progress within table \"%s\"",
2545 								 RelationGetRelationName(heapRelation));
2546 
2547 						/*
2548 						 * If we are performing uniqueness checks, assuming
2549 						 * the tuple is dead could lead to missing a
2550 						 * uniqueness violation.  In that case we wait for the
2551 						 * deleting transaction to finish and check again.
2552 						 *
2553 						 * Also, if it's a HOT-updated tuple, we should not
2554 						 * index it but rather the live tuple at the end of
2555 						 * the HOT-chain.  However, the deleting transaction
2556 						 * could abort, possibly leaving this tuple as live
2557 						 * after all, in which case it has to be indexed. The
2558 						 * only way to know what to do is to wait for the
2559 						 * deleting transaction to finish and check again.
2560 						 */
2561 						if (checking_uniqueness ||
2562 							HeapTupleIsHotUpdated(heapTuple))
2563 						{
2564 							/*
2565 							 * Must drop the lock on the buffer before we wait
2566 							 */
2567 							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2568 							XactLockTableWait(xwait, heapRelation,
2569 											  &heapTuple->t_self,
2570 											  XLTW_InsertIndexUnique);
2571 							CHECK_FOR_INTERRUPTS();
2572 							goto recheck;
2573 						}
2574 
2575 						/*
2576 						 * Otherwise index it but don't check for uniqueness,
2577 						 * the same as a RECENTLY_DEAD tuple.
2578 						 */
2579 						indexIt = true;
2580 					}
2581 					else if (HeapTupleIsHotUpdated(heapTuple))
2582 					{
2583 						/*
2584 						 * It's a HOT-updated tuple deleted by our own xact.
2585 						 * We can assume the deletion will commit (else the
2586 						 * index contents don't matter), so treat the same as
2587 						 * RECENTLY_DEAD HOT-updated tuples.
2588 						 */
2589 						indexIt = false;
2590 						/* mark the index as unsafe for old snapshots */
2591 						indexInfo->ii_BrokenHotChain = true;
2592 					}
2593 					else
2594 					{
2595 						/*
2596 						 * It's a regular tuple deleted by our own xact. Index
2597 						 * it but don't check for uniqueness, the same as a
2598 						 * RECENTLY_DEAD tuple.
2599 						 */
2600 						indexIt = true;
2601 					}
2602 					/* In any case, exclude the tuple from unique-checking */
2603 					tupleIsAlive = false;
2604 					break;
2605 				default:
2606 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2607 					indexIt = tupleIsAlive = false;		/* keep compiler quiet */
2608 					break;
2609 			}
2610 
2611 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2612 
2613 			if (!indexIt)
2614 				continue;
2615 		}
2616 		else
2617 		{
2618 			/* heap_getnext did the time qual check */
2619 			tupleIsAlive = true;
2620 		}
2621 
2622 		reltuples += 1;
2623 
2624 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
2625 
2626 		/* Set up for predicate or expression evaluation */
2627 		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2628 
2629 		/*
2630 		 * In a partial index, discard tuples that don't satisfy the
2631 		 * predicate.
2632 		 */
2633 		if (predicate != NIL)
2634 		{
2635 			if (!ExecQual(predicate, econtext, false))
2636 				continue;
2637 		}
2638 
2639 		/*
2640 		 * For the current heap tuple, extract all the attributes we use in
2641 		 * this index, and note which are null.  This also performs evaluation
2642 		 * of any expressions needed.
2643 		 */
2644 		FormIndexDatum(indexInfo,
2645 					   slot,
2646 					   estate,
2647 					   values,
2648 					   isnull);
2649 
2650 		/*
2651 		 * You'd think we should go ahead and build the index tuple here, but
2652 		 * some index AMs want to do further processing on the data first.  So
2653 		 * pass the values[] and isnull[] arrays, instead.
2654 		 */
2655 
2656 		if (HeapTupleIsHeapOnly(heapTuple))
2657 		{
2658 			/*
2659 			 * For a heap-only tuple, pretend its TID is that of the root. See
2660 			 * src/backend/access/heap/README.HOT for discussion.
2661 			 */
2662 			HeapTupleData rootTuple;
2663 			OffsetNumber offnum;
2664 
2665 			rootTuple = *heapTuple;
2666 			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
2667 
2668 			/*
2669 			 * If a HOT tuple points to a root that we don't know
2670 			 * about, obtain root items afresh.  If that still fails,
2671 			 * report it as corruption.
2672 			 */
2673 			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
2674 			{
2675 				Page	page = BufferGetPage(scan->rs_cbuf);
2676 
2677 				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2678 				heap_get_root_tuples(page, root_offsets);
2679 				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2680 			}
2681 
2682 			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
2683 				ereport(ERROR,
2684 						(errcode(ERRCODE_DATA_CORRUPTED),
2685 						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
2686 										 ItemPointerGetBlockNumber(&heapTuple->t_self),
2687 										 offnum,
2688 										 RelationGetRelationName(heapRelation))));
2689 
2690 			ItemPointerSetOffsetNumber(&rootTuple.t_self,
2691 									   root_offsets[offnum - 1]);
2692 
2693 			/* Call the AM's callback routine to process the tuple */
2694 			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
2695 					 callback_state);
2696 		}
2697 		else
2698 		{
2699 			/* Call the AM's callback routine to process the tuple */
2700 			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
2701 					 callback_state);
2702 		}
2703 	}
2704 
2705 	heap_endscan(scan);
2706 
2707 	/* we can now forget our snapshot, if set */
2708 	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2709 		UnregisterSnapshot(snapshot);
2710 
2711 	ExecDropSingleTupleTableSlot(slot);
2712 
2713 	FreeExecutorState(estate);
2714 
2715 	/* These may have been pointing to the now-gone estate */
2716 	indexInfo->ii_ExpressionsState = NIL;
2717 	indexInfo->ii_PredicateState = NIL;
2718 
2719 	return reltuples;
2720 }
2721 
2722 
2723 /*
2724  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2725  *
2726  * When creating an exclusion constraint, we first build the index normally
2727  * and then rescan the heap to check for conflicts.  We assume that we only
2728  * need to validate tuples that are live according to an up-to-date snapshot,
2729  * and that these were correctly indexed even in the presence of broken HOT
2730  * chains.  This should be OK since we are holding at least ShareLock on the
2731  * table, meaning there can be no uncommitted updates from other transactions.
2732  * (Note: that wouldn't necessarily work for system catalogs, since many
2733  * operations release write lock early on the system catalogs.)
2734  */
static void
IndexCheckExclusion(Relation heapRelation,
					Relation indexRelation,
					IndexInfo *indexInfo)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];	/* index column values for one tuple */
	bool		isnull[INDEX_MAX_KEYS];	/* matching null flags */
	List	   *predicate;		/* executable partial-index predicate, or NIL */
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;

	/*
	 * If we are reindexing the target index, mark it as no longer being
	 * reindexed, to forestall an Assert in index_beginscan when we try to use
	 * the index for probes.  This is OK because the index is now fully valid.
	 */
	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
		ResetReindexProcessing();

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Scan all live tuples in the base relation.  Per the header comment,
	 * validating only currently-live tuples is sufficient because we hold at
	 * least ShareLock on the table.
	 */
	snapshot = RegisterSnapshot(GetLatestSnapshot());
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,		/* snapshot */
								0,		/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								true);	/* syncscan OK */

	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		CHECK_FOR_INTERRUPTS();

		/* Free expression-evaluation memory from the previous tuple */
		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, ignore tuples that don't satisfy the predicate.
		 */
		if (predicate != NIL)
		{
			if (!ExecQual(predicate, econtext, false))
				continue;
		}

		/*
		 * Extract index column values, including computing expressions.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * Check that this tuple has no conflicts.
		 */
		check_exclusion_constraint(heapRelation,
								   indexRelation, indexInfo,
								   &(heapTuple->t_self), values, isnull,
								   estate, true);
	}

	heap_endscan(scan);
	UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;
}
2832 
2833 
2834 /*
2835  * validate_index - support code for concurrent index builds
2836  *
2837  * We do a concurrent index build by first inserting the catalog entry for the
2838  * index via index_create(), marking it not indisready and not indisvalid.
2839  * Then we commit our transaction and start a new one, then we wait for all
2840  * transactions that could have been modifying the table to terminate.  Now
2841  * we know that any subsequently-started transactions will see the index and
2842  * honor its constraints on HOT updates; so while existing HOT-chains might
2843  * be broken with respect to the index, no currently live tuple will have an
2844  * incompatible HOT update done to it.  We now build the index normally via
2845  * index_build(), while holding a weak lock that allows concurrent
2846  * insert/update/delete.  Also, we index only tuples that are valid
2847  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2848  * build takes care to include recently-dead tuples.  This is OK because
2849  * we won't mark the index valid until all transactions that might be able
2850  * to see those tuples are gone.  The reason for doing that is to avoid
2851  * bogus unique-index failures due to concurrent UPDATEs (we might see
2852  * different versions of the same row as being valid when we pass over them,
2853  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
2854  * does not contain any tuples added to the table while we built the index.
2855  *
2856  * Next, we mark the index "indisready" (but still not "indisvalid") and
2857  * commit the second transaction and start a third.  Again we wait for all
2858  * transactions that could have been modifying the table to terminate.  Now
2859  * we know that any subsequently-started transactions will see the index and
2860  * insert their new tuples into it.  We then take a new reference snapshot
2861  * which is passed to validate_index().  Any tuples that are valid according
2862  * to this snap, but are not in the index, must be added to the index.
2863  * (Any tuples committed live after the snap will be inserted into the
2864  * index by their originating transaction.  Any tuples committed dead before
2865  * the snap need not be indexed, because we will wait out all transactions
2866  * that might care about them before we mark the index valid.)
2867  *
2868  * validate_index() works by first gathering all the TIDs currently in the
2869  * index, using a bulkdelete callback that just stores the TIDs and doesn't
2870  * ever say "delete it".  (This should be faster than a plain indexscan;
2871  * also, not all index AMs support full-index indexscan.)  Then we sort the
2872  * TIDs, and finally scan the table doing a "merge join" against the TID list
2873  * to see which tuples are missing from the index.  Thus we will ensure that
2874  * all tuples valid according to the reference snapshot are in the index.
2875  *
2876  * Building a unique index this way is tricky: we might try to insert a
2877  * tuple that is already dead or is in process of being deleted, and we
2878  * mustn't have a uniqueness failure against an updated version of the same
2879  * row.  We could try to check the tuple to see if it's already dead and tell
2880  * index_insert() not to do the uniqueness check, but that still leaves us
2881  * with a race condition against an in-progress update.  To handle that,
2882  * we expect the index AM to recheck liveness of the to-be-inserted tuple
2883  * before it declares a uniqueness error.
2884  *
2885  * After completing validate_index(), we wait until all transactions that
2886  * were alive at the time of the reference snapshot are gone; this is
2887  * necessary to be sure there are none left with a transaction snapshot
2888  * older than the reference (and hence possibly able to see tuples we did
2889  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
2890  * transactions will be able to use it for queries.
2891  *
2892  * Doing two full table scans is a brute-force strategy.  We could try to be
2893  * cleverer, eg storing new tuples in a special area of the table (perhaps
2894  * making the table append-only by setting use_fsm).  However that would
2895  * add yet more locking issues.
2896  */
void
validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
{
	Relation	heapRelation,
				indexRelation;
	IndexInfo  *indexInfo;
	IndexVacuumInfo ivinfo;
	v_i_state	state;			/* tuplesort handle plus progress counters */
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/*
	 * Open and lock the parent heap relation.  ShareUpdateExclusiveLock is
	 * the weak lock mentioned in the header comment: it still allows
	 * concurrent insert/update/delete on the table.
	 */
	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
	/* And the target index relation */
	indexRelation = index_open(indexId, RowExclusiveLock);

	/*
	 * Fetch info needed for index_insert.  (You might think this should be
	 * passed in from DefineIndex, but its copy is long gone due to having
	 * been built in a previous transaction.)
	 */
	indexInfo = BuildIndexInfo(indexRelation);

	/* mark build is concurrent just for consistency */
	indexInfo->ii_Concurrent = true;

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Scan the index and gather up all the TIDs into a tuplesort object.
	 * (reltuples is only an estimate, hence estimated_count = true.)
	 */
	ivinfo.index = indexRelation;
	ivinfo.analyze_only = false;
	ivinfo.estimated_count = true;
	ivinfo.message_level = DEBUG2;
	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
	ivinfo.strategy = NULL;

	/*
	 * Encode TIDs as int8 values for the sort, rather than directly sorting
	 * item pointers.  This can be significantly faster, primarily because TID
	 * is a pass-by-reference type on all platforms, whereas int8 is
	 * pass-by-value on most platforms.
	 */
	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
											InvalidOid, false,
											maintenance_work_mem,
											false);
	state.htups = state.itups = state.tups_inserted = 0;

	/* Collect every index TID via the callback; nothing is actually deleted */
	(void) index_bulk_delete(&ivinfo, NULL,
							 validate_index_callback, (void *) &state);

	/* Execute the sort */
	tuplesort_performsort(state.tuplesort);

	/*
	 * Now scan the heap and "merge" it with the index
	 */
	validate_index_heapscan(heapRelation,
							indexRelation,
							indexInfo,
							snapshot,
							&state);

	/* Done with tuplesort object */
	tuplesort_end(state.tuplesort);

	elog(DEBUG2,
		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
		 state.htups, state.itups, state.tups_inserted);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);

	/* Close rels, but keep locks */
	index_close(indexRelation, NoLock);
	heap_close(heapRelation, NoLock);
}
2988 
2989 /*
2990  * itemptr_encode - Encode ItemPointer as int64/int8
2991  *
2992  * This representation must produce values encoded as int64 that sort in the
2993  * same order as their corresponding original TID values would (using the
2994  * default int8 opclass to produce a result equivalent to the default TID
2995  * opclass).
2996  *
2997  * As noted in validate_index(), this can be significantly faster.
2998  */
2999 static inline int64
itemptr_encode(ItemPointer itemptr)3000 itemptr_encode(ItemPointer itemptr)
3001 {
3002 	BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3003 	OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3004 	int64		encoded;
3005 
3006 	/*
3007 	 * Use the 16 least significant bits for the offset.  32 adjacent bits are
3008 	 * used for the block number.  Since remaining bits are unused, there
3009 	 * cannot be negative encoded values (We assume a two's complement
3010 	 * representation).
3011 	 */
3012 	encoded = ((uint64) block << 16) | (uint16) offset;
3013 
3014 	return encoded;
3015 }
3016 
3017 /*
3018  * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3019  */
3020 static inline void
itemptr_decode(ItemPointer itemptr,int64 encoded)3021 itemptr_decode(ItemPointer itemptr, int64 encoded)
3022 {
3023 	BlockNumber block = (BlockNumber) (encoded >> 16);
3024 	OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3025 
3026 	ItemPointerSet(itemptr, block, offset);
3027 }
3028 
3029 /*
3030  * validate_index_callback - bulkdelete callback to collect the index TIDs
3031  */
3032 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3033 validate_index_callback(ItemPointer itemptr, void *opaque)
3034 {
3035 	v_i_state  *state = (v_i_state *) opaque;
3036 	int64		encoded = itemptr_encode(itemptr);
3037 
3038 	tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3039 	state->itups += 1;
3040 	return false;				/* never actually delete anything */
3041 }
3042 
3043 /*
3044  * validate_index_heapscan - second table scan for concurrent index build
3045  *
3046  * This has much code in common with IndexBuildHeapScan, but it's enough
3047  * different that it seems cleaner to have two routines not one.
3048  */
static void
validate_index_heapscan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						Snapshot snapshot,
						v_i_state *state)
{
	HeapScanDesc scan;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	List	   *predicate;		/* executable partial-index predicate, or NIL */
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	BlockNumber root_blkno = InvalidBlockNumber;	/* page cached in root_offsets[] */
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
	bool		in_index[MaxHeapTuplesPerPage];

	/* state variables for the merge */
	ItemPointer indexcursor = NULL; /* current TID from the tuplesort, if any */
	ItemPointerData decoded;	/* storage that indexcursor points at */
	bool		tuplesort_empty = false;

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = (List *)
		ExecPrepareExpr((Expr *) indexInfo->ii_Predicate,
						estate);

	/*
	 * Prepare for scan of the base relation.  We need just those tuples
	 * satisfying the passed-in reference snapshot.  We must disable syncscan
	 * here, because it's critical that we read from block zero forward to
	 * match the sorted TIDs.
	 */
	scan = heap_beginscan_strat(heapRelation,	/* relation */
								snapshot,		/* snapshot */
								0,		/* number of keys */
								NULL,	/* scan key */
								true,	/* buffer access strategy OK */
								false); /* syncscan not OK */

	/*
	 * Scan all tuples matching the snapshot.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		ItemPointer heapcursor = &heapTuple->t_self;
		ItemPointerData rootTuple;
		OffsetNumber root_offnum;

		CHECK_FOR_INTERRUPTS();

		state->htups += 1;

		/*
		 * As commented in IndexBuildHeapScan, we should index heap-only
		 * tuples under the TIDs of their root tuples; so when we advance onto
		 * a new heap page, build a map of root item offsets on the page.
		 *
		 * This complicates merging against the tuplesort output: we will
		 * visit the live tuples in order by their offsets, but the root
		 * offsets that we need to compare against the index contents might be
		 * ordered differently.  So we might have to "look back" within the
		 * tuplesort output, but only within the current page.  We handle that
		 * by keeping a bool array in_index[] showing all the
		 * already-passed-over tuplesort output TIDs of the current page. We
		 * clear that array here, when advancing onto a new heap page.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			/* Hold a share lock just long enough to build the offset map */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			memset(in_index, 0, sizeof(in_index));

			root_blkno = scan->rs_cblock;
		}

		/* Convert actual tuple TID to root TID */
		rootTuple = *heapcursor;
		root_offnum = ItemPointerGetOffsetNumber(heapcursor);

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			root_offnum = root_offsets[root_offnum - 1];
			if (!OffsetNumberIsValid(root_offnum))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(heapcursor),
										 ItemPointerGetOffsetNumber(heapcursor),
										 RelationGetRelationName(heapRelation))));
			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
		}

		/*
		 * "merge" by skipping through the index tuples until we find or pass
		 * the current root tuple.
		 */
		while (!tuplesort_empty &&
			   (!indexcursor ||
				ItemPointerCompare(indexcursor, &rootTuple) < 0))
		{
			Datum		ts_val;
			bool		ts_isnull;

			if (indexcursor)
			{
				/*
				 * Remember index items seen earlier on the current heap page
				 */
				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
			}

			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
												  &ts_val, &ts_isnull, NULL);
			Assert(tuplesort_empty || !ts_isnull);
			if (!tuplesort_empty)
			{
				itemptr_decode(&decoded, DatumGetInt64(ts_val));
				indexcursor = &decoded;

				/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
#ifndef USE_FLOAT8_BYVAL
				pfree(DatumGetPointer(ts_val));
#endif
			}
			else
			{
				/* Be tidy */
				indexcursor = NULL;
			}
		}

		/*
		 * If the tuplesort has overshot *and* we didn't see a match earlier,
		 * then this tuple is missing from the index, so insert it.
		 */
		if ((tuplesort_empty ||
			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
			!in_index[root_offnum - 1])
		{
			MemoryContextReset(econtext->ecxt_per_tuple_memory);

			/* Set up for predicate or expression evaluation */
			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

			/*
			 * In a partial index, discard tuples that don't satisfy the
			 * predicate.
			 */
			if (predicate != NIL)
			{
				if (!ExecQual(predicate, econtext, false))
					continue;
			}

			/*
			 * For the current heap tuple, extract all the attributes we use
			 * in this index, and note which are null.  This also performs
			 * evaluation of any expressions needed.
			 */
			FormIndexDatum(indexInfo,
						   slot,
						   estate,
						   values,
						   isnull);

			/*
			 * You'd think we should go ahead and build the index tuple here,
			 * but some index AMs want to do further processing on the data
			 * first. So pass the values[] and isnull[] arrays, instead.
			 */

			/*
			 * If the tuple is already committed dead, you might think we
			 * could suppress uniqueness checking, but this is no longer true
			 * in the presence of HOT, because the insert is actually a proxy
			 * for a uniqueness check on the whole HOT-chain.  That is, the
			 * tuple we have here could be dead because it was already
			 * HOT-updated, and if so the updating transaction will not have
			 * thought it should insert index entries.  The index AM will
			 * check the whole HOT-chain and correctly detect a conflict if
			 * there is one.
			 */

			index_insert(indexRelation,
						 values,
						 isnull,
						 &rootTuple,
						 heapRelation,
						 indexInfo->ii_Unique ?
						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO);

			state->tups_inserted += 1;
		}
	}

	heap_endscan(scan);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NIL;
}
3278 
3279 
3280 /*
3281  * index_set_state_flags - adjust pg_index state flags
3282  *
3283  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3284  * flags that denote the index's state.
3285  *
3286  * Note that simple_heap_update() sends a cache invalidation message for the
3287  * tuple, so other sessions will hear about the update as soon as we commit.
3288  */
void
index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
{
	Relation	pg_index;
	HeapTuple	indexTuple;
	Form_pg_index indexForm;

	/* Open pg_index and fetch a writable copy of the index's tuple */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	indexTuple = SearchSysCacheCopy1(INDEXRELID,
									 ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(indexTuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);
	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

	/*
	 * Perform the requested state change on the copy.  There is deliberately
	 * no default case: every IndexStateFlagsAction value must be handled
	 * here.
	 */
	switch (action)
	{
		case INDEX_CREATE_SET_READY:
			/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
			Assert(indexForm->indislive);
			Assert(!indexForm->indisready);
			Assert(!indexForm->indisvalid);
			indexForm->indisready = true;
			break;
		case INDEX_CREATE_SET_VALID:
			/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
			Assert(indexForm->indislive);
			Assert(indexForm->indisready);
			Assert(!indexForm->indisvalid);
			indexForm->indisvalid = true;
			break;
		case INDEX_DROP_CLEAR_VALID:

			/*
			 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
			 *
			 * If indisready == true we leave it set so the index still gets
			 * maintained by active transactions.  We only need to ensure that
			 * indisvalid is false.  (We don't assert that either is initially
			 * true, though, since we want to be able to retry a DROP INDEX
			 * CONCURRENTLY that failed partway through.)
			 *
			 * Note: the CLUSTER logic assumes that indisclustered cannot be
			 * set on any invalid index, so clear that flag too.
			 */
			indexForm->indisvalid = false;
			indexForm->indisclustered = false;
			break;
		case INDEX_DROP_SET_DEAD:

			/*
			 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
			 *
			 * We clear both indisready and indislive, because we not only
			 * want to stop updates, we want to prevent sessions from touching
			 * the index at all.
			 */
			Assert(!indexForm->indisvalid);
			indexForm->indisready = false;
			indexForm->indislive = false;
			break;
	}

	/* ... and update it (simple_heap_update also sends cache invalidation) */
	simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
	CatalogUpdateIndexes(pg_index, indexTuple);

	heap_close(pg_index, RowExclusiveLock);
}
3360 
3361 
3362 /*
3363  * IndexGetRelation: given an index's relation OID, get the OID of the
3364  * relation it is an index on.  Uses the system cache.
3365  */
3366 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3367 IndexGetRelation(Oid indexId, bool missing_ok)
3368 {
3369 	HeapTuple	tuple;
3370 	Form_pg_index index;
3371 	Oid			result;
3372 
3373 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3374 	if (!HeapTupleIsValid(tuple))
3375 	{
3376 		if (missing_ok)
3377 			return InvalidOid;
3378 		elog(ERROR, "cache lookup failed for index %u", indexId);
3379 	}
3380 	index = (Form_pg_index) GETSTRUCT(tuple);
3381 	Assert(index->indexrelid == indexId);
3382 
3383 	result = index->indrelid;
3384 	ReleaseSysCache(tuple);
3385 	return result;
3386 }
3387 
3388 /*
3389  * reindex_index - This routine is used to recreate a single index
3390  */
/*
 * Parameters:
 *	indexId: OID of the index to rebuild.
 *	skip_constraint_checks: if true, suppress uniqueness/exclusion
 *		enforcement during the rebuild (and consequently don't mark an
 *		invalid index as valid afterwards).
 *	persistence: relpersistence to use for the rebuilt index storage.
 *	options: bitmask of REINDEXOPT_XXX flags (only VERBOSE is consulted
 *		here, to emit a completion message with resource-usage stats).
 */
void
reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
			  int options)
{
	Relation	iRel,
				heapRelation;
	Oid			heapId;
	IndexInfo  *indexInfo;
	/* NOTE(review): volatile looks unnecessary here (no PG_TRY in sight);
	 * presumably a leftover from an earlier error-handling scheme — confirm */
	volatile bool skipped_constraint = false;
	PGRUsage	ru0;

	/* Start the resource-usage clock for the VERBOSE report below */
	pg_rusage_init(&ru0);

	/*
	 * Open and lock the parent heap relation.  ShareLock is sufficient since
	 * we only need to be sure no schema or data changes are going on.
	 */
	heapId = IndexGetRelation(indexId, false);
	heapRelation = heap_open(heapId, ShareLock);

	/*
	 * Open the target index relation and get an exclusive lock on it, to
	 * ensure that no one else is touching this particular index.
	 */
	iRel = index_open(indexId, AccessExclusiveLock);

	/*
	 * Don't allow reindex on temp tables of other backends ... their local
	 * buffer manager is not going to cope.
	 */
	if (RELATION_IS_OTHER_TEMP(iRel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			   errmsg("cannot reindex temporary tables of other sessions")));

	/*
	 * Also check for active uses of the index in the current transaction; we
	 * don't want to reindex underneath an open indexscan.
	 */
	CheckTableNotInUse(iRel, "REINDEX INDEX");

	/*
	 * All predicate locks on the index are about to be made invalid. Promote
	 * them to relation locks on the heap.
	 */
	TransferPredicateLocksToHeapRelation(iRel);

	/* Fetch info needed for index_build */
	indexInfo = BuildIndexInfo(iRel);

	/* If requested, skip checking uniqueness/exclusion constraints */
	if (skip_constraint_checks)
	{
		/* Remember whether we actually disabled anything, for later */
		if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
			skipped_constraint = true;
		indexInfo->ii_Unique = false;
		indexInfo->ii_ExclusionOps = NULL;
		indexInfo->ii_ExclusionProcs = NULL;
		indexInfo->ii_ExclusionStrats = NULL;
	}

	/* Suppress use of the target index while rebuilding it */
	SetReindexProcessing(heapId, indexId);

	/* Create a new physical relation for the index */
	RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
							  InvalidMultiXactId);

	/* Initialize the index and rebuild */
	/* Note: we do not need to re-establish pkey setting */
	index_build(heapRelation, iRel, indexInfo, false, true);

	/* Re-allow use of target index */
	ResetReindexProcessing();

	/*
	 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
	 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
	 * and we didn't skip a uniqueness check, we can now mark it valid.  This
	 * allows REINDEX to be used to clean up in such cases.
	 *
	 * We can also reset indcheckxmin, because we have now done a
	 * non-concurrent index build, *except* in the case where index_build
	 * found some still-broken HOT chains. If it did, and we don't have to
	 * change any of the other flags, we just leave indcheckxmin alone (note
	 * that index_build won't have changed it, because this is a reindex).
	 * This is okay and desirable because not updating the tuple leaves the
	 * index's usability horizon (recorded as the tuple's xmin value) the same
	 * as it was.
	 *
	 * But, if the index was invalid/not-ready/dead and there were broken HOT
	 * chains, we had better force indcheckxmin true, because the normal
	 * argument that the HOT chains couldn't conflict with the index is
	 * suspect for an invalid index.  (A conflict is definitely possible if
	 * the index was dead.  It probably shouldn't happen otherwise, but let's
	 * be conservative.)  In this case advancing the usability horizon is
	 * appropriate.
	 *
	 * Another reason for avoiding unnecessary updates here is that while
	 * reindexing pg_index itself, we must not try to update tuples in it.
	 * pg_index's indexes should always have these flags in their clean state,
	 * so that won't happen.
	 *
	 * If early pruning/vacuuming is enabled for the heap relation, the
	 * usability horizon must be advanced to the current transaction on every
	 * build or rebuild.  pg_index is OK in this regard because catalog tables
	 * are not subject to early cleanup.
	 */
	if (!skipped_constraint)
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		index_bad;
		bool		early_pruning_enabled = EarlyPruningEnabled(heapRelation);

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		/* "bad" = any of the concurrently-managed flags not in clean state */
		index_bad = (!indexForm->indisvalid ||
					 !indexForm->indisready ||
					 !indexForm->indislive);
		if (index_bad ||
			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
			early_pruning_enabled)
		{
			if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
				indexForm->indcheckxmin = false;
			else if (index_bad || early_pruning_enabled)
				indexForm->indcheckxmin = true;
			indexForm->indisvalid = true;
			indexForm->indisready = true;
			indexForm->indislive = true;
			simple_heap_update(pg_index, &indexTuple->t_self, indexTuple);
			CatalogUpdateIndexes(pg_index, indexTuple);

			/*
			 * Invalidate the relcache for the table, so that after we commit
			 * all sessions will refresh the table's index list.  This ensures
			 * that if anyone misses seeing the pg_index row during this
			 * update, they'll refresh their list before attempting any update
			 * on the table.
			 */
			CacheInvalidateRelcache(heapRelation);
		}

		heap_close(pg_index, RowExclusiveLock);
	}

	/* Log what we did */
	if (options & REINDEXOPT_VERBOSE)
		ereport(INFO,
				(errmsg("index \"%s\" was reindexed",
						get_rel_name(indexId)),
				 errdetail_internal("%s",
						   pg_rusage_show(&ru0))));

	/* Close rels, but keep locks */
	index_close(iRel, NoLock);
	heap_close(heapRelation, NoLock);
}
3557 
3558 /*
3559  * reindex_relation - This routine is used to recreate all indexes
3560  * of a relation (and optionally its toast relation too, if any).
3561  *
3562  * "flags" is a bitmask that can include any combination of these bits:
3563  *
3564  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3565  *
3566  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3567  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3568  * indexes are inconsistent with it.  This makes things tricky if the relation
3569  * is a system catalog that we might consult during the reindexing.  To deal
3570  * with that case, we mark all of the indexes as pending rebuild so that they
3571  * won't be trusted until rebuilt.  The caller is required to call us *without*
3572  * having made the rebuilt table visible by doing CommandCounterIncrement;
3573  * we'll do CCI after having collected the index list.  (This way we can still
3574  * use catalog indexes while collecting the list.)
3575  *
3576  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3577  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
3578  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
3579  * rebuild an index if constraint inconsistency is suspected.  For optimal
3580  * performance, other callers should include the flag only after transforming
3581  * the data in a manner that risks a change in constraint validity.
3582  *
3583  * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3584  * rebuilt indexes to unlogged.
3585  *
3586  * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3587  * rebuilt indexes to permanent.
3588  *
3589  * Returns true if any indexes were rebuilt (including toast table's index
3590  * when relevant).  Note that a CommandCounterIncrement will occur after each
3591  * index rebuild.
3592  */
3593 bool
reindex_relation(Oid relid,int flags,int options)3594 reindex_relation(Oid relid, int flags, int options)
3595 {
3596 	Relation	rel;
3597 	Oid			toast_relid;
3598 	List	   *indexIds;
3599 	char		persistence;
3600 	bool		result;
3601 	ListCell   *indexId;
3602 
3603 	/*
3604 	 * Open and lock the relation.  ShareLock is sufficient since we only need
3605 	 * to prevent schema and data changes in it.  The lock level used here
3606 	 * should match ReindexTable().
3607 	 */
3608 	rel = heap_open(relid, ShareLock);
3609 
3610 	toast_relid = rel->rd_rel->reltoastrelid;
3611 
3612 	/*
3613 	 * Get the list of index OIDs for this relation.  (We trust to the
3614 	 * relcache to get this with a sequential scan if ignoring system
3615 	 * indexes.)
3616 	 */
3617 	indexIds = RelationGetIndexList(rel);
3618 
3619 	if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3620 	{
3621 		/* Suppress use of all the indexes until they are rebuilt */
3622 		SetReindexPending(indexIds);
3623 
3624 		/*
3625 		 * Make the new heap contents visible --- now things might be
3626 		 * inconsistent!
3627 		 */
3628 		CommandCounterIncrement();
3629 	}
3630 
3631 	/*
3632 	 * Compute persistence of indexes: same as that of owning rel, unless
3633 	 * caller specified otherwise.
3634 	 */
3635 	if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3636 		persistence = RELPERSISTENCE_UNLOGGED;
3637 	else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3638 		persistence = RELPERSISTENCE_PERMANENT;
3639 	else
3640 		persistence = rel->rd_rel->relpersistence;
3641 
3642 	/* Reindex all the indexes. */
3643 	foreach(indexId, indexIds)
3644 	{
3645 		Oid			indexOid = lfirst_oid(indexId);
3646 
3647 		reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3648 					  persistence, options);
3649 
3650 		CommandCounterIncrement();
3651 
3652 		/* Index should no longer be in the pending list */
3653 		Assert(!ReindexIsProcessingIndex(indexOid));
3654 	}
3655 
3656 	/*
3657 	 * Close rel, but continue to hold the lock.
3658 	 */
3659 	heap_close(rel, NoLock);
3660 
3661 	result = (indexIds != NIL);
3662 
3663 	/*
3664 	 * If the relation has a secondary toast rel, reindex that too while we
3665 	 * still hold the lock on the master table.
3666 	 */
3667 	if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3668 		result |= reindex_relation(toast_relid, flags, options);
3669 
3670 	return result;
3671 }
3672 
3673 
3674 /* ----------------------------------------------------------------
3675  *		System index reindexing support
3676  *
3677  * When we are busy reindexing a system index, this code provides support
3678  * for preventing catalog lookups from using that index.  We also make use
3679  * of this to catch attempted uses of user indexes during reindexing of
3680  * those indexes.
3681  * ----------------------------------------------------------------
3682  */
3683 
3684 static Oid	currentlyReindexedHeap = InvalidOid;
3685 static Oid	currentlyReindexedIndex = InvalidOid;
3686 static List *pendingReindexedIndexes = NIL;
3687 static int	reindexingNestLevel = 0;
3688 
3689 /*
3690  * ReindexIsProcessingHeap
3691  *		True if heap specified by OID is currently being reindexed.
3692  */
3693 bool
ReindexIsProcessingHeap(Oid heapOid)3694 ReindexIsProcessingHeap(Oid heapOid)
3695 {
3696 	return heapOid == currentlyReindexedHeap;
3697 }
3698 
3699 /*
3700  * ReindexIsCurrentlyProcessingIndex
3701  *		True if index specified by OID is currently being reindexed.
3702  */
3703 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)3704 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3705 {
3706 	return indexOid == currentlyReindexedIndex;
3707 }
3708 
3709 /*
3710  * ReindexIsProcessingIndex
3711  *		True if index specified by OID is currently being reindexed,
3712  *		or should be treated as invalid because it is awaiting reindex.
3713  */
3714 bool
ReindexIsProcessingIndex(Oid indexOid)3715 ReindexIsProcessingIndex(Oid indexOid)
3716 {
3717 	return indexOid == currentlyReindexedIndex ||
3718 		list_member_oid(pendingReindexedIndexes, indexOid);
3719 }
3720 
3721 /*
3722  * SetReindexProcessing
3723  *		Set flag that specified heap/index are being reindexed.
3724  */
3725 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)3726 SetReindexProcessing(Oid heapOid, Oid indexOid)
3727 {
3728 	Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3729 	/* Reindexing is not re-entrant. */
3730 	if (OidIsValid(currentlyReindexedHeap))
3731 		elog(ERROR, "cannot reindex while reindexing");
3732 	currentlyReindexedHeap = heapOid;
3733 	currentlyReindexedIndex = indexOid;
3734 	/* Index is no longer "pending" reindex. */
3735 	RemoveReindexPending(indexOid);
3736 	/* This may have been set already, but in case it isn't, do so now. */
3737 	reindexingNestLevel = GetCurrentTransactionNestLevel();
3738 }
3739 
3740 /*
3741  * ResetReindexProcessing
3742  *		Unset reindexing status.
3743  */
3744 static void
ResetReindexProcessing(void)3745 ResetReindexProcessing(void)
3746 {
3747 	currentlyReindexedHeap = InvalidOid;
3748 	currentlyReindexedIndex = InvalidOid;
3749 	/* reindexingNestLevel remains set till end of (sub)transaction */
3750 }
3751 
3752 /*
3753  * SetReindexPending
3754  *		Mark the given indexes as pending reindex.
3755  *
3756  * NB: we assume that the current memory context stays valid throughout.
3757  */
3758 static void
SetReindexPending(List * indexes)3759 SetReindexPending(List *indexes)
3760 {
3761 	/* Reindexing is not re-entrant. */
3762 	if (pendingReindexedIndexes)
3763 		elog(ERROR, "cannot reindex while reindexing");
3764 	pendingReindexedIndexes = list_copy(indexes);
3765 	reindexingNestLevel = GetCurrentTransactionNestLevel();
3766 }
3767 
3768 /*
3769  * RemoveReindexPending
3770  *		Remove the given index from the pending list.
3771  */
3772 static void
RemoveReindexPending(Oid indexOid)3773 RemoveReindexPending(Oid indexOid)
3774 {
3775 	pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3776 											  indexOid);
3777 }
3778 
3779 /*
3780  * ResetReindexState
3781  *		Clear all reindexing state during (sub)transaction abort.
3782  */
3783 void
ResetReindexState(int nestLevel)3784 ResetReindexState(int nestLevel)
3785 {
3786 	/*
3787 	 * Because reindexing is not re-entrant, we don't need to cope with nested
3788 	 * reindexing states.  We just need to avoid messing up the outer-level
3789 	 * state in case a subtransaction fails within a REINDEX.  So checking the
3790 	 * current nest level against that of the reindex operation is sufficient.
3791 	 */
3792 	if (reindexingNestLevel >= nestLevel)
3793 	{
3794 		currentlyReindexedHeap = InvalidOid;
3795 		currentlyReindexedIndex = InvalidOid;
3796 
3797 		/*
3798 		 * We needn't try to release the contents of pendingReindexedIndexes;
3799 		 * that list should be in a transaction-lifespan context, so it will
3800 		 * go away automatically.
3801 		 */
3802 		pendingReindexedIndexes = NIL;
3803 
3804 		reindexingNestLevel = 0;
3805 	}
3806 }
3807