1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *	  code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/catalog/index.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *		index_create()			- Create a cataloged index relation
16  *		index_drop()			- Removes index relation from catalogs
17  *		BuildIndexInfo()		- Prepare to insert index tuples
18  *		FormIndexDatum()		- Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23 
24 #include <unistd.h>
25 
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/sysattr.h"
30 #include "access/transam.h"
31 #include "access/visibilitymap.h"
32 #include "access/xact.h"
33 #include "bootstrap/bootstrap.h"
34 #include "catalog/binary_upgrade.h"
35 #include "catalog/catalog.h"
36 #include "catalog/dependency.h"
37 #include "catalog/heap.h"
38 #include "catalog/index.h"
39 #include "catalog/objectaccess.h"
40 #include "catalog/pg_am.h"
41 #include "catalog/pg_collation.h"
42 #include "catalog/pg_constraint.h"
43 #include "catalog/pg_constraint_fn.h"
44 #include "catalog/pg_operator.h"
45 #include "catalog/pg_opclass.h"
46 #include "catalog/pg_tablespace.h"
47 #include "catalog/pg_trigger.h"
48 #include "catalog/pg_type.h"
49 #include "catalog/storage.h"
50 #include "commands/tablecmds.h"
51 #include "commands/event_trigger.h"
52 #include "commands/trigger.h"
53 #include "executor/executor.h"
54 #include "miscadmin.h"
55 #include "nodes/makefuncs.h"
56 #include "nodes/nodeFuncs.h"
57 #include "optimizer/clauses.h"
58 #include "parser/parser.h"
59 #include "storage/bufmgr.h"
60 #include "storage/lmgr.h"
61 #include "storage/predicate.h"
62 #include "storage/procarray.h"
63 #include "storage/smgr.h"
64 #include "utils/builtins.h"
65 #include "utils/fmgroids.h"
66 #include "utils/guc.h"
67 #include "utils/inval.h"
68 #include "utils/lsyscache.h"
69 #include "utils/memutils.h"
70 #include "utils/pg_rusage.h"
71 #include "utils/syscache.h"
72 #include "utils/tuplesort.h"
73 #include "utils/snapmgr.h"
74 #include "utils/tqual.h"
75 
76 
77 /* Potentially set by pg_upgrade_support functions */
78 Oid			binary_upgrade_next_index_pg_class_oid = InvalidOid;
79 
80 /* state info for validate_index bulkdelete callback */
81 typedef struct
82 {
83 	Tuplesortstate *tuplesort;	/* for sorting the index TIDs */
84 	/* statistics (for debug purposes only): */
85 	double		htups,
86 				itups,
87 				tups_inserted;
88 } v_i_state;
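/*
 * Illustrative sketch, not the actual definitions (those appear further
 * down in this file): validate_index() gathers the index's TIDs into
 * state->tuplesort via the bulkdelete callback, roughly as
 *
 *		static bool
 *		validate_index_callback(ItemPointer itemptr, void *opaque)
 *		{
 *			v_i_state  *state = (v_i_state *) opaque;
 *
 *			tuplesort_putdatum(state->tuplesort,
 *							   Int64GetDatum(itemptr_encode(itemptr)),
 *							   false);
 *			state->itups += 1;
 *			return false;		(nothing is ever deleted)
 *		}
 *
 * after which validate_index_heapscan() merges the sorted TIDs against a
 * fresh heap scan and inserts whatever index entries are missing.
 */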
89 
90 /* non-export function prototypes */
91 static bool relationHasPrimaryKey(Relation rel);
92 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
93 						 IndexInfo *indexInfo,
94 						 List *indexColNames,
95 						 Oid accessMethodObjectId,
96 						 Oid *collationObjectId,
97 						 Oid *classObjectId);
98 static void InitializeAttributeOids(Relation indexRelation,
99 						int numatts, Oid indexoid);
100 static void AppendAttributeTuples(Relation indexRelation, int numatts);
101 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
102 					IndexInfo *indexInfo,
103 					Oid *collationOids,
104 					Oid *classOids,
105 					int16 *coloptions,
106 					bool primary,
107 					bool isexclusion,
108 					bool immediate,
109 					bool isvalid);
110 static void index_update_stats(Relation rel,
111 				   bool hasindex, bool isprimary,
112 				   double reltuples);
113 static void IndexCheckExclusion(Relation heapRelation,
114 					Relation indexRelation,
115 					IndexInfo *indexInfo);
116 static inline int64 itemptr_encode(ItemPointer itemptr);
117 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
118 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
119 static void validate_index_heapscan(Relation heapRelation,
120 						Relation indexRelation,
121 						IndexInfo *indexInfo,
122 						Snapshot snapshot,
123 						v_i_state *state);
124 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
125 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
126 static void ResetReindexProcessing(void);
127 static void SetReindexPending(List *indexes);
128 static void RemoveReindexPending(Oid indexOid);
129 
130 
131 /*
132  * relationHasPrimaryKey
133  *		See whether an existing relation has a primary key.
134  *
135  * Caller must have suitable lock on the relation.
136  *
137  * Note: we intentionally do not check IndexIsValid here; that's because this
138  * is used to enforce the rule that there can be only one indisprimary index,
139  * and we want that to be true even if said index is invalid.
140  */
141 static bool
relationHasPrimaryKey(Relation rel)
143 {
144 	bool		result = false;
145 	List	   *indexoidlist;
146 	ListCell   *indexoidscan;
147 
148 	/*
149 	 * Get the list of index OIDs for the table from the relcache, and look up
150 	 * each one in the pg_index syscache until we find one marked primary key
151 	 * (hopefully there isn't more than one such).
152 	 */
153 	indexoidlist = RelationGetIndexList(rel);
154 
155 	foreach(indexoidscan, indexoidlist)
156 	{
157 		Oid			indexoid = lfirst_oid(indexoidscan);
158 		HeapTuple	indexTuple;
159 
160 		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
161 		if (!HeapTupleIsValid(indexTuple))	/* should not happen */
162 			elog(ERROR, "cache lookup failed for index %u", indexoid);
163 		result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
164 		ReleaseSysCache(indexTuple);
165 		if (result)
166 			break;
167 	}
168 
169 	list_free(indexoidlist);
170 
171 	return result;
172 }
173 
174 /*
175  * index_check_primary_key
176  *		Apply special checks needed before creating a PRIMARY KEY index
177  *
178  * This processing used to be in DefineIndex(), but has been split out
179  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
180  *
181  * We check for a pre-existing primary key, and that all columns of the index
182  * are simple column references (not expressions), and that all those
183  * columns are marked NOT NULL.  If they aren't (which can only happen during
184  * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
185  * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
186  * them so --- or fail if they are not in fact nonnull.
187  *
188  * As of PG v10, the SET NOT NULL is applied to child tables as well, so
189  * that the behavior is like a manual SET NOT NULL.
190  *
191  * Caller had better have at least ShareLock on the table, else the not-null
192  * checking isn't trustworthy.
193  */
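/*
 * Hypothetical caller sketch (not code from this file): the ALTER TABLE
 * ... ADD PRIMARY KEY USING INDEX path would invoke this roughly as
 *
 *		index_check_primary_key(heapRel, indexInfo, true, stmt);
 *
 * passing is_alter_table = true so that an existing primary key is
 * rejected and nullable key columns are fixed up with SET NOT NULL.
 */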
194 void
index_check_primary_key(Relation heapRel,
196 						IndexInfo *indexInfo,
197 						bool is_alter_table,
198 						IndexStmt *stmt)
199 {
200 	List	   *cmds;
201 	int			i;
202 
203 	/*
204 	 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In CREATE
205 	 * TABLE, we have faith that the parser rejected multiple pkey clauses;
206 	 * and CREATE INDEX doesn't have a way to say PRIMARY KEY, so it's no
207 	 * problem either.
208 	 */
209 	if (is_alter_table &&
210 		relationHasPrimaryKey(heapRel))
211 	{
212 		ereport(ERROR,
213 				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
214 				 errmsg("multiple primary keys for table \"%s\" are not allowed",
215 						RelationGetRelationName(heapRel))));
216 	}
217 
218 	/*
219 	 * Check that all of the attributes in a primary key are marked as not
220 	 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
221 	 */
222 	cmds = NIL;
223 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
224 	{
225 		AttrNumber	attnum = indexInfo->ii_KeyAttrNumbers[i];
226 		HeapTuple	atttuple;
227 		Form_pg_attribute attform;
228 
229 		if (attnum == 0)
230 			ereport(ERROR,
231 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
232 					 errmsg("primary keys cannot be expressions")));
233 
234 		/* System attributes are never null, so no need to check */
235 		if (attnum < 0)
236 			continue;
237 
238 		atttuple = SearchSysCache2(ATTNUM,
239 								   ObjectIdGetDatum(RelationGetRelid(heapRel)),
240 								   Int16GetDatum(attnum));
241 		if (!HeapTupleIsValid(atttuple))
242 			elog(ERROR, "cache lookup failed for attribute %d of relation %u",
243 				 attnum, RelationGetRelid(heapRel));
244 		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
245 
246 		if (!attform->attnotnull)
247 		{
248 			/* Add a subcommand to make this one NOT NULL */
249 			AlterTableCmd *cmd = makeNode(AlterTableCmd);
250 
251 			cmd->subtype = AT_SetNotNull;
252 			cmd->name = pstrdup(NameStr(attform->attname));
253 			cmds = lappend(cmds, cmd);
254 		}
255 
256 		ReleaseSysCache(atttuple);
257 	}
258 
259 	/*
260 	 * XXX: possible future improvement: when being called from ALTER TABLE,
261 	 * it would be more efficient to merge this with the outer ALTER TABLE, so
262 	 * as to avoid two scans.  But that seems to complicate DefineIndex's API
263 	 * unduly.
264 	 */
265 	if (cmds)
266 	{
267 		EventTriggerAlterTableStart((Node *) stmt);
268 		AlterTableInternal(RelationGetRelid(heapRel), cmds, true);
269 		EventTriggerAlterTableEnd();
270 	}
271 }
272 
273 /*
274  *		ConstructTupleDescriptor
275  *
276  * Build an index tuple descriptor for a new index
277  */
278 static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
280 						 IndexInfo *indexInfo,
281 						 List *indexColNames,
282 						 Oid accessMethodObjectId,
283 						 Oid *collationObjectId,
284 						 Oid *classObjectId)
285 {
286 	int			numatts = indexInfo->ii_NumIndexAttrs;
287 	ListCell   *colnames_item = list_head(indexColNames);
288 	ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
289 	IndexAmRoutine *amroutine;
290 	TupleDesc	heapTupDesc;
291 	TupleDesc	indexTupDesc;
292 	int			natts;			/* #atts in heap rel --- for error checks */
293 	int			i;
294 
295 	/* We need access to the index AM's API struct */
296 	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);
297 
298 	/* ... and to the table's tuple descriptor */
299 	heapTupDesc = RelationGetDescr(heapRelation);
300 	natts = RelationGetForm(heapRelation)->relnatts;
301 
302 	/*
303 	 * allocate the new tuple descriptor
304 	 */
305 	indexTupDesc = CreateTemplateTupleDesc(numatts, false);
306 
307 	/*
308 	 * For simple index columns, we copy the pg_attribute row from the parent
309 	 * relation and modify it as necessary.  For expressions we have to cons
310 	 * up a pg_attribute row the hard way.
311 	 */
312 	for (i = 0; i < numatts; i++)
313 	{
314 		AttrNumber	atnum = indexInfo->ii_KeyAttrNumbers[i];
315 		Form_pg_attribute to = indexTupDesc->attrs[i];
316 		HeapTuple	tuple;
317 		Form_pg_type typeTup;
318 		Form_pg_opclass opclassTup;
319 		Oid			keyType;
320 
321 		if (atnum != 0)
322 		{
323 			/* Simple index column */
324 			Form_pg_attribute from;
325 
326 			if (atnum < 0)
327 			{
328 				/*
329 				 * here we are indexing on a system attribute (-1...-n)
330 				 */
331 				from = SystemAttributeDefinition(atnum,
332 												 heapRelation->rd_rel->relhasoids);
333 			}
334 			else
335 			{
336 				/*
337 				 * here we are indexing on a normal attribute (1...n)
338 				 */
339 				if (atnum > natts)	/* safety check */
340 					elog(ERROR, "invalid column number %d", atnum);
341 				from = heapTupDesc->attrs[AttrNumberGetAttrOffset(atnum)];
342 			}
343 
344 			/*
345 			 * now that we've determined the "from", let's copy the tuple desc
346 			 * data...
347 			 */
348 			memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);
349 
350 			/*
351 			 * Set the attribute name as specified by caller.
352 			 */
353 			if (colnames_item == NULL)	/* shouldn't happen */
354 				elog(ERROR, "too few entries in colnames list");
355 			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
356 			colnames_item = lnext(colnames_item);
357 
358 			/*
359 			 * Fix the stuff that should not be the same as the underlying
360 			 * attr
361 			 */
362 			to->attnum = i + 1;
363 
364 			to->attstattarget = -1;
365 			to->attcacheoff = -1;
366 			to->attnotnull = false;
367 			to->atthasdef = false;
368 			to->attidentity = '\0';
369 			to->attislocal = true;
370 			to->attinhcount = 0;
371 			to->attcollation = collationObjectId[i];
372 		}
373 		else
374 		{
375 			/* Expressional index */
376 			Node	   *indexkey;
377 
378 			MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);
379 
380 			/*
381 			 * Set the attribute name as specified by caller.
382 			 */
383 			if (colnames_item == NULL)	/* shouldn't happen */
384 				elog(ERROR, "too few entries in colnames list");
385 			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
386 			colnames_item = lnext(colnames_item);
387 
388 			if (indexpr_item == NULL)	/* shouldn't happen */
389 				elog(ERROR, "too few entries in indexprs list");
390 			indexkey = (Node *) lfirst(indexpr_item);
391 			indexpr_item = lnext(indexpr_item);
392 
393 			/*
394 			 * Lookup the expression type in pg_type for the type length etc.
395 			 */
396 			keyType = exprType(indexkey);
397 			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
398 			if (!HeapTupleIsValid(tuple))
399 				elog(ERROR, "cache lookup failed for type %u", keyType);
400 			typeTup = (Form_pg_type) GETSTRUCT(tuple);
401 
402 			/*
			 * Assign some of the attribute's values. Leave the rest as 0.
404 			 */
405 			to->attnum = i + 1;
406 			to->atttypid = keyType;
407 			to->attlen = typeTup->typlen;
408 			to->attbyval = typeTup->typbyval;
409 			to->attstorage = typeTup->typstorage;
410 			to->attalign = typeTup->typalign;
411 			to->attstattarget = -1;
412 			to->attcacheoff = -1;
413 			to->atttypmod = exprTypmod(indexkey);
414 			to->attislocal = true;
415 			to->attcollation = collationObjectId[i];
416 
417 			ReleaseSysCache(tuple);
418 
419 			/*
420 			 * Make sure the expression yields a type that's safe to store in
421 			 * an index.  We need this defense because we have index opclasses
422 			 * for pseudo-types such as "record", and the actually stored type
423 			 * had better be safe; eg, a named composite type is okay, an
424 			 * anonymous record type is not.  The test is the same as for
425 			 * whether a table column is of a safe type (which is why we
426 			 * needn't check for the non-expression case).
427 			 */
428 			CheckAttributeType(NameStr(to->attname),
429 							   to->atttypid, to->attcollation,
430 							   NIL, false);
431 		}
432 
433 		/*
434 		 * We do not yet have the correct relation OID for the index, so just
435 		 * set it invalid for now.  InitializeAttributeOids() will fix it
436 		 * later.
437 		 */
438 		to->attrelid = InvalidOid;
439 
440 		/*
441 		 * Check the opclass and index AM to see if either provides a keytype
442 		 * (overriding the attribute type).  Opclass takes precedence.
443 		 */
444 		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
445 		if (!HeapTupleIsValid(tuple))
446 			elog(ERROR, "cache lookup failed for opclass %u",
447 				 classObjectId[i]);
448 		opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
449 		if (OidIsValid(opclassTup->opckeytype))
450 			keyType = opclassTup->opckeytype;
451 		else
452 			keyType = amroutine->amkeytype;
453 
454 		/*
455 		 * If keytype is specified as ANYELEMENT, and opcintype is ANYARRAY,
456 		 * then the attribute type must be an array (else it'd not have
457 		 * matched this opclass); use its element type.
458 		 */
459 		if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
460 		{
461 			keyType = get_base_element_type(to->atttypid);
462 			if (!OidIsValid(keyType))
463 				elog(ERROR, "could not get element type of array type %u",
464 					 to->atttypid);
465 		}
466 
467 		ReleaseSysCache(tuple);
468 
469 		/*
470 		 * If a key type different from the heap value is specified, update
471 		 * the type-related fields in the index tupdesc.
472 		 */
473 		if (OidIsValid(keyType) && keyType != to->atttypid)
474 		{
475 			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
476 			if (!HeapTupleIsValid(tuple))
477 				elog(ERROR, "cache lookup failed for type %u", keyType);
478 			typeTup = (Form_pg_type) GETSTRUCT(tuple);
479 
480 			to->atttypid = keyType;
481 			to->atttypmod = -1;
482 			to->attlen = typeTup->typlen;
483 			to->attbyval = typeTup->typbyval;
484 			to->attalign = typeTup->typalign;
485 			to->attstorage = typeTup->typstorage;
486 
487 			ReleaseSysCache(tuple);
488 		}
489 	}
490 
491 	pfree(amroutine);
492 
493 	return indexTupDesc;
494 }
495 
496 /* ----------------------------------------------------------------
497  *		InitializeAttributeOids
498  * ----------------------------------------------------------------
499  */
500 static void
InitializeAttributeOids(Relation indexRelation,
502 						int numatts,
503 						Oid indexoid)
504 {
505 	TupleDesc	tupleDescriptor;
506 	int			i;
507 
508 	tupleDescriptor = RelationGetDescr(indexRelation);
509 
510 	for (i = 0; i < numatts; i += 1)
511 		tupleDescriptor->attrs[i]->attrelid = indexoid;
512 }
513 
514 /* ----------------------------------------------------------------
515  *		AppendAttributeTuples
516  * ----------------------------------------------------------------
517  */
518 static void
AppendAttributeTuples(Relation indexRelation, int numatts)
520 {
521 	Relation	pg_attribute;
522 	CatalogIndexState indstate;
523 	TupleDesc	indexTupDesc;
524 	int			i;
525 
526 	/*
527 	 * open the attribute relation and its indexes
528 	 */
529 	pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
530 
531 	indstate = CatalogOpenIndexes(pg_attribute);
532 
533 	/*
534 	 * insert data from new index's tupdesc into pg_attribute
535 	 */
536 	indexTupDesc = RelationGetDescr(indexRelation);
537 
538 	for (i = 0; i < numatts; i++)
539 	{
540 		/*
541 		 * There used to be very grotty code here to set these fields, but I
542 		 * think it's unnecessary.  They should be set already.
543 		 */
544 		Assert(indexTupDesc->attrs[i]->attnum == i + 1);
545 		Assert(indexTupDesc->attrs[i]->attcacheoff == -1);
546 
547 		InsertPgAttributeTuple(pg_attribute, indexTupDesc->attrs[i], indstate);
548 	}
549 
550 	CatalogCloseIndexes(indstate);
551 
552 	heap_close(pg_attribute, RowExclusiveLock);
553 }
554 
555 /* ----------------------------------------------------------------
556  *		UpdateIndexRelation
557  *
558  * Construct and insert a new entry in the pg_index catalog
559  * ----------------------------------------------------------------
560  */
561 static void
UpdateIndexRelation(Oid indexoid,
563 					Oid heapoid,
564 					IndexInfo *indexInfo,
565 					Oid *collationOids,
566 					Oid *classOids,
567 					int16 *coloptions,
568 					bool primary,
569 					bool isexclusion,
570 					bool immediate,
571 					bool isvalid)
572 {
573 	int2vector *indkey;
574 	oidvector  *indcollation;
575 	oidvector  *indclass;
576 	int2vector *indoption;
577 	Datum		exprsDatum;
578 	Datum		predDatum;
579 	Datum		values[Natts_pg_index];
580 	bool		nulls[Natts_pg_index];
581 	Relation	pg_index;
582 	HeapTuple	tuple;
583 	int			i;
584 
585 	/*
586 	 * Copy the index key, opclass, and indoption info into arrays (should we
587 	 * make the caller pass them like this to start with?)
588 	 */
589 	indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
590 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
591 		indkey->values[i] = indexInfo->ii_KeyAttrNumbers[i];
592 	indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexAttrs);
593 	indclass = buildoidvector(classOids, indexInfo->ii_NumIndexAttrs);
594 	indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexAttrs);
595 
596 	/*
597 	 * Convert the index expressions (if any) to a text datum
598 	 */
599 	if (indexInfo->ii_Expressions != NIL)
600 	{
601 		char	   *exprsString;
602 
603 		exprsString = nodeToString(indexInfo->ii_Expressions);
604 		exprsDatum = CStringGetTextDatum(exprsString);
605 		pfree(exprsString);
606 	}
607 	else
608 		exprsDatum = (Datum) 0;
609 
610 	/*
611 	 * Convert the index predicate (if any) to a text datum.  Note we convert
612 	 * implicit-AND format to normal explicit-AND for storage.
613 	 */
614 	if (indexInfo->ii_Predicate != NIL)
615 	{
616 		char	   *predString;
617 
618 		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
619 		predDatum = CStringGetTextDatum(predString);
620 		pfree(predString);
621 	}
622 	else
623 		predDatum = (Datum) 0;
624 
625 	/*
626 	 * open the system catalog index relation
627 	 */
628 	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
629 
630 	/*
631 	 * Build a pg_index tuple
632 	 */
633 	MemSet(nulls, false, sizeof(nulls));
634 
635 	values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
636 	values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
637 	values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
638 	values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
639 	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
640 	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
641 	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
642 	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
643 	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
644 	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
645 	/* we set isvalid and isready the same way */
646 	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isvalid);
647 	values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
648 	values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
649 	values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
650 	values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
651 	values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
652 	values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
653 	values[Anum_pg_index_indexprs - 1] = exprsDatum;
654 	if (exprsDatum == (Datum) 0)
655 		nulls[Anum_pg_index_indexprs - 1] = true;
656 	values[Anum_pg_index_indpred - 1] = predDatum;
657 	if (predDatum == (Datum) 0)
658 		nulls[Anum_pg_index_indpred - 1] = true;
659 
660 	tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);
661 
662 	/*
663 	 * insert the tuple into the pg_index catalog
664 	 */
665 	CatalogTupleInsert(pg_index, tuple);
666 
667 	/*
668 	 * close the relation and free the tuple
669 	 */
670 	heap_close(pg_index, RowExclusiveLock);
671 	heap_freetuple(tuple);
672 }
673 
674 
675 /*
676  * index_create
677  *
678  * heapRelation: table to build index on (suitably locked by caller)
 * indexRelationName: what it says
680  * indexRelationId: normally, pass InvalidOid to let this routine
681  *		generate an OID for the index.  During bootstrap this may be
682  *		nonzero to specify a preselected OID.
683  * relFileNode: normally, pass InvalidOid to get new storage.  May be
684  *		nonzero to attach an existing valid build.
685  * indexInfo: same info executor uses to insert into the index
686  * indexColNames: column names to use for index (List of char *)
687  * accessMethodObjectId: OID of index AM to use
688  * tableSpaceId: OID of tablespace to use
689  * collationObjectId: array of collation OIDs, one per index column
690  * classObjectId: array of index opclass OIDs, one per index column
691  * coloptions: array of per-index-column indoption settings
692  * reloptions: AM-specific options
693  * isprimary: index is a PRIMARY KEY
694  * isconstraint: index is owned by PRIMARY KEY, UNIQUE, or EXCLUSION constraint
695  * deferrable: constraint is DEFERRABLE
696  * initdeferred: constraint is INITIALLY DEFERRED
697  * allow_system_table_mods: allow table to be a system catalog
698  * skip_build: true to skip the index_build() step for the moment; caller
699  *		must do it later (typically via reindex_index())
700  * concurrent: if true, do not lock the table against writers.  The index
701  *		will be marked "invalid" and the caller must take additional steps
702  *		to fix it up.
703  * is_internal: if true, post creation hook for new index
704  * if_not_exists: if true, do not throw an error if a relation with
705  *		the same name already exists.
706  *
707  * Returns the OID of the created index.
708  */
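/*
 * Illustrative sketch, not a call taken from this file: a caller such as
 * DefineIndex() supplies one collation, opclass and option entry per index
 * column and might invoke this routine roughly as
 *
 *		indexRelationId =
 *			index_create(heapRel,			(table, suitably locked)
 *						 "foo_col_idx",
 *						 InvalidOid,		(choose a new OID)
 *						 InvalidOid,		(new storage)
 *						 indexInfo,
 *						 indexColNames,		(List of char *)
 *						 BTREE_AM_OID,
 *						 InvalidOid,		(default tablespace)
 *						 collationIds, opclassIds, coloptions,
 *						 (Datum) 0,			(no reloptions)
 *						 false, false,		(not primary, not a constraint)
 *						 false, false,		(not deferrable)
 *						 false,				(no system-table mods)
 *						 false,				(build now, don't skip)
 *						 false,				(not concurrent)
 *						 false,				(not internal)
 *						 false);			(error if the name exists)
 *
 * where heapRel, indexInfo, indexColNames, collationIds, opclassIds and
 * coloptions are hypothetical values of the types documented above.
 */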
709 Oid
index_create(Relation heapRelation,
711 			 const char *indexRelationName,
712 			 Oid indexRelationId,
713 			 Oid relFileNode,
714 			 IndexInfo *indexInfo,
715 			 List *indexColNames,
716 			 Oid accessMethodObjectId,
717 			 Oid tableSpaceId,
718 			 Oid *collationObjectId,
719 			 Oid *classObjectId,
720 			 int16 *coloptions,
721 			 Datum reloptions,
722 			 bool isprimary,
723 			 bool isconstraint,
724 			 bool deferrable,
725 			 bool initdeferred,
726 			 bool allow_system_table_mods,
727 			 bool skip_build,
728 			 bool concurrent,
729 			 bool is_internal,
730 			 bool if_not_exists)
731 {
732 	Oid			heapRelationId = RelationGetRelid(heapRelation);
733 	Relation	pg_class;
734 	Relation	indexRelation;
735 	TupleDesc	indexTupDesc;
736 	bool		shared_relation;
737 	bool		mapped_relation;
738 	bool		is_exclusion;
739 	Oid			namespaceId;
740 	int			i;
741 	char		relpersistence;
742 
743 	is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
744 
745 	pg_class = heap_open(RelationRelationId, RowExclusiveLock);
746 
747 	/*
748 	 * The index will be in the same namespace as its parent table, and is
749 	 * shared across databases if and only if the parent is.  Likewise, it
750 	 * will use the relfilenode map if and only if the parent does; and it
751 	 * inherits the parent's relpersistence.
752 	 */
753 	namespaceId = RelationGetNamespace(heapRelation);
754 	shared_relation = heapRelation->rd_rel->relisshared;
755 	mapped_relation = RelationIsMapped(heapRelation);
756 	relpersistence = heapRelation->rd_rel->relpersistence;
757 
758 	/*
759 	 * check parameters
760 	 */
761 	if (indexInfo->ii_NumIndexAttrs < 1)
762 		elog(ERROR, "must index at least one column");
763 
764 	if (!allow_system_table_mods &&
765 		IsSystemRelation(heapRelation) &&
766 		IsNormalProcessingMode())
767 		ereport(ERROR,
768 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
769 				 errmsg("user-defined indexes on system catalog tables are not supported")));
770 
771 	/*
772 	 * concurrent index build on a system catalog is unsafe because we tend to
773 	 * release locks before committing in catalogs
774 	 */
775 	if (concurrent &&
776 		IsSystemRelation(heapRelation))
777 		ereport(ERROR,
778 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
779 				 errmsg("concurrent index creation on system catalog tables is not supported")));
780 
781 	/*
782 	 * This case is currently not supported, but there's no way to ask for it
783 	 * in the grammar anyway, so it can't happen.
784 	 */
785 	if (concurrent && is_exclusion)
786 		ereport(ERROR,
787 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
788 				 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
789 
790 	/*
791 	 * We cannot allow indexing a shared relation after initdb (because
792 	 * there's no way to make the entry in other databases' pg_class).
793 	 */
794 	if (shared_relation && !IsBootstrapProcessingMode())
795 		ereport(ERROR,
796 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
797 				 errmsg("shared indexes cannot be created after initdb")));
798 
799 	/*
800 	 * Shared relations must be in pg_global, too (last-ditch check)
801 	 */
802 	if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
803 		elog(ERROR, "shared relations must be placed in pg_global tablespace");
804 
805 	if (get_relname_relid(indexRelationName, namespaceId))
806 	{
807 		if (if_not_exists)
808 		{
809 			ereport(NOTICE,
810 					(errcode(ERRCODE_DUPLICATE_TABLE),
811 					 errmsg("relation \"%s\" already exists, skipping",
812 							indexRelationName)));
813 			heap_close(pg_class, RowExclusiveLock);
814 			return InvalidOid;
815 		}
816 
817 		ereport(ERROR,
818 				(errcode(ERRCODE_DUPLICATE_TABLE),
819 				 errmsg("relation \"%s\" already exists",
820 						indexRelationName)));
821 	}
822 
823 	/*
824 	 * construct tuple descriptor for index tuples
825 	 */
826 	indexTupDesc = ConstructTupleDescriptor(heapRelation,
827 											indexInfo,
828 											indexColNames,
829 											accessMethodObjectId,
830 											collationObjectId,
831 											classObjectId);
832 
833 	/*
834 	 * Allocate an OID for the index, unless we were told what to use.
835 	 *
836 	 * The OID will be the relfilenode as well, so make sure it doesn't
837 	 * collide with either pg_class OIDs or existing physical files.
838 	 */
839 	if (!OidIsValid(indexRelationId))
840 	{
841 		/* Use binary-upgrade override for pg_class.oid/relfilenode? */
842 		if (IsBinaryUpgrade)
843 		{
844 			if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
845 				ereport(ERROR,
846 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
847 						 errmsg("pg_class index OID value not set when in binary upgrade mode")));
848 
849 			indexRelationId = binary_upgrade_next_index_pg_class_oid;
850 			binary_upgrade_next_index_pg_class_oid = InvalidOid;
851 		}
852 		else
853 		{
854 			indexRelationId =
855 				GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
856 		}
857 	}
858 
859 	/*
860 	 * create the index relation's relcache entry and physical disk file. (If
861 	 * we fail further down, it's the smgr's responsibility to remove the disk
862 	 * file again.)
863 	 */
864 	indexRelation = heap_create(indexRelationName,
865 								namespaceId,
866 								tableSpaceId,
867 								indexRelationId,
868 								relFileNode,
869 								indexTupDesc,
870 								RELKIND_INDEX,
871 								relpersistence,
872 								shared_relation,
873 								mapped_relation,
874 								allow_system_table_mods);
875 
876 	Assert(indexRelationId == RelationGetRelid(indexRelation));
877 
878 	/*
879 	 * Obtain exclusive lock on it.  Although no other backends can see it
880 	 * until we commit, this prevents deadlock-risk complaints from lock
881 	 * manager in cases such as CLUSTER.
882 	 */
883 	LockRelation(indexRelation, AccessExclusiveLock);
884 
885 	/*
886 	 * Fill in fields of the index's pg_class entry that are not set correctly
887 	 * by heap_create.
888 	 *
889 	 * XXX should have a cleaner way to create cataloged indexes
890 	 */
891 	indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
892 	indexRelation->rd_rel->relam = accessMethodObjectId;
893 	indexRelation->rd_rel->relhasoids = false;
894 
895 	/*
896 	 * store index's pg_class entry
897 	 */
898 	InsertPgClassTuple(pg_class, indexRelation,
899 					   RelationGetRelid(indexRelation),
900 					   (Datum) 0,
901 					   reloptions);
902 
903 	/* done with pg_class */
904 	heap_close(pg_class, RowExclusiveLock);
905 
906 	/*
907 	 * now update the object id's of all the attribute tuple forms in the
908 	 * index relation's tuple descriptor
909 	 */
910 	InitializeAttributeOids(indexRelation,
911 							indexInfo->ii_NumIndexAttrs,
912 							indexRelationId);
913 
914 	/*
915 	 * append ATTRIBUTE tuples for the index
916 	 */
917 	AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
918 
919 	/* ----------------
920 	 *	  update pg_index
921 	 *	  (append INDEX tuple)
922 	 *
923 	 *	  Note that this stows away a representation of "predicate".
924 	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
925 	 * ----------------
926 	 */
927 	UpdateIndexRelation(indexRelationId, heapRelationId, indexInfo,
928 						collationObjectId, classObjectId, coloptions,
929 						isprimary, is_exclusion,
930 						!deferrable,
931 						!concurrent);
932 
933 	/*
	 * Register relcache invalidation on the index's heap relation, to
935 	 * maintain consistency of its index list
936 	 */
937 	CacheInvalidateRelcache(heapRelation);
938 
939 	/*
940 	 * Register constraint and dependencies for the index.
941 	 *
942 	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
943 	 * entry.  The index will be linked to the constraint, which in turn is
944 	 * linked to the table.  If it's not a CONSTRAINT, we need to make a
945 	 * dependency directly on the table.
946 	 *
947 	 * We don't need a dependency on the namespace, because there'll be an
948 	 * indirect dependency via our parent table.
949 	 *
950 	 * During bootstrap we can't register any dependencies, and we don't try
951 	 * to make a constraint either.
952 	 */
953 	if (!IsBootstrapProcessingMode())
954 	{
955 		ObjectAddress myself,
956 					referenced;
957 
958 		myself.classId = RelationRelationId;
959 		myself.objectId = indexRelationId;
960 		myself.objectSubId = 0;
961 
962 		if (isconstraint)
963 		{
964 			char		constraintType;
965 
966 			if (isprimary)
967 				constraintType = CONSTRAINT_PRIMARY;
968 			else if (indexInfo->ii_Unique)
969 				constraintType = CONSTRAINT_UNIQUE;
970 			else if (is_exclusion)
971 				constraintType = CONSTRAINT_EXCLUSION;
972 			else
973 			{
974 				elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
975 				constraintType = 0; /* keep compiler quiet */
976 			}
977 
978 			index_constraint_create(heapRelation,
979 									indexRelationId,
980 									indexInfo,
981 									indexRelationName,
982 									constraintType,
983 									deferrable,
984 									initdeferred,
985 									false,	/* already marked primary */
986 									false,	/* pg_index entry is OK */
987 									false,	/* no old dependencies */
988 									allow_system_table_mods,
989 									is_internal);
990 		}
991 		else
992 		{
993 			bool		have_simple_col = false;
994 
995 			/* Create auto dependencies on simply-referenced columns */
996 			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
997 			{
998 				if (indexInfo->ii_KeyAttrNumbers[i] != 0)
999 				{
1000 					referenced.classId = RelationRelationId;
1001 					referenced.objectId = heapRelationId;
1002 					referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];
1003 
1004 					recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1005 
1006 					have_simple_col = true;
1007 				}
1008 			}
1009 
1010 			/*
1011 			 * If there are no simply-referenced columns, give the index an
1012 			 * auto dependency on the whole table.  In most cases, this will
1013 			 * be redundant, but it might not be if the index expressions and
1014 			 * predicate contain no Vars or only whole-row Vars.
1015 			 */
1016 			if (!have_simple_col)
1017 			{
1018 				referenced.classId = RelationRelationId;
1019 				referenced.objectId = heapRelationId;
1020 				referenced.objectSubId = 0;
1021 
1022 				recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
1023 			}
1024 
1025 			/* Non-constraint indexes can't be deferrable */
1026 			Assert(!deferrable);
1027 			Assert(!initdeferred);
1028 		}
1029 
1030 		/* Store dependency on collations */
1031 		/* The default collation is pinned, so don't bother recording it */
1032 		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1033 		{
1034 			if (OidIsValid(collationObjectId[i]) &&
1035 				collationObjectId[i] != DEFAULT_COLLATION_OID)
1036 			{
1037 				referenced.classId = CollationRelationId;
1038 				referenced.objectId = collationObjectId[i];
1039 				referenced.objectSubId = 0;
1040 
1041 				recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1042 			}
1043 		}
1044 
1045 		/* Store dependency on operator classes */
1046 		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1047 		{
1048 			referenced.classId = OperatorClassRelationId;
1049 			referenced.objectId = classObjectId[i];
1050 			referenced.objectSubId = 0;
1051 
1052 			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1053 		}
1054 
1055 		/* Store dependencies on anything mentioned in index expressions */
1056 		if (indexInfo->ii_Expressions)
1057 		{
1058 			recordDependencyOnSingleRelExpr(&myself,
1059 											(Node *) indexInfo->ii_Expressions,
1060 											heapRelationId,
1061 											DEPENDENCY_NORMAL,
1062 											DEPENDENCY_AUTO, false);
1063 		}
1064 
1065 		/* Store dependencies on anything mentioned in predicate */
1066 		if (indexInfo->ii_Predicate)
1067 		{
1068 			recordDependencyOnSingleRelExpr(&myself,
1069 											(Node *) indexInfo->ii_Predicate,
1070 											heapRelationId,
1071 											DEPENDENCY_NORMAL,
1072 											DEPENDENCY_AUTO, false);
1073 		}
1074 	}
1075 	else
1076 	{
1077 		/* Bootstrap mode - assert we weren't asked for constraint support */
1078 		Assert(!isconstraint);
1079 		Assert(!deferrable);
1080 		Assert(!initdeferred);
1081 	}
1082 
1083 	/* Post creation hook for new index */
1084 	InvokeObjectPostCreateHookArg(RelationRelationId,
1085 								  indexRelationId, 0, is_internal);
1086 
1087 	/*
1088 	 * Advance the command counter so that we can see the newly-entered
1089 	 * catalog tuples for the index.
1090 	 */
1091 	CommandCounterIncrement();
1092 
1093 	/*
1094 	 * In bootstrap mode, we have to fill in the index strategy structure with
1095 	 * information from the catalogs.  If we aren't bootstrapping, then the
1096 	 * relcache entry has already been rebuilt thanks to sinval update during
1097 	 * CommandCounterIncrement.
1098 	 */
1099 	if (IsBootstrapProcessingMode())
1100 		RelationInitIndexAccessInfo(indexRelation);
1101 	else
1102 		Assert(indexRelation->rd_indexcxt != NULL);
1103 
1104 	/*
1105 	 * If this is bootstrap (initdb) time, then we don't actually fill in the
1106 	 * index yet.  We'll be creating more indexes and classes later, so we
1107 	 * delay filling them in until just before we're done with bootstrapping.
1108 	 * Similarly, if the caller specified skip_build then filling the index is
1109 	 * delayed till later (ALTER TABLE can save work in some cases with this).
1110 	 * Otherwise, we call the AM routine that constructs the index.
1111 	 */
1112 	if (IsBootstrapProcessingMode())
1113 	{
1114 		index_register(heapRelationId, indexRelationId, indexInfo);
1115 	}
1116 	else if (skip_build)
1117 	{
1118 		/*
1119 		 * Caller is responsible for filling the index later on.  However,
1120 		 * we'd better make sure that the heap relation is correctly marked as
1121 		 * having an index.
1122 		 */
1123 		index_update_stats(heapRelation,
1124 						   true,
1125 						   isprimary,
1126 						   -1.0);
1127 		/* Make the above update visible */
1128 		CommandCounterIncrement();
1129 	}
1130 	else
1131 	{
1132 		index_build(heapRelation, indexRelation, indexInfo, isprimary, false);
1133 	}
1134 
1135 	/*
1136 	 * Close the index; but we keep the lock that we acquired above until end
1137 	 * of transaction.  Closing the heap is caller's responsibility.
1138 	 */
1139 	index_close(indexRelation, NoLock);
1140 
1141 	return indexRelationId;
1142 }
1143 
1144 /*
1145  * index_constraint_create
1146  *
1147  * Set up a constraint associated with an index.  Return the new constraint's
1148  * address.
1149  *
1150  * heapRelation: table owning the index (must be suitably locked by caller)
1151  * indexRelationId: OID of the index
1152  * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
1154  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1155  *		CONSTRAINT_EXCLUSION
1156  * deferrable: constraint is DEFERRABLE
1157  * initdeferred: constraint is INITIALLY DEFERRED
1158  * mark_as_primary: if true, set flags to mark index as primary key
1159  * update_pgindex: if true, update pg_index row (else caller's done that)
1160  * remove_old_dependencies: if true, remove existing dependencies of index
1161  *		on table's columns
1162  * allow_system_table_mods: allow table to be a system catalog
1163  * is_internal: index is constructed due to internal process
1164  */
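/*
 * Illustrative sketch, not code from this file (index_create() above is
 * the in-tree caller for the constraint-comes-with-index case): promoting
 * a pre-existing unique index to a UNIQUE constraint, as ALTER TABLE ...
 * ADD CONSTRAINT ... USING INDEX does, might look roughly like
 *
 *		address = index_constraint_create(heapRel, indexOid, indexInfo,
 *										  "foo_col_key",
 *										  CONSTRAINT_UNIQUE,
 *										  false, false,		(not deferrable)
 *										  false,			(not a primary key)
 *										  true,				(update pg_index row)
 *										  true,				(drop old auto deps)
 *										  false,			(no system-table mods)
 *										  false);			(not internal)
 *
 * where heapRel, indexOid and indexInfo are hypothetical values describing
 * the existing index and its table.
 */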
1165 ObjectAddress
index_constraint_create(Relation heapRelation,
1167 						Oid indexRelationId,
1168 						IndexInfo *indexInfo,
1169 						const char *constraintName,
1170 						char constraintType,
1171 						bool deferrable,
1172 						bool initdeferred,
1173 						bool mark_as_primary,
1174 						bool update_pgindex,
1175 						bool remove_old_dependencies,
1176 						bool allow_system_table_mods,
1177 						bool is_internal)
1178 {
1179 	Oid			namespaceId = RelationGetNamespace(heapRelation);
1180 	ObjectAddress myself,
1181 				referenced;
1182 	Oid			conOid;
1183 
1184 	/* constraint creation support doesn't work while bootstrapping */
1185 	Assert(!IsBootstrapProcessingMode());
1186 
1187 	/* enforce system-table restriction */
1188 	if (!allow_system_table_mods &&
1189 		IsSystemRelation(heapRelation) &&
1190 		IsNormalProcessingMode())
1191 		ereport(ERROR,
1192 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1193 				 errmsg("user-defined indexes on system catalog tables are not supported")));
1194 
1195 	/* primary/unique constraints shouldn't have any expressions */
1196 	if (indexInfo->ii_Expressions &&
1197 		constraintType != CONSTRAINT_EXCLUSION)
1198 		elog(ERROR, "constraints cannot have index expressions");
1199 
1200 	/*
1201 	 * If we're manufacturing a constraint for a pre-existing index, we need
1202 	 * to get rid of the existing auto dependencies for the index (the ones
1203 	 * that index_create() would have made instead of calling this function).
1204 	 *
1205 	 * Note: this code would not necessarily do the right thing if the index
1206 	 * has any expressions or predicate, but we'd never be turning such an
1207 	 * index into a UNIQUE or PRIMARY KEY constraint.
1208 	 */
1209 	if (remove_old_dependencies)
1210 		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
1211 										RelationRelationId, DEPENDENCY_AUTO);
1212 
1213 	/*
1214 	 * Construct a pg_constraint entry.
1215 	 */
1216 	conOid = CreateConstraintEntry(constraintName,
1217 								   namespaceId,
1218 								   constraintType,
1219 								   deferrable,
1220 								   initdeferred,
1221 								   true,
1222 								   RelationGetRelid(heapRelation),
1223 								   indexInfo->ii_KeyAttrNumbers,
1224 								   indexInfo->ii_NumIndexAttrs,
1225 								   InvalidOid,	/* no domain */
1226 								   indexRelationId, /* index OID */
1227 								   InvalidOid,	/* no foreign key */
1228 								   NULL,
1229 								   NULL,
1230 								   NULL,
1231 								   NULL,
1232 								   0,
1233 								   ' ',
1234 								   ' ',
1235 								   ' ',
1236 								   indexInfo->ii_ExclusionOps,
1237 								   NULL,	/* no check constraint */
1238 								   NULL,
1239 								   NULL,
1240 								   true,	/* islocal */
1241 								   0,	/* inhcount */
1242 								   true,	/* noinherit */
1243 								   is_internal);
1244 
1245 	/*
1246 	 * Register the index as internally dependent on the constraint.
1247 	 *
1248 	 * Note that the constraint has a dependency on the table, so we don't
1249 	 * need (or want) any direct dependency from the index to the table.
1250 	 */
1251 	myself.classId = RelationRelationId;
1252 	myself.objectId = indexRelationId;
1253 	myself.objectSubId = 0;
1254 
1255 	referenced.classId = ConstraintRelationId;
1256 	referenced.objectId = conOid;
1257 	referenced.objectSubId = 0;
1258 
1259 	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
1260 
1261 	/*
1262 	 * If the constraint is deferrable, create the deferred uniqueness
1263 	 * checking trigger.  (The trigger will be given an internal dependency on
1264 	 * the constraint by CreateTrigger.)
1265 	 */
1266 	if (deferrable)
1267 	{
1268 		CreateTrigStmt *trigger;
1269 
1270 		trigger = makeNode(CreateTrigStmt);
1271 		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
1272 			"PK_ConstraintTrigger" :
1273 			"Unique_ConstraintTrigger";
1274 		trigger->relation = NULL;
1275 		trigger->funcname = SystemFuncName("unique_key_recheck");
1276 		trigger->args = NIL;
1277 		trigger->row = true;
1278 		trigger->timing = TRIGGER_TYPE_AFTER;
1279 		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
1280 		trigger->columns = NIL;
1281 		trigger->whenClause = NULL;
1282 		trigger->isconstraint = true;
1283 		trigger->deferrable = true;
1284 		trigger->initdeferred = initdeferred;
1285 		trigger->constrrel = NULL;
1286 
1287 		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
1288 							 InvalidOid, conOid, indexRelationId, true);
1289 	}
1290 
1291 	/*
1292 	 * If needed, mark the table as having a primary key.  We assume it can't
1293 	 * have been so marked already, so no need to clear the flag in the other
1294 	 * case.
1295 	 *
1296 	 * Note: this might better be done by callers.  We do it here to avoid
1297 	 * exposing index_update_stats() globally, but that wouldn't be necessary
1298 	 * if relhaspkey went away.
1299 	 */
1300 	if (mark_as_primary)
1301 		index_update_stats(heapRelation,
1302 						   true,
1303 						   true,
1304 						   -1.0);
1305 
1306 	/*
1307 	 * If needed, mark the index as primary and/or deferred in pg_index.
1308 	 *
1309 	 * Note: When making an existing index into a constraint, caller must have
1310 	 * a table lock that prevents concurrent table updates; otherwise, there
1311 	 * is a risk that concurrent readers of the table will miss seeing this
1312 	 * index at all.
1313 	 */
1314 	if (update_pgindex && (mark_as_primary || deferrable))
1315 	{
1316 		Relation	pg_index;
1317 		HeapTuple	indexTuple;
1318 		Form_pg_index indexForm;
1319 		bool		dirty = false;
1320 
1321 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1322 
1323 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
1324 										 ObjectIdGetDatum(indexRelationId));
1325 		if (!HeapTupleIsValid(indexTuple))
1326 			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
1327 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
1328 
1329 		if (mark_as_primary && !indexForm->indisprimary)
1330 		{
1331 			indexForm->indisprimary = true;
1332 			dirty = true;
1333 		}
1334 
1335 		if (deferrable && indexForm->indimmediate)
1336 		{
1337 			indexForm->indimmediate = false;
1338 			dirty = true;
1339 		}
1340 
1341 		if (dirty)
1342 		{
1343 			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
1344 
1345 			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
1346 										 InvalidOid, is_internal);
1347 		}
1348 
1349 		heap_freetuple(indexTuple);
1350 		heap_close(pg_index, RowExclusiveLock);
1351 	}
1352 
1353 	return referenced;
1354 }
1355 
1356 /*
1357  *		index_drop
1358  *
1359  * NOTE: this routine should now only be called through performDeletion(),
1360  * else associated dependencies won't be cleaned up.
1361  */
1362 void
index_drop(Oid indexId, bool concurrent)
1364 {
1365 	Oid			heapId;
1366 	Relation	userHeapRelation;
1367 	Relation	userIndexRelation;
1368 	Relation	indexRelation;
1369 	HeapTuple	tuple;
1370 	bool		hasexprs;
1371 	LockRelId	heaprelid,
1372 				indexrelid;
1373 	LOCKTAG		heaplocktag;
1374 	LOCKMODE	lockmode;
1375 
1376 	/*
1377 	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
1378 	 * access a temporary relation, so there's no harm in grabbing a stronger
1379 	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
1380 	 * more efficient.
1381 	 */
1382 	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
1383 		   !concurrent);
1384 
1385 	/*
1386 	 * To drop an index safely, we must grab exclusive lock on its parent
1387 	 * table.  Exclusive lock on the index alone is insufficient because
1388 	 * another backend might be about to execute a query on the parent table.
1389 	 * If it relies on a previously cached list of index OIDs, then it could
1390 	 * attempt to access the just-dropped index.  We must therefore take a
1391 	 * table lock strong enough to prevent all queries on the table from
1392 	 * proceeding until we commit and send out a shared-cache-inval notice
1393 	 * that will make them update their index lists.
1394 	 *
1395 	 * In the concurrent case we avoid this requirement by disabling index use
1396 	 * in multiple steps and waiting out any transactions that might be using
1397 	 * the index, so we don't need exclusive lock on the parent table. Instead
1398 	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
1399 	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
1400 	 * AccessExclusiveLock on the index below, once we're sure nobody else is
1401 	 * using it.)
1402 	 */
1403 	heapId = IndexGetRelation(indexId, false);
1404 	lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
1405 	userHeapRelation = heap_open(heapId, lockmode);
1406 	userIndexRelation = index_open(indexId, lockmode);
1407 
1408 	/*
1409 	 * We might still have open queries using it in our own session, which the
1410 	 * above locking won't prevent, so test explicitly.
1411 	 */
1412 	CheckTableNotInUse(userIndexRelation, "DROP INDEX");
1413 
1414 	/*
1415 	 * Drop Index Concurrently is more or less the reverse process of Create
1416 	 * Index Concurrently.
1417 	 *
1418 	 * First we unset indisvalid so queries starting afterwards don't use the
1419 	 * index to answer queries anymore.  We have to keep indisready = true so
1420 	 * transactions that are still scanning the index can continue to see
1421 	 * valid index contents.  For instance, if they are using READ COMMITTED
1422 	 * mode, and another transaction makes changes and commits, they need to
1423 	 * see those new tuples in the index.
1424 	 *
1425 	 * After all transactions that could possibly have used the index for
1426 	 * queries end, we can unset indisready and indislive, then wait till
1427 	 * nobody could be touching it anymore.  (Note: we need indislive because
1428 	 * this state must be distinct from the initial state during CREATE INDEX
1429 	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
1430 	 * are false.  That's because in that state, transactions must examine the
1431 	 * index for HOT-safety decisions, while in this state we don't want them
1432 	 * to open it at all.)
1433 	 *
1434 	 * Since all predicate locks on the index are about to be made invalid, we
1435 	 * must promote them to predicate locks on the heap.  In the
1436 	 * non-concurrent case we can just do that now.  In the concurrent case
1437 	 * it's a bit trickier.  The predicate locks must be moved when there are
1438 	 * no index scans in progress on the index and no more can subsequently
1439 	 * start, so that no new predicate locks can be made on the index.  Also,
1440 	 * they must be moved before heap inserts stop maintaining the index, else
1441 	 * the conflict with the predicate lock on the index gap could be missed
1442 	 * before the lock on the heap relation is in place to detect a conflict
1443 	 * based on the heap tuple insert.
1444 	 */
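	/*
	 * In outline, the concurrent path below proceeds as follows (the code
	 * itself is authoritative; this is just a reading aid):
	 *
	 * 1. Clear indisvalid (INDEX_DROP_CLEAR_VALID), commit, and wait out
	 *	  every transaction that might still use the index for queries.
	 * 2. Transfer the index's predicate locks to the heap relation.
	 * 3. Clear indisready and indislive (INDEX_DROP_SET_DEAD), commit, and
	 *	  wait out lockers once more.
	 * 4. Drop the storage and delete the catalog entries.
	 *
	 * The non-concurrent path holds AccessExclusiveLock throughout and goes
	 * straight to steps 2 and 4.
	 */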
1445 	if (concurrent)
1446 	{
1447 		/*
1448 		 * We must commit our transaction in order to make the first pg_index
1449 		 * state update visible to other sessions.  If the DROP machinery has
1450 		 * already performed any other actions (removal of other objects,
1451 		 * pg_depend entries, etc), the commit would make those actions
1452 		 * permanent, which would leave us with inconsistent catalog state if
1453 		 * we fail partway through the following sequence.  Since DROP INDEX
1454 		 * CONCURRENTLY is restricted to dropping just one index that has no
1455 		 * dependencies, we should get here before anything's been done ---
1456 		 * but let's check that to be sure.  We can verify that the current
1457 		 * transaction has not executed any transactional updates by checking
1458 		 * that no XID has been assigned.
1459 		 */
1460 		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
1461 			ereport(ERROR,
1462 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1463 					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));
1464 
1465 		/*
1466 		 * Mark index invalid by updating its pg_index entry
1467 		 */
1468 		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);
1469 
1470 		/*
1471 		 * Invalidate the relcache for the table, so that after this commit
1472 		 * all sessions will refresh any cached plans that might reference the
1473 		 * index.
1474 		 */
1475 		CacheInvalidateRelcache(userHeapRelation);
1476 
1477 		/* save lockrelid and locktag for below, then close but keep locks */
1478 		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
1479 		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1480 		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;
1481 
1482 		heap_close(userHeapRelation, NoLock);
1483 		index_close(userIndexRelation, NoLock);
1484 
1485 		/*
1486 		 * We must commit our current transaction so that the indisvalid
1487 		 * update becomes visible to other transactions; then start another.
1488 		 * Note that any previously-built data structures are lost in the
1489 		 * commit.  The only data we keep past here are the relation IDs.
1490 		 *
1491 		 * Before committing, get a session-level lock on the table, to ensure
1492 		 * that neither it nor the index can be dropped before we finish. This
1493 		 * cannot block, even if someone else is waiting for access, because
1494 		 * we already have the same lock within our transaction.
1495 		 */
1496 		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1497 		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1498 
1499 		PopActiveSnapshot();
1500 		CommitTransactionCommand();
1501 		StartTransactionCommand();
1502 
1503 		/*
1504 		 * Now we must wait until no running transaction could be using the
1505 		 * index for a query.  Use AccessExclusiveLock here to check for
1506 		 * running transactions that hold locks of any kind on the table. Note
1507 		 * we do not need to worry about xacts that open the table for reading
1508 		 * after this point; they will see the index as invalid when they open
1509 		 * the relation.
1510 		 *
1511 		 * Note: the reason we use actual lock acquisition here, rather than
1512 		 * just checking the ProcArray and sleeping, is that deadlock is
1513 		 * possible if one of the transactions in question is blocked trying
1514 		 * to acquire an exclusive lock on our table.  The lock code will
1515 		 * detect deadlock and error out properly.
1516 		 */
1517 		WaitForLockers(heaplocktag, AccessExclusiveLock);
1518 
1519 		/*
1520 		 * No more predicate locks will be acquired on this index, and we're
1521 		 * about to stop doing inserts into the index which could show
1522 		 * conflicts with existing predicate locks, so now is the time to move
1523 		 * them to the heap relation.
1524 		 */
1525 		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1526 		userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
1527 		TransferPredicateLocksToHeapRelation(userIndexRelation);
1528 
1529 		/*
1530 		 * Now we are sure that nobody uses the index for queries; they just
1531 		 * might have it open for updating it.  So now we can unset indisready
1532 		 * and indislive, then wait till nobody could be using it at all
1533 		 * anymore.
1534 		 */
1535 		index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);
1536 
1537 		/*
1538 		 * Invalidate the relcache for the table, so that after this commit
1539 		 * all sessions will refresh the table's index list.  Forgetting just
1540 		 * the index's relcache entry is not enough.
1541 		 */
1542 		CacheInvalidateRelcache(userHeapRelation);
1543 
1544 		/*
1545 		 * Close the relations again, though still holding session lock.
1546 		 */
1547 		heap_close(userHeapRelation, NoLock);
1548 		index_close(userIndexRelation, NoLock);
1549 
1550 		/*
1551 		 * Again, commit the transaction to make the pg_index update visible
1552 		 * to other sessions.
1553 		 */
1554 		CommitTransactionCommand();
1555 		StartTransactionCommand();
1556 
1557 		/*
1558 		 * Wait till every transaction that saw the old index state has
1559 		 * finished.
1560 		 */
1561 		WaitForLockers(heaplocktag, AccessExclusiveLock);
1562 
1563 		/*
1564 		 * Re-open relations to allow us to complete our actions.
1565 		 *
1566 		 * At this point, nothing should be accessing the index, but let's
1567 		 * leave nothing to chance and grab AccessExclusiveLock on the index
1568 		 * before the physical deletion.
1569 		 */
1570 		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
1571 		userIndexRelation = index_open(indexId, AccessExclusiveLock);
1572 	}
1573 	else
1574 	{
1575 		/* Not concurrent, so just transfer predicate locks and we're good */
1576 		TransferPredicateLocksToHeapRelation(userIndexRelation);
1577 	}
1578 
1579 	/*
1580 	 * Schedule physical removal of the files
1581 	 */
1582 	RelationDropStorage(userIndexRelation);
1583 
1584 	/*
1585 	 * Close and flush the index's relcache entry, to ensure relcache doesn't
1586 	 * try to rebuild it while we're deleting catalog entries. We keep the
1587 	 * lock though.
1588 	 */
1589 	index_close(userIndexRelation, NoLock);
1590 
1591 	RelationForgetRelation(indexId);
1592 
1593 	/*
1594 	 * fix INDEX relation, and check for expressional index
1595 	 */
1596 	indexRelation = heap_open(IndexRelationId, RowExclusiveLock);
1597 
1598 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
1599 	if (!HeapTupleIsValid(tuple))
1600 		elog(ERROR, "cache lookup failed for index %u", indexId);
1601 
1602 	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs);
1603 
1604 	CatalogTupleDelete(indexRelation, &tuple->t_self);
1605 
1606 	ReleaseSysCache(tuple);
1607 	heap_close(indexRelation, RowExclusiveLock);
1608 
1609 	/*
1610 	 * if it has any expression columns, we might have stored statistics about
1611 	 * them.
1612 	 */
1613 	if (hasexprs)
1614 		RemoveStatistics(indexId, 0);
1615 
1616 	/*
1617 	 * fix ATTRIBUTE relation
1618 	 */
1619 	DeleteAttributeTuples(indexId);
1620 
1621 	/*
1622 	 * fix RELATION relation
1623 	 */
1624 	DeleteRelationTuple(indexId);
1625 
1626 	/*
1627 	 * We are presently too lazy to attempt to compute the new correct value
1628 	 * of relhasindex (the next VACUUM will fix it if necessary). So there is
1629 	 * no need to update the pg_class tuple for the owning relation. But we
1630 	 * must send out a shared-cache-inval notice on the owning relation to
1631 	 * ensure other backends update their relcache lists of indexes.  (In the
1632 	 * concurrent case, this is redundant but harmless.)
1633 	 */
1634 	CacheInvalidateRelcache(userHeapRelation);
1635 
1636 	/*
1637 	 * Close owning rel, but keep lock
1638 	 */
1639 	heap_close(userHeapRelation, NoLock);
1640 
1641 	/*
1642 	 * Release the session locks before we go.
1643 	 */
1644 	if (concurrent)
1645 	{
1646 		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1647 		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
1648 	}
1649 }
1650 
1651 /* ----------------------------------------------------------------
1652  *						index_build support
1653  * ----------------------------------------------------------------
1654  */
1655 
1656 /* ----------------
1657  *		BuildIndexInfo
1658  *			Construct an IndexInfo record for an open index
1659  *
1660  * IndexInfo stores the information about the index that's needed by
1661  * FormIndexDatum, which is used for both index_build() and later insertion
1662  * of individual index tuples.  Normally we build an IndexInfo for an index
1663  * just once per command, and then use it for (potentially) many tuples.
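 *
 * For example, validate_index() below and the executor's ExecOpenIndices()
 * each build an IndexInfo once and then reuse it for every tuple they
 * process.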
1664  * ----------------
1665  */
1666 IndexInfo *
1667 BuildIndexInfo(Relation index)
1668 {
1669 	IndexInfo  *ii = makeNode(IndexInfo);
1670 	Form_pg_index indexStruct = index->rd_index;
1671 	int			i;
1672 	int			numKeys;
1673 
1674 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1675 	numKeys = indexStruct->indnatts;
1676 	if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1677 		elog(ERROR, "invalid indnatts %d for index %u",
1678 			 numKeys, RelationGetRelid(index));
1679 	ii->ii_NumIndexAttrs = numKeys;
1680 	for (i = 0; i < numKeys; i++)
1681 		ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1682 
1683 	/* fetch any expressions needed for expressional indexes */
1684 	ii->ii_Expressions = RelationGetIndexExpressions(index);
1685 	ii->ii_ExpressionsState = NIL;
1686 
1687 	/* fetch index predicate if any */
1688 	ii->ii_Predicate = RelationGetIndexPredicate(index);
1689 	ii->ii_PredicateState = NULL;
1690 
1691 	/* fetch exclusion constraint info if any */
1692 	if (indexStruct->indisexclusion)
1693 	{
1694 		RelationGetExclusionInfo(index,
1695 								 &ii->ii_ExclusionOps,
1696 								 &ii->ii_ExclusionProcs,
1697 								 &ii->ii_ExclusionStrats);
1698 	}
1699 	else
1700 	{
1701 		ii->ii_ExclusionOps = NULL;
1702 		ii->ii_ExclusionProcs = NULL;
1703 		ii->ii_ExclusionStrats = NULL;
1704 	}
1705 
1706 	/* other info */
1707 	ii->ii_Unique = indexStruct->indisunique;
1708 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1709 	/* assume not doing speculative insertion for now */
1710 	ii->ii_UniqueOps = NULL;
1711 	ii->ii_UniqueProcs = NULL;
1712 	ii->ii_UniqueStrats = NULL;
1713 
1714 	/* initialize index-build state to default */
1715 	ii->ii_Concurrent = false;
1716 	ii->ii_BrokenHotChain = false;
1717 
1718 	/* set up for possible use by index AM */
1719 	ii->ii_AmCache = NULL;
1720 	ii->ii_Context = CurrentMemoryContext;
1721 
1722 	return ii;
1723 }
1724 
1725 /* ----------------
1726  *		BuildDummyIndexInfo
1727  *			Construct a dummy IndexInfo record for an open index
1728  *
1729  * This differs from the real BuildIndexInfo in that it will never run any
1730  * user-defined code that might exist in index expressions or predicates.
1731  * Instead of the real index expressions, we return null constants that have
1732  * the right types/typmods/collations.  Predicates and exclusion clauses are
1733  * just ignored.  This is sufficient for the purpose of truncating an index,
1734  * since we will not need to actually evaluate the expressions or predicates;
1735  * the only thing that's likely to be done with the data is construction of
1736  * a tupdesc describing the index's rowtype.
1737  * ----------------
1738  */
1739 IndexInfo *
1740 BuildDummyIndexInfo(Relation index)
1741 {
1742 	IndexInfo  *ii = makeNode(IndexInfo);
1743 	Form_pg_index indexStruct = index->rd_index;
1744 	int			i;
1745 	int			numKeys;
1746 
1747 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1748 	numKeys = indexStruct->indnatts;
1749 	if (numKeys < 1 || numKeys > INDEX_MAX_KEYS)
1750 		elog(ERROR, "invalid indnatts %d for index %u",
1751 			 numKeys, RelationGetRelid(index));
1752 	ii->ii_NumIndexAttrs = numKeys;
1753 	for (i = 0; i < numKeys; i++)
1754 		ii->ii_KeyAttrNumbers[i] = indexStruct->indkey.values[i];
1755 
1756 	/* fetch dummy expressions for expressional indexes */
1757 	ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1758 	ii->ii_ExpressionsState = NIL;
1759 
1760 	/* pretend there is no predicate */
1761 	ii->ii_Predicate = NIL;
1762 	ii->ii_PredicateState = NULL;
1763 
1764 	/* We ignore the exclusion constraint if any */
1765 	ii->ii_ExclusionOps = NULL;
1766 	ii->ii_ExclusionProcs = NULL;
1767 	ii->ii_ExclusionStrats = NULL;
1768 
1769 	/* other info */
1770 	ii->ii_Unique = indexStruct->indisunique;
1771 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1772 	/* assume not doing speculative insertion for now */
1773 	ii->ii_UniqueOps = NULL;
1774 	ii->ii_UniqueProcs = NULL;
1775 	ii->ii_UniqueStrats = NULL;
1776 
1777 	/* initialize index-build state to default */
1778 	ii->ii_Concurrent = false;
1779 	ii->ii_BrokenHotChain = false;
1780 
1781 	/* set up for possible use by index AM */
1782 	ii->ii_AmCache = NULL;
1783 	ii->ii_Context = CurrentMemoryContext;
1784 
1785 	return ii;
1786 }
1787 
1788 /* ----------------
1789  *		BuildSpeculativeIndexInfo
1790  *			Add extra state to IndexInfo record
1791  *
1792  * For unique indexes, we usually don't want to add info to the IndexInfo for
1793  * checking uniqueness, since the B-Tree AM handles that directly.  However,
1794  * in the case of speculative insertion, additional support is required.
1795  *
1796  * Do this processing here rather than in BuildIndexInfo() to not incur the
1797  * overhead in the common non-speculative cases.
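 *
 * (Speculative insertion is the mechanism behind INSERT ... ON CONFLICT;
 * the per-column operator and support-function info collected here lets the
 * executor recheck uniqueness for itself.)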
1798  * ----------------
1799  */
1800 void
1801 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
1802 {
1803 	int			ncols = index->rd_rel->relnatts;
1804 	int			i;
1805 
1806 	/*
1807 	 * fetch info for checking unique indexes
1808 	 */
1809 	Assert(ii->ii_Unique);
1810 
1811 	if (index->rd_rel->relam != BTREE_AM_OID)
1812 		elog(ERROR, "unexpected non-btree speculative unique index");
1813 
1814 	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * ncols);
1815 	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * ncols);
1816 	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * ncols);
1817 
1818 	/*
1819 	 * We have to look up the operator's strategy number.  This provides a
1820 	 * cross-check that the operator does match the index.
1821 	 */
1822 	/* We need the func OIDs and strategy numbers too */
1823 	for (i = 0; i < ncols; i++)
1824 	{
1825 		ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
1826 		ii->ii_UniqueOps[i] =
1827 			get_opfamily_member(index->rd_opfamily[i],
1828 								index->rd_opcintype[i],
1829 								index->rd_opcintype[i],
1830 								ii->ii_UniqueStrats[i]);
1831 		if (!OidIsValid(ii->ii_UniqueOps[i]))
1832 			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
1833 				 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
1834 				 index->rd_opcintype[i], index->rd_opfamily[i]);
1835 		ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
1836 	}
1837 }
1838 
1839 /* ----------------
1840  *		FormIndexDatum
1841  *			Construct values[] and isnull[] arrays for a new index tuple.
1842  *
1843  *	indexInfo		Info about the index
1844  *	slot			Heap tuple for which we must prepare an index entry
1845  *	estate			executor state for evaluating any index expressions
1846  *	values			Array of index Datums (output area)
1847  *	isnull			Array of is-null indicators (output area)
1848  *
1849  * When there are no index expressions, estate may be NULL.  Otherwise it
1850  * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
1851  * context must point to the heap tuple passed in.
1852  *
1853  * Notice we don't actually call index_form_tuple() here; we just prepare
1854  * its input arrays values[] and isnull[].  This is because the index AM
1855  * may wish to alter the data before storage.
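 *
 * Typical callers (e.g. IndexBuildHeapScan() and IndexCheckExclusion()
 * below) fill values[]/isnull[] with this routine and then hand the arrays
 * to the index AM, for instance via index_insert() or a build callback.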
1856  * ----------------
1857  */
1858 void
1859 FormIndexDatum(IndexInfo *indexInfo,
1860 			   TupleTableSlot *slot,
1861 			   EState *estate,
1862 			   Datum *values,
1863 			   bool *isnull)
1864 {
1865 	ListCell   *indexpr_item;
1866 	int			i;
1867 
1868 	if (indexInfo->ii_Expressions != NIL &&
1869 		indexInfo->ii_ExpressionsState == NIL)
1870 	{
1871 		/* First time through, set up expression evaluation state */
1872 		indexInfo->ii_ExpressionsState =
1873 			ExecPrepareExprList(indexInfo->ii_Expressions, estate);
1874 		/* Check caller has set up context correctly */
1875 		Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
1876 	}
1877 	indexpr_item = list_head(indexInfo->ii_ExpressionsState);
1878 
1879 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1880 	{
1881 		int			keycol = indexInfo->ii_KeyAttrNumbers[i];
1882 		Datum		iDatum;
1883 		bool		isNull;
1884 
1885 		if (keycol != 0)
1886 		{
1887 			/*
1888 			 * Plain index column; get the value we need directly from the
1889 			 * heap tuple.
1890 			 */
1891 			iDatum = slot_getattr(slot, keycol, &isNull);
1892 		}
1893 		else
1894 		{
1895 			/*
1896 			 * Index expression --- need to evaluate it.
1897 			 */
1898 			if (indexpr_item == NULL)
1899 				elog(ERROR, "wrong number of index expressions");
1900 			iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
1901 											   GetPerTupleExprContext(estate),
1902 											   &isNull);
1903 			indexpr_item = lnext(indexpr_item);
1904 		}
1905 		values[i] = iDatum;
1906 		isnull[i] = isNull;
1907 	}
1908 
1909 	if (indexpr_item != NULL)
1910 		elog(ERROR, "wrong number of index expressions");
1911 }
1912 
1913 
1914 /*
1915  * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
1916  *
1917  * This routine updates the pg_class row of either an index or its parent
1918  * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
1919  * to ensure we can do all the necessary work in just one update.
1920  *
1921  * hasindex: set relhasindex to this value
1922  * isprimary: if true, set relhaspkey true; else no change
1923  * reltuples: if >= 0, set reltuples to this value; else no change
1924  *
1925  * If reltuples >= 0, relpages and relallvisible are also updated (using
1926  * RelationGetNumberOfBlocks() and visibilitymap_count()).
1927  *
1928  * NOTE: an important side-effect of this operation is that an SI invalidation
1929  * message is sent out to all backends --- including me --- causing relcache
1930  * entries to be flushed or updated with the new data.  This must happen even
1931  * if we find that no change is needed in the pg_class row.  When updating
1932  * a heap entry, this ensures that other backends find out about the new
1933  * index.  When updating an index, it's important because some index AMs
1934  * expect a relcache flush to occur after REINDEX.
1935  */
1936 static void
1937 index_update_stats(Relation rel,
1938 				   bool hasindex,
1939 				   bool isprimary,
1940 				   double reltuples)
1941 {
1942 	Oid			relid = RelationGetRelid(rel);
1943 	Relation	pg_class;
1944 	HeapTuple	tuple;
1945 	Form_pg_class rd_rel;
1946 	bool		dirty;
1947 
1948 	/*
1949 	 * We always update the pg_class row using a non-transactional,
1950 	 * overwrite-in-place update.  There are several reasons for this:
1951 	 *
1952 	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
1953 	 *
1954 	 * 2. We could be reindexing pg_class itself, in which case we can't move
1955 	 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
1956 	 * not know about all the indexes yet (see reindex_relation).
1957 	 *
1958 	 * 3. Because we execute CREATE INDEX with just share lock on the parent
1959 	 * rel (to allow concurrent index creations), an ordinary update could
1960 	 * suffer a tuple-concurrently-updated failure against another CREATE
1961 	 * INDEX committing at about the same time.  We can avoid that by having
1962 	 * them both do nontransactional updates (we assume they will both be
1963 	 * trying to change the pg_class row to the same thing, so it doesn't
1964 	 * matter which goes first).
1965 	 *
1966 	 * It is safe to use a non-transactional update even though our
1967 	 * transaction could still fail before committing.  Setting relhasindex
1968 	 * true is safe even if there are no indexes (VACUUM will eventually fix
1969 	 * it), likewise for relhaspkey.  And of course the new relpages and
1970 	 * reltuples counts are correct regardless.  However, we don't want to
1971 	 * change relpages (or relallvisible) if the caller isn't providing an
1972 	 * updated reltuples count, because that would bollix the
1973 	 * reltuples/relpages ratio which is what's really important.
1974 	 */
1975 
1976 	pg_class = heap_open(RelationRelationId, RowExclusiveLock);
1977 
1978 	/*
1979 	 * Make a copy of the tuple to update.  Normally we use the syscache, but
1980 	 * we can't rely on that during bootstrap or while reindexing pg_class
1981 	 * itself.
1982 	 */
1983 	if (IsBootstrapProcessingMode() ||
1984 		ReindexIsProcessingHeap(RelationRelationId))
1985 	{
1986 		/* don't assume syscache will work */
1987 		HeapScanDesc pg_class_scan;
1988 		ScanKeyData key[1];
1989 
1990 		ScanKeyInit(&key[0],
1991 					ObjectIdAttributeNumber,
1992 					BTEqualStrategyNumber, F_OIDEQ,
1993 					ObjectIdGetDatum(relid));
1994 
1995 		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
1996 		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
1997 		tuple = heap_copytuple(tuple);
1998 		heap_endscan(pg_class_scan);
1999 	}
2000 	else
2001 	{
2002 		/* normal case, use syscache */
2003 		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
2004 	}
2005 
2006 	if (!HeapTupleIsValid(tuple))
2007 		elog(ERROR, "could not find tuple for relation %u", relid);
2008 	rd_rel = (Form_pg_class) GETSTRUCT(tuple);
2009 
2010 	/* Apply required updates, if any, to copied tuple */
2011 
2012 	dirty = false;
2013 	if (rd_rel->relhasindex != hasindex)
2014 	{
2015 		rd_rel->relhasindex = hasindex;
2016 		dirty = true;
2017 	}
2018 	if (isprimary)
2019 	{
2020 		if (!rd_rel->relhaspkey)
2021 		{
2022 			rd_rel->relhaspkey = true;
2023 			dirty = true;
2024 		}
2025 	}
2026 
2027 	if (reltuples >= 0)
2028 	{
2029 		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
2030 		BlockNumber relallvisible;
2031 
2032 		if (rd_rel->relkind != RELKIND_INDEX)
2033 			visibilitymap_count(rel, &relallvisible, NULL);
2034 		else					/* don't bother for indexes */
2035 			relallvisible = 0;
2036 
2037 		if (rd_rel->relpages != (int32) relpages)
2038 		{
2039 			rd_rel->relpages = (int32) relpages;
2040 			dirty = true;
2041 		}
2042 		if (rd_rel->reltuples != (float4) reltuples)
2043 		{
2044 			rd_rel->reltuples = (float4) reltuples;
2045 			dirty = true;
2046 		}
2047 		if (rd_rel->relallvisible != (int32) relallvisible)
2048 		{
2049 			rd_rel->relallvisible = (int32) relallvisible;
2050 			dirty = true;
2051 		}
2052 	}
2053 
2054 	/*
2055 	 * If anything changed, write out the tuple
2056 	 */
2057 	if (dirty)
2058 	{
2059 		heap_inplace_update(pg_class, tuple);
2060 		/* the above sends a cache inval message */
2061 	}
2062 	else
2063 	{
2064 		/* no need to change tuple, but force relcache inval anyway */
2065 		CacheInvalidateRelcacheByTuple(tuple);
2066 	}
2067 
2068 	heap_freetuple(tuple);
2069 
2070 	heap_close(pg_class, RowExclusiveLock);
2071 }
2072 
2073 
2074 /*
2075  * index_build - invoke access-method-specific index build procedure
2076  *
2077  * On entry, the index's catalog entries are valid, and its physical disk
2078  * file has been created but is empty.  We call the AM-specific build
2079  * procedure to fill in the index contents.  We then update the pg_class
2080  * entries of the index and heap relation as needed, using statistics
2081  * returned by ambuild as well as data passed by the caller.
2082  *
2083  * isprimary tells whether to mark the index as a primary-key index.
2084  * isreindex indicates we are recreating a previously-existing index.
2085  *
2086  * Note: when reindexing an existing index, isprimary can be false even if
2087  * the index is a PK; it's already properly marked and need not be re-marked.
2088  *
2089  * Note: before Postgres 8.2, the passed-in heap and index Relations
2090  * were automatically closed by this routine.  This is no longer the case.
2091  * The caller opened 'em, and the caller should close 'em.
2092  */
2093 void
2094 index_build(Relation heapRelation,
2095 			Relation indexRelation,
2096 			IndexInfo *indexInfo,
2097 			bool isprimary,
2098 			bool isreindex)
2099 {
2100 	IndexBuildResult *stats;
2101 	Oid			save_userid;
2102 	int			save_sec_context;
2103 	int			save_nestlevel;
2104 
2105 	/*
2106 	 * sanity checks
2107 	 */
2108 	Assert(RelationIsValid(indexRelation));
2109 	Assert(PointerIsValid(indexRelation->rd_amroutine));
2110 	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
2111 	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));
2112 
2113 	ereport(DEBUG1,
2114 			(errmsg("building index \"%s\" on table \"%s\"",
2115 					RelationGetRelationName(indexRelation),
2116 					RelationGetRelationName(heapRelation))));
2117 
2118 	/*
2119 	 * Switch to the table owner's userid, so that any index functions are run
2120 	 * as that user.  Also lock down security-restricted operations and
2121 	 * arrange to make GUC variable changes local to this command.
2122 	 */
2123 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
2124 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2125 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
2126 	save_nestlevel = NewGUCNestLevel();
2127 
2128 	/*
2129 	 * Call the access method's build procedure
2130 	 */
2131 	stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
2132 												 indexInfo);
2133 	Assert(PointerIsValid(stats));
2134 
2135 	/*
2136 	 * If this is an unlogged index, we may need to write out an init fork for
2137 	 * it -- but we must first check whether one already exists.  If, for
2138 	 * example, an unlogged relation is truncated in the transaction that
2139 	 * created it, or truncated twice in a subsequent transaction, the
2140 	 * relfilenode won't change, and nothing needs to be done here.
2141 	 */
2142 	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
2143 		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
2144 	{
2145 		RelationOpenSmgr(indexRelation);
2146 		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
2147 		indexRelation->rd_amroutine->ambuildempty(indexRelation);
2148 	}
2149 
2150 	/*
2151 	 * If we found any potentially broken HOT chains, mark the index as not
2152 	 * being usable until the current transaction is below the event horizon.
2153 	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
2154 	 * if early pruning/vacuuming is enabled for the heap relation.  While it
2155 	 * might become safe to use the index earlier based on actual cleanup
2156 	 * activity and other active transactions, the test for that would be much
2157 	 * more complex and would require some form of blocking, so keep it simple
2158 	 * and fast by just using the current transaction.
2159 	 *
2160 	 * However, when reindexing an existing index, we should do nothing here.
2161 	 * Any HOT chains that are broken with respect to the index must predate
2162 	 * the index's original creation, so there is no need to change the
2163 	 * index's usability horizon.  Moreover, we *must not* try to change the
2164 	 * index's pg_index entry while reindexing pg_index itself, and this
2165 	 * optimization nicely prevents that.  The more complex rules needed for a
2166 	 * reindex are handled separately after this function returns.
2167 	 *
2168 	 * We also need not set indcheckxmin during a concurrent index build,
2169 	 * because we won't set indisvalid true until all transactions that care
2170 	 * about the broken HOT chains or early pruning/vacuuming are gone.
2171 	 *
2172 	 * Therefore, this code path can only be taken during non-concurrent
2173 	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
2174 	 * tuple's xmin doesn't matter, because that tuple was created in the
2175 	 * current transaction anyway.  That also means we don't need to worry
2176 	 * about any concurrent readers of the tuple; no other transaction can see
2177 	 * it yet.
2178 	 */
2179 	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
2180 		!isreindex &&
2181 		!indexInfo->ii_Concurrent)
2182 	{
2183 		Oid			indexId = RelationGetRelid(indexRelation);
2184 		Relation	pg_index;
2185 		HeapTuple	indexTuple;
2186 		Form_pg_index indexForm;
2187 
2188 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
2189 
2190 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
2191 										 ObjectIdGetDatum(indexId));
2192 		if (!HeapTupleIsValid(indexTuple))
2193 			elog(ERROR, "cache lookup failed for index %u", indexId);
2194 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
2195 
2196 		/* If it's a new index, indcheckxmin shouldn't be set ... */
2197 		Assert(!indexForm->indcheckxmin);
2198 
2199 		indexForm->indcheckxmin = true;
2200 		CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
2201 
2202 		heap_freetuple(indexTuple);
2203 		heap_close(pg_index, RowExclusiveLock);
2204 	}
2205 
2206 	/*
2207 	 * Update heap and index pg_class rows
2208 	 */
2209 	index_update_stats(heapRelation,
2210 					   true,
2211 					   isprimary,
2212 					   stats->heap_tuples);
2213 
2214 	index_update_stats(indexRelation,
2215 					   false,
2216 					   false,
2217 					   stats->index_tuples);
2218 
2219 	/* Make the updated catalog row versions visible */
2220 	CommandCounterIncrement();
2221 
2222 	/*
2223 	 * If it's for an exclusion constraint, make a second pass over the heap
2224 	 * to verify that the constraint is satisfied.  We must not do this until
2225 	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
2226 	 * see comments for IndexCheckExclusion.)
2227 	 */
2228 	if (indexInfo->ii_ExclusionOps != NULL)
2229 		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);
2230 
2231 	/* Roll back any GUC changes executed by index functions */
2232 	AtEOXact_GUC(false, save_nestlevel);
2233 
2234 	/* Restore userid and security context */
2235 	SetUserIdAndSecContext(save_userid, save_sec_context);
2236 }
2237 
2238 
2239 /*
2240  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2241  *
2242  * This is called back from an access-method-specific index build procedure
2243  * after the AM has done whatever setup it needs.  The parent heap relation
2244  * is scanned to find tuples that should be entered into the index.  Each
2245  * such tuple is passed to the AM's callback routine, which does the right
2246  * things to add it to the new index.  After we return, the AM's index
2247  * build procedure does whatever cleanup it needs.
2248  *
2249  * The total count of heap tuples is returned.  This is for updating pg_class
2250  * statistics.  (It's annoying not to be able to do that here, but we want
2251  * to merge that update with others; see index_update_stats.)  Note that the
2252  * index AM itself must keep track of the number of index tuples; we don't do
2253  * so here because the AM might reject some of the tuples for its own reasons,
2254  * such as being unable to store NULLs.
2255  *
2256  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2257  * any potentially broken HOT chains.  Currently, we set this if there are
2258  * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2259  * trying very hard to detect whether they're really incompatible with the
2260  * chain tip.
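 *
 * (The btree AM, for instance, invokes this from btbuild() with a callback
 * that feeds each returned tuple into its sort state.)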
2261  */
2262 double
2263 IndexBuildHeapScan(Relation heapRelation,
2264 				   Relation indexRelation,
2265 				   IndexInfo *indexInfo,
2266 				   bool allow_sync,
2267 				   IndexBuildCallback callback,
2268 				   void *callback_state)
2269 {
2270 	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2271 								   indexInfo, allow_sync,
2272 								   false,
2273 								   0, InvalidBlockNumber,
2274 								   callback, callback_state);
2275 }
2276 
2277 /*
2278  * As above, except that instead of scanning the complete heap, only the given
2279  * number of blocks are scanned.  Scan to end-of-rel can be signalled by
2280  * passing InvalidBlockNumber as numblocks.  Note that restricting the range
2281  * to scan cannot be done when requesting syncscan.
2282  *
2283  * When "anyvisible" mode is requested, all tuples visible to any transaction
2284  * are considered, including those inserted or deleted by transactions that are
2285  * still in progress.
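 *
 * (The restricted-range, "anyvisible" form is intended for BRIN range
 * summarization; see summarize_range() in access/brin/brin.c.)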
2286  */
2287 double
2288 IndexBuildHeapRangeScan(Relation heapRelation,
2289 						Relation indexRelation,
2290 						IndexInfo *indexInfo,
2291 						bool allow_sync,
2292 						bool anyvisible,
2293 						BlockNumber start_blockno,
2294 						BlockNumber numblocks,
2295 						IndexBuildCallback callback,
2296 						void *callback_state)
2297 {
2298 	bool		is_system_catalog;
2299 	bool		checking_uniqueness;
2300 	HeapScanDesc scan;
2301 	HeapTuple	heapTuple;
2302 	Datum		values[INDEX_MAX_KEYS];
2303 	bool		isnull[INDEX_MAX_KEYS];
2304 	double		reltuples;
2305 	ExprState  *predicate;
2306 	TupleTableSlot *slot;
2307 	EState	   *estate;
2308 	ExprContext *econtext;
2309 	Snapshot	snapshot;
2310 	TransactionId OldestXmin;
2311 	BlockNumber root_blkno = InvalidBlockNumber;
2312 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
2313 
2314 	/*
2315 	 * sanity checks
2316 	 */
2317 	Assert(OidIsValid(indexRelation->rd_rel->relam));
2318 
2319 	/* Remember if it's a system catalog */
2320 	is_system_catalog = IsSystemRelation(heapRelation);
2321 
2322 	/* See whether we're verifying uniqueness/exclusion properties */
2323 	checking_uniqueness = (indexInfo->ii_Unique ||
2324 						   indexInfo->ii_ExclusionOps != NULL);
2325 
2326 	/*
2327 	 * "Any visible" mode is not compatible with uniqueness checks; make sure
2328 	 * only one of those is requested.
2329 	 */
2330 	Assert(!(anyvisible && checking_uniqueness));
2331 
2332 	/*
2333 	 * Need an EState for evaluation of index expressions and partial-index
2334 	 * predicates.  Also a slot to hold the current tuple.
2335 	 */
2336 	estate = CreateExecutorState();
2337 	econtext = GetPerTupleExprContext(estate);
2338 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2339 
2340 	/* Arrange for econtext's scan tuple to be the tuple under test */
2341 	econtext->ecxt_scantuple = slot;
2342 
2343 	/* Set up execution state for predicate, if any. */
2344 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
2345 
2346 	/*
2347 	 * Prepare for scan of the base relation.  In a normal index build, we use
2348 	 * SnapshotAny because we must retrieve all tuples and do our own time
2349 	 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
2350 	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
2351 	 * and index whatever's live according to that.
2352 	 */
2353 	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2354 	{
2355 		snapshot = RegisterSnapshot(GetTransactionSnapshot());
2356 		OldestXmin = InvalidTransactionId;	/* not used */
2357 
2358 		/* "any visible" mode is not compatible with this */
2359 		Assert(!anyvisible);
2360 	}
2361 	else
2362 	{
2363 		snapshot = SnapshotAny;
2364 		/* okay to ignore lazy VACUUMs here */
2365 		OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);
2366 	}
2367 
2368 	scan = heap_beginscan_strat(heapRelation,	/* relation */
2369 								snapshot,	/* snapshot */
2370 								0,	/* number of keys */
2371 								NULL,	/* scan key */
2372 								true,	/* buffer access strategy OK */
2373 								allow_sync);	/* syncscan OK? */
2374 
2375 	/* set our scan endpoints */
2376 	if (!allow_sync)
2377 		heap_setscanlimits(scan, start_blockno, numblocks);
2378 	else
2379 	{
2380 		/* syncscan can only be requested on whole relation */
2381 		Assert(start_blockno == 0);
2382 		Assert(numblocks == InvalidBlockNumber);
2383 	}
2384 
2385 	reltuples = 0;
2386 
2387 	/*
2388 	 * Scan all tuples in the base relation.
2389 	 */
2390 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2391 	{
2392 		bool		tupleIsAlive;
2393 
2394 		CHECK_FOR_INTERRUPTS();
2395 
2396 		/*
2397 		 * When dealing with a HOT-chain of updated tuples, we want to index
2398 		 * the values of the live tuple (if any), but index it under the TID
2399 		 * of the chain's root tuple.  This approach is necessary to preserve
2400 		 * the HOT-chain structure in the heap. So we need to be able to find
2401 		 * the root item offset for every tuple that's in a HOT-chain.  When
2402 		 * first reaching a new page of the relation, call
2403 		 * heap_get_root_tuples() to build a map of root item offsets on the
2404 		 * page.
2405 		 *
2406 		 * It might look unsafe to use this information across buffer
2407 		 * lock/unlock.  However, we hold ShareLock on the table so no
2408 		 * ordinary insert/update/delete should occur; and we hold pin on the
2409 		 * buffer continuously while visiting the page, so no pruning
2410 		 * operation can occur either.
2411 		 *
2412 		 * In cases with only ShareUpdateExclusiveLock on the table, it's
2413 		 * possible for some HOT tuples to appear that we didn't know about
2414 		 * when we first read the page.  To handle that case, we re-obtain the
2415 		 * list of root offsets when a HOT tuple points to a root item that we
2416 		 * don't know about.
2417 		 *
2418 		 * Also, although our opinions about tuple liveness could change while
2419 		 * we scan the page (due to concurrent transaction commits/aborts),
2420 		 * the chain root locations won't, so this info doesn't need to be
2421 		 * rebuilt after waiting for another transaction.
2422 		 *
2423 		 * Note the implied assumption that there is no more than one live
2424 		 * tuple per HOT-chain --- else we could create more than one index
2425 		 * entry pointing to the same root tuple.
2426 		 */
2427 		if (scan->rs_cblock != root_blkno)
2428 		{
2429 			Page		page = BufferGetPage(scan->rs_cbuf);
2430 
2431 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2432 			heap_get_root_tuples(page, root_offsets);
2433 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2434 
2435 			root_blkno = scan->rs_cblock;
2436 		}
2437 
2438 		if (snapshot == SnapshotAny)
2439 		{
2440 			/* do our own time qual check */
2441 			bool		indexIt;
2442 			TransactionId xwait;
2443 
2444 	recheck:
2445 
2446 			/*
2447 			 * We could possibly get away with not locking the buffer here,
2448 			 * since caller should hold ShareLock on the relation, but let's
2449 			 * be conservative about it.  (This remark is still correct even
2450 			 * with HOT-pruning: our pin on the buffer prevents pruning.)
2451 			 */
2452 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2453 
2454 			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
2455 											 scan->rs_cbuf))
2456 			{
2457 				case HEAPTUPLE_DEAD:
2458 					/* Definitely dead, we can ignore it */
2459 					indexIt = false;
2460 					tupleIsAlive = false;
2461 					break;
2462 				case HEAPTUPLE_LIVE:
2463 					/* Normal case, index and unique-check it */
2464 					indexIt = true;
2465 					tupleIsAlive = true;
2466 					break;
2467 				case HEAPTUPLE_RECENTLY_DEAD:
2468 
2469 					/*
2470 					 * If tuple is recently deleted then we must index it
2471 					 * anyway to preserve MVCC semantics.  (Pre-existing
2472 					 * transactions could try to use the index after we finish
2473 					 * building it, and may need to see such tuples.)
2474 					 *
2475 					 * However, if it was HOT-updated then we must only index
2476 					 * the live tuple at the end of the HOT-chain.  Since this
2477 					 * breaks semantics for pre-existing snapshots, mark the
2478 					 * index as unusable for them.
2479 					 */
2480 					if (HeapTupleIsHotUpdated(heapTuple))
2481 					{
2482 						indexIt = false;
2483 						/* mark the index as unsafe for old snapshots */
2484 						indexInfo->ii_BrokenHotChain = true;
2485 					}
2486 					else
2487 						indexIt = true;
2488 					/* In any case, exclude the tuple from unique-checking */
2489 					tupleIsAlive = false;
2490 					break;
2491 				case HEAPTUPLE_INSERT_IN_PROGRESS:
2492 
2493 					/*
2494 					 * In "anyvisible" mode, this tuple is visible and we
2495 					 * don't need any further checks.
2496 					 */
2497 					if (anyvisible)
2498 					{
2499 						indexIt = true;
2500 						tupleIsAlive = true;
2501 						break;
2502 					}
2503 
2504 					/*
2505 					 * Since caller should hold ShareLock or better, normally
2506 					 * the only way to see this is if it was inserted earlier
2507 					 * in our own transaction.  However, it can happen in
2508 					 * system catalogs, since we tend to release write lock
2509 					 * before commit there.  Give a warning if neither case
2510 					 * applies.
2511 					 */
2512 					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
2513 					if (!TransactionIdIsCurrentTransactionId(xwait))
2514 					{
2515 						if (!is_system_catalog)
2516 							elog(WARNING, "concurrent insert in progress within table \"%s\"",
2517 								 RelationGetRelationName(heapRelation));
2518 
2519 						/*
2520 						 * If we are performing uniqueness checks, indexing
2521 						 * such a tuple could lead to a bogus uniqueness
2522 						 * failure.  In that case we wait for the inserting
2523 						 * transaction to finish and check again.
2524 						 */
2525 						if (checking_uniqueness)
2526 						{
2527 							/*
2528 							 * Must drop the lock on the buffer before we wait
2529 							 */
2530 							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2531 							XactLockTableWait(xwait, heapRelation,
2532 											  &heapTuple->t_self,
2533 											  XLTW_InsertIndexUnique);
2534 							CHECK_FOR_INTERRUPTS();
2535 							goto recheck;
2536 						}
2537 					}
2538 
2539 					/*
2540 					 * We must index such tuples, since if the index build
2541 					 * commits then they're good.
2542 					 */
2543 					indexIt = true;
2544 					tupleIsAlive = true;
2545 					break;
2546 				case HEAPTUPLE_DELETE_IN_PROGRESS:
2547 
2548 					/*
2549 					 * As with the INSERT_IN_PROGRESS case, this is unexpected
2550 					 * unless it's our own deletion or a system catalog; but
2551 					 * in anyvisible mode, this tuple is visible.
2552 					 */
2553 					if (anyvisible)
2554 					{
2555 						indexIt = true;
2556 						tupleIsAlive = false;
2557 						break;
2558 					}
2559 
2560 					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
2561 					if (!TransactionIdIsCurrentTransactionId(xwait))
2562 					{
2563 						if (!is_system_catalog)
2564 							elog(WARNING, "concurrent delete in progress within table \"%s\"",
2565 								 RelationGetRelationName(heapRelation));
2566 
2567 						/*
2568 						 * If we are performing uniqueness checks, assuming
2569 						 * the tuple is dead could lead to missing a
2570 						 * uniqueness violation.  In that case we wait for the
2571 						 * deleting transaction to finish and check again.
2572 						 *
2573 						 * Also, if it's a HOT-updated tuple, we should not
2574 						 * index it but rather the live tuple at the end of
2575 						 * the HOT-chain.  However, the deleting transaction
2576 						 * could abort, possibly leaving this tuple as live
2577 						 * after all, in which case it has to be indexed. The
2578 						 * only way to know what to do is to wait for the
2579 						 * deleting transaction to finish and check again.
2580 						 */
2581 						if (checking_uniqueness ||
2582 							HeapTupleIsHotUpdated(heapTuple))
2583 						{
2584 							/*
2585 							 * Must drop the lock on the buffer before we wait
2586 							 */
2587 							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2588 							XactLockTableWait(xwait, heapRelation,
2589 											  &heapTuple->t_self,
2590 											  XLTW_InsertIndexUnique);
2591 							CHECK_FOR_INTERRUPTS();
2592 							goto recheck;
2593 						}
2594 
2595 						/*
2596 						 * Otherwise index it but don't check for uniqueness,
2597 						 * the same as a RECENTLY_DEAD tuple.
2598 						 */
2599 						indexIt = true;
2600 					}
2601 					else if (HeapTupleIsHotUpdated(heapTuple))
2602 					{
2603 						/*
2604 						 * It's a HOT-updated tuple deleted by our own xact.
2605 						 * We can assume the deletion will commit (else the
2606 						 * index contents don't matter), so treat the same as
2607 						 * RECENTLY_DEAD HOT-updated tuples.
2608 						 */
2609 						indexIt = false;
2610 						/* mark the index as unsafe for old snapshots */
2611 						indexInfo->ii_BrokenHotChain = true;
2612 					}
2613 					else
2614 					{
2615 						/*
2616 						 * It's a regular tuple deleted by our own xact. Index
2617 						 * it but don't check for uniqueness, the same as a
2618 						 * RECENTLY_DEAD tuple.
2619 						 */
2620 						indexIt = true;
2621 					}
2622 					/* In any case, exclude the tuple from unique-checking */
2623 					tupleIsAlive = false;
2624 					break;
2625 				default:
2626 					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
2627 					indexIt = tupleIsAlive = false; /* keep compiler quiet */
2628 					break;
2629 			}
2630 
2631 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2632 
2633 			if (!indexIt)
2634 				continue;
2635 		}
2636 		else
2637 		{
2638 			/* heap_getnext did the time qual check */
2639 			tupleIsAlive = true;
2640 		}
2641 
2642 		reltuples += 1;
2643 
2644 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
2645 
2646 		/* Set up for predicate or expression evaluation */
2647 		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2648 
2649 		/*
2650 		 * In a partial index, discard tuples that don't satisfy the
2651 		 * predicate.
2652 		 */
2653 		if (predicate != NULL)
2654 		{
2655 			if (!ExecQual(predicate, econtext))
2656 				continue;
2657 		}
2658 
2659 		/*
2660 		 * For the current heap tuple, extract all the attributes we use in
2661 		 * this index, and note which are null.  This also performs evaluation
2662 		 * of any expressions needed.
2663 		 */
2664 		FormIndexDatum(indexInfo,
2665 					   slot,
2666 					   estate,
2667 					   values,
2668 					   isnull);
2669 
2670 		/*
2671 		 * You'd think we should go ahead and build the index tuple here, but
2672 		 * some index AMs want to do further processing on the data first.  So
2673 		 * pass the values[] and isnull[] arrays, instead.
2674 		 */
2675 
2676 		if (HeapTupleIsHeapOnly(heapTuple))
2677 		{
2678 			/*
2679 			 * For a heap-only tuple, pretend its TID is that of the root. See
2680 			 * src/backend/access/heap/README.HOT for discussion.
2681 			 */
2682 			HeapTupleData rootTuple;
2683 			OffsetNumber offnum;
2684 
2685 			rootTuple = *heapTuple;
2686 			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);
2687 
2688 			/*
2689 			 * If a HOT tuple points to a root that we don't know
2690 			 * about, obtain root items afresh.  If that still fails,
2691 			 * report it as corruption.
2692 			 */
2693 			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
2694 			{
2695 				Page	page = BufferGetPage(scan->rs_cbuf);
2696 
2697 				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
2698 				heap_get_root_tuples(page, root_offsets);
2699 				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
2700 			}
2701 
2702 			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
2703 				ereport(ERROR,
2704 						(errcode(ERRCODE_DATA_CORRUPTED),
2705 						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
2706 										 ItemPointerGetBlockNumber(&heapTuple->t_self),
2707 										 offnum,
2708 										 RelationGetRelationName(heapRelation))));
2709 
2710 			ItemPointerSetOffsetNumber(&rootTuple.t_self,
2711 									   root_offsets[offnum - 1]);
2712 
2713 			/* Call the AM's callback routine to process the tuple */
2714 			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
2715 					 callback_state);
2716 		}
2717 		else
2718 		{
2719 			/* Call the AM's callback routine to process the tuple */
2720 			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
2721 					 callback_state);
2722 		}
2723 	}
2724 
2725 	heap_endscan(scan);
2726 
2727 	/* we can now forget our snapshot, if set */
2728 	if (IsBootstrapProcessingMode() || indexInfo->ii_Concurrent)
2729 		UnregisterSnapshot(snapshot);
2730 
2731 	ExecDropSingleTupleTableSlot(slot);
2732 
2733 	FreeExecutorState(estate);
2734 
2735 	/* These may have been pointing to the now-gone estate */
2736 	indexInfo->ii_ExpressionsState = NIL;
2737 	indexInfo->ii_PredicateState = NULL;
2738 
2739 	return reltuples;
2740 }
2741 
2742 
2743 /*
2744  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
2745  *
2746  * When creating an exclusion constraint, we first build the index normally
2747  * and then rescan the heap to check for conflicts.  We assume that we only
2748  * need to validate tuples that are live according to an up-to-date snapshot,
2749  * and that these were correctly indexed even in the presence of broken HOT
2750  * chains.  This should be OK since we are holding at least ShareLock on the
2751  * table, meaning there can be no uncommitted updates from other transactions.
2752  * (Note: that wouldn't necessarily work for system catalogs, since many
2753  * operations release write lock early on the system catalogs.)
2754  */
2755 static void
2756 IndexCheckExclusion(Relation heapRelation,
2757 					Relation indexRelation,
2758 					IndexInfo *indexInfo)
2759 {
2760 	HeapScanDesc scan;
2761 	HeapTuple	heapTuple;
2762 	Datum		values[INDEX_MAX_KEYS];
2763 	bool		isnull[INDEX_MAX_KEYS];
2764 	ExprState  *predicate;
2765 	TupleTableSlot *slot;
2766 	EState	   *estate;
2767 	ExprContext *econtext;
2768 	Snapshot	snapshot;
2769 
2770 	/*
2771 	 * If we are reindexing the target index, mark it as no longer being
2772 	 * reindexed, to forestall an Assert in index_beginscan when we try to use
2773 	 * the index for probes.  This is OK because the index is now fully valid.
2774 	 */
2775 	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
2776 		ResetReindexProcessing();
2777 
2778 	/*
2779 	 * Need an EState for evaluation of index expressions and partial-index
2780 	 * predicates.  Also a slot to hold the current tuple.
2781 	 */
2782 	estate = CreateExecutorState();
2783 	econtext = GetPerTupleExprContext(estate);
2784 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
2785 
2786 	/* Arrange for econtext's scan tuple to be the tuple under test */
2787 	econtext->ecxt_scantuple = slot;
2788 
2789 	/* Set up execution state for predicate, if any. */
2790 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
2791 
2792 	/*
2793 	 * Scan all live tuples in the base relation.
2794 	 */
2795 	snapshot = RegisterSnapshot(GetLatestSnapshot());
2796 	scan = heap_beginscan_strat(heapRelation,	/* relation */
2797 								snapshot,	/* snapshot */
2798 								0,	/* number of keys */
2799 								NULL,	/* scan key */
2800 								true,	/* buffer access strategy OK */
2801 								true);	/* syncscan OK */
2802 
2803 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2804 	{
2805 		CHECK_FOR_INTERRUPTS();
2806 
2807 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
2808 
2809 		/* Set up for predicate or expression evaluation */
2810 		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
2811 
2812 		/*
2813 		 * In a partial index, ignore tuples that don't satisfy the predicate.
2814 		 */
2815 		if (predicate != NULL)
2816 		{
2817 			if (!ExecQual(predicate, econtext))
2818 				continue;
2819 		}
2820 
2821 		/*
2822 		 * Extract index column values, including computing expressions.
2823 		 */
2824 		FormIndexDatum(indexInfo,
2825 					   slot,
2826 					   estate,
2827 					   values,
2828 					   isnull);
2829 
2830 		/*
2831 		 * Check that this tuple has no conflicts.
2832 		 */
2833 		check_exclusion_constraint(heapRelation,
2834 								   indexRelation, indexInfo,
2835 								   &(heapTuple->t_self), values, isnull,
2836 								   estate, true);
2837 	}
2838 
2839 	heap_endscan(scan);
2840 	UnregisterSnapshot(snapshot);
2841 
2842 	ExecDropSingleTupleTableSlot(slot);
2843 
2844 	FreeExecutorState(estate);
2845 
2846 	/* These may have been pointing to the now-gone estate */
2847 	indexInfo->ii_ExpressionsState = NIL;
2848 	indexInfo->ii_PredicateState = NULL;
2849 }
2850 
2851 
2852 /*
2853  * validate_index - support code for concurrent index builds
2854  *
2855  * We do a concurrent index build by first inserting the catalog entry for the
2856  * index via index_create(), marking it not indisready and not indisvalid.
2857  * Then we commit our transaction and start a new one, then we wait for all
2858  * transactions that could have been modifying the table to terminate.  Now
2859  * we know that any subsequently-started transactions will see the index and
2860  * honor its constraints on HOT updates; so while existing HOT-chains might
2861  * be broken with respect to the index, no currently live tuple will have an
2862  * incompatible HOT update done to it.  We now build the index normally via
2863  * index_build(), while holding a weak lock that allows concurrent
2864  * insert/update/delete.  Also, we index only tuples that are valid
2865  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
2866  * build takes care to include recently-dead tuples.  This is OK because
2867  * we won't mark the index valid until all transactions that might be able
2868  * to see those tuples are gone.  The reason for doing that is to avoid
2869  * bogus unique-index failures due to concurrent UPDATEs (we might see
2870  * different versions of the same row as being valid when we pass over them,
2871  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
2872  * does not contain any tuples added to the table while we built the index.
2873  *
2874  * Next, we mark the index "indisready" (but still not "indisvalid") and
2875  * commit the second transaction and start a third.  Again we wait for all
2876  * transactions that could have been modifying the table to terminate.  Now
2877  * we know that any subsequently-started transactions will see the index and
2878  * insert their new tuples into it.  We then take a new reference snapshot
2879  * which is passed to validate_index().  Any tuples that are valid according
2880  * to this snap, but are not in the index, must be added to the index.
2881  * (Any tuples committed live after the snap will be inserted into the
2882  * index by their originating transaction.  Any tuples committed dead before
2883  * the snap need not be indexed, because we will wait out all transactions
2884  * that might care about them before we mark the index valid.)
2885  *
2886  * validate_index() works by first gathering all the TIDs currently in the
2887  * index, using a bulkdelete callback that just stores the TIDs and doesn't
2888  * ever say "delete it".  (This should be faster than a plain indexscan;
2889  * also, not all index AMs support full-index indexscan.)  Then we sort the
2890  * TIDs, and finally scan the table doing a "merge join" against the TID list
2891  * to see which tuples are missing from the index.  Thus we will ensure that
2892  * all tuples valid according to the reference snapshot are in the index.
2893  *
2894  * Building a unique index this way is tricky: we might try to insert a
2895  * tuple that is already dead or is in process of being deleted, and we
2896  * mustn't have a uniqueness failure against an updated version of the same
2897  * row.  We could try to check the tuple to see if it's already dead and tell
2898  * index_insert() not to do the uniqueness check, but that still leaves us
2899  * with a race condition against an in-progress update.  To handle that,
2900  * we expect the index AM to recheck liveness of the to-be-inserted tuple
2901  * before it declares a uniqueness error.
2902  *
2903  * After completing validate_index(), we wait until all transactions that
2904  * were alive at the time of the reference snapshot are gone; this is
2905  * necessary to be sure there are none left with a transaction snapshot
2906  * older than the reference (and hence possibly able to see tuples we did
2907  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
2908  * transactions will be able to use it for queries.
2909  *
2910  * Doing two full table scans is a brute-force strategy.  We could try to be
2911  * cleverer, eg storing new tuples in a special area of the table (perhaps
2912  * making the table append-only by setting use_fsm).  However that would
2913  * add yet more locking issues.
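 *
 * This is the code path behind CREATE INDEX CONCURRENTLY: the transaction
 * choreography described above is driven by DefineIndex() in
 * commands/indexcmds.c, which calls validate_index() for the second scan.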
2914  */
2915 void
2916 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
2917 {
2918 	Relation	heapRelation,
2919 				indexRelation;
2920 	IndexInfo  *indexInfo;
2921 	IndexVacuumInfo ivinfo;
2922 	v_i_state	state;
2923 	Oid			save_userid;
2924 	int			save_sec_context;
2925 	int			save_nestlevel;
2926 
2927 	/* Open and lock the parent heap relation */
2928 	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
2929 	/* And the target index relation */
2930 	indexRelation = index_open(indexId, RowExclusiveLock);
2931 
2932 	/*
2933 	 * Fetch info needed for index_insert.  (You might think this should be
2934 	 * passed in from DefineIndex, but its copy is long gone due to having
2935 	 * been built in a previous transaction.)
2936 	 */
2937 	indexInfo = BuildIndexInfo(indexRelation);
2938 
2939 	/* mark build is concurrent just for consistency */
2940 	indexInfo->ii_Concurrent = true;
2941 
2942 	/*
2943 	 * Switch to the table owner's userid, so that any index functions are run
2944 	 * as that user.  Also lock down security-restricted operations and
2945 	 * arrange to make GUC variable changes local to this command.
2946 	 */
2947 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
2948 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
2949 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
2950 	save_nestlevel = NewGUCNestLevel();
2951 
2952 	/*
2953 	 * Scan the index and gather up all the TIDs into a tuplesort object.
2954 	 */
2955 	ivinfo.index = indexRelation;
2956 	ivinfo.analyze_only = false;
2957 	ivinfo.estimated_count = true;
2958 	ivinfo.message_level = DEBUG2;
2959 	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
2960 	ivinfo.strategy = NULL;
2961 
2962 	/*
2963 	 * Encode TIDs as int8 values for the sort, rather than directly sorting
2964 	 * item pointers.  This can be significantly faster, primarily because TID
2965 	 * is a pass-by-reference type on all platforms, whereas int8 is
2966 	 * pass-by-value on most platforms.
2967 	 */
2968 	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
2969 											InvalidOid, false,
2970 											maintenance_work_mem,
2971 											false);
2972 	state.htups = state.itups = state.tups_inserted = 0;
2973 
2974 	(void) index_bulk_delete(&ivinfo, NULL,
2975 							 validate_index_callback, (void *) &state);
2976 
2977 	/* Execute the sort */
2978 	tuplesort_performsort(state.tuplesort);
2979 
2980 	/*
2981 	 * Now scan the heap and "merge" it with the index
2982 	 */
2983 	validate_index_heapscan(heapRelation,
2984 							indexRelation,
2985 							indexInfo,
2986 							snapshot,
2987 							&state);
2988 
2989 	/* Done with tuplesort object */
2990 	tuplesort_end(state.tuplesort);
2991 
2992 	elog(DEBUG2,
2993 		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
2994 		 state.htups, state.itups, state.tups_inserted);
2995 
2996 	/* Roll back any GUC changes executed by index functions */
2997 	AtEOXact_GUC(false, save_nestlevel);
2998 
2999 	/* Restore userid and security context */
3000 	SetUserIdAndSecContext(save_userid, save_sec_context);
3001 
3002 	/* Close rels, but keep locks */
3003 	index_close(indexRelation, NoLock);
3004 	heap_close(heapRelation, NoLock);
3005 }
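
#ifdef NOT_USED
/*
 * Minimal sketch of how a caller might drive this validation step of a
 * concurrent index build.  The real orchestration lives in DefineIndex and
 * involves additional transactions and waits that are not reproduced here;
 * treat this as an illustration of the snapshot handling only.
 */
static void
example_validate_phase(Oid tableOid, Oid indexOid)
{
	Snapshot	snapshot;

	/* Take a fresh MVCC snapshot to define which tuples must be indexed */
	snapshot = RegisterSnapshot(GetTransactionSnapshot());

	/* Tuples visible to this snapshot but missing from the index get inserted */
	validate_index(tableOid, indexOid, snapshot);

	UnregisterSnapshot(snapshot);
}
#endif							/* NOT_USED */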
3006 
3007 /*
3008  * itemptr_encode - Encode ItemPointer as int64/int8
3009  *
3010  * This representation must produce values encoded as int64 that sort in the
3011  * same order as their corresponding original TID values would (using the
3012  * default int8 opclass to produce a result equivalent to the default TID
3013  * opclass).
3014  *
3015  * As noted in validate_index(), this can be significantly faster.
3016  */
3017 static inline int64
3018 itemptr_encode(ItemPointer itemptr)
3019 {
3020 	BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3021 	OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3022 	int64		encoded;
3023 
3024 	/*
3025 	 * Use the 16 least significant bits for the offset.  32 adjacent bits are
3026 	 * used for the block number.  Since the remaining high bits are unused,
3027 	 * encoded values cannot be negative (we assume a two's complement
3028 	 * representation).
3029 	 */
3030 	encoded = ((uint64) block << 16) | (uint16) offset;
3031 
3032 	return encoded;
3033 }
3034 
3035 /*
3036  * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3037  */
3038 static inline void
3039 itemptr_decode(ItemPointer itemptr, int64 encoded)
3040 {
3041 	BlockNumber block = (BlockNumber) (encoded >> 16);
3042 	OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3043 
3044 	ItemPointerSet(itemptr, block, offset);
3045 }
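
#ifdef NOT_USED
/*
 * Illustrative round-trip through the encoding above, using an arbitrarily
 * chosen TID of (block 42, offset 7):
 *
 *		encoded = (42 << 16) | 7 = 2752519
 *
 * Decoding shifts the block number back out of the high bits and masks the
 * offset out of the low 16 bits, recovering (42,7).
 */
static void
example_itemptr_roundtrip(void)
{
	ItemPointerData tid;
	ItemPointerData decoded_tid;
	int64		encoded;

	ItemPointerSet(&tid, 42, 7);
	encoded = itemptr_encode(&tid);
	itemptr_decode(&decoded_tid, encoded);

	Assert(ItemPointerEquals(&tid, &decoded_tid));
}
#endif							/* NOT_USED */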
3046 
3047 /*
3048  * validate_index_callback - bulkdelete callback to collect the index TIDs
3049  */
3050 static bool
3051 validate_index_callback(ItemPointer itemptr, void *opaque)
3052 {
3053 	v_i_state  *state = (v_i_state *) opaque;
3054 	int64		encoded = itemptr_encode(itemptr);
3055 
3056 	tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3057 	state->itups += 1;
3058 	return false;				/* never actually delete anything */
3059 }
3060 
3061 /*
3062  * validate_index_heapscan - second table scan for concurrent index build
3063  *
3064  * This has much code in common with IndexBuildHeapScan, but it's different
3065  * enough that it seems cleaner to have two separate routines rather than one.
3066  */
3067 static void
3068 validate_index_heapscan(Relation heapRelation,
3069 						Relation indexRelation,
3070 						IndexInfo *indexInfo,
3071 						Snapshot snapshot,
3072 						v_i_state *state)
3073 {
3074 	HeapScanDesc scan;
3075 	HeapTuple	heapTuple;
3076 	Datum		values[INDEX_MAX_KEYS];
3077 	bool		isnull[INDEX_MAX_KEYS];
3078 	ExprState  *predicate;
3079 	TupleTableSlot *slot;
3080 	EState	   *estate;
3081 	ExprContext *econtext;
3082 	BlockNumber root_blkno = InvalidBlockNumber;
3083 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
3084 	bool		in_index[MaxHeapTuplesPerPage];
3085 
3086 	/* state variables for the merge */
3087 	ItemPointer indexcursor = NULL;
3088 	ItemPointerData decoded;
3089 	bool		tuplesort_empty = false;
3090 
3091 	/*
3092 	 * sanity checks
3093 	 */
3094 	Assert(OidIsValid(indexRelation->rd_rel->relam));
3095 
3096 	/*
3097 	 * Need an EState for evaluation of index expressions and partial-index
3098 	 * predicates.  Also a slot to hold the current tuple.
3099 	 */
3100 	estate = CreateExecutorState();
3101 	econtext = GetPerTupleExprContext(estate);
3102 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
3103 
3104 	/* Arrange for econtext's scan tuple to be the tuple under test */
3105 	econtext->ecxt_scantuple = slot;
3106 
3107 	/* Set up execution state for predicate, if any. */
3108 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3109 
3110 	/*
3111 	 * Prepare for scan of the base relation.  We need just those tuples
3112 	 * satisfying the passed-in reference snapshot.  We must disable syncscan
3113 	 * here, because it's critical that we read from block zero forward to
3114 	 * match the sorted TIDs.
3115 	 */
3116 	scan = heap_beginscan_strat(heapRelation,	/* relation */
3117 								snapshot,	/* snapshot */
3118 								0,	/* number of keys */
3119 								NULL,	/* scan key */
3120 								true,	/* buffer access strategy OK */
3121 								false); /* syncscan not OK */
3122 
3123 	/*
3124 	 * Scan all tuples matching the snapshot.
3125 	 */
3126 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
3127 	{
3128 		ItemPointer heapcursor = &heapTuple->t_self;
3129 		ItemPointerData rootTuple;
3130 		OffsetNumber root_offnum;
3131 
3132 		CHECK_FOR_INTERRUPTS();
3133 
3134 		state->htups += 1;
3135 
3136 		/*
3137 		 * As commented in IndexBuildHeapScan, we should index heap-only
3138 		 * tuples under the TIDs of their root tuples; so when we advance onto
3139 		 * a new heap page, build a map of root item offsets on the page.
3140 		 *
3141 		 * This complicates merging against the tuplesort output: we will
3142 		 * visit the live tuples in order by their offsets, but the root
3143 		 * offsets that we need to compare against the index contents might be
3144 		 * ordered differently.  So we might have to "look back" within the
3145 		 * tuplesort output, but only within the current page.  We handle that
3146 		 * by keeping a bool array in_index[] showing all the
3147 		 * already-passed-over tuplesort output TIDs of the current page. We
3148 		 * clear that array here, when advancing onto a new heap page.
3149 		 */
3150 		if (scan->rs_cblock != root_blkno)
3151 		{
3152 			Page		page = BufferGetPage(scan->rs_cbuf);
3153 
3154 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3155 			heap_get_root_tuples(page, root_offsets);
3156 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
3157 
3158 			memset(in_index, 0, sizeof(in_index));
3159 
3160 			root_blkno = scan->rs_cblock;
3161 		}
3162 
3163 		/* Convert actual tuple TID to root TID */
3164 		rootTuple = *heapcursor;
3165 		root_offnum = ItemPointerGetOffsetNumber(heapcursor);
3166 
3167 		if (HeapTupleIsHeapOnly(heapTuple))
3168 		{
3169 			root_offnum = root_offsets[root_offnum - 1];
3170 			if (!OffsetNumberIsValid(root_offnum))
3171 				ereport(ERROR,
3172 						(errcode(ERRCODE_DATA_CORRUPTED),
3173 						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3174 										 ItemPointerGetBlockNumber(heapcursor),
3175 										 ItemPointerGetOffsetNumber(heapcursor),
3176 										 RelationGetRelationName(heapRelation))));
3177 			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
3178 		}
3179 
3180 		/*
3181 		 * "merge" by skipping through the index tuples until we find or pass
3182 		 * the current root tuple.
3183 		 */
3184 		while (!tuplesort_empty &&
3185 			   (!indexcursor ||
3186 				ItemPointerCompare(indexcursor, &rootTuple) < 0))
3187 		{
3188 			Datum		ts_val;
3189 			bool		ts_isnull;
3190 
3191 			if (indexcursor)
3192 			{
3193 				/*
3194 				 * Remember index items seen earlier on the current heap page
3195 				 */
3196 				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
3197 					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
3198 			}
3199 
3200 			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
3201 												  &ts_val, &ts_isnull, NULL);
3202 			Assert(tuplesort_empty || !ts_isnull);
3203 			if (!tuplesort_empty)
3204 			{
3205 				itemptr_decode(&decoded, DatumGetInt64(ts_val));
3206 				indexcursor = &decoded;
3207 
3208 				/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
3209 #ifndef USE_FLOAT8_BYVAL
3210 				pfree(DatumGetPointer(ts_val));
3211 #endif
3212 			}
3213 			else
3214 			{
3215 				/* Be tidy */
3216 				indexcursor = NULL;
3217 			}
3218 		}
3219 
3220 		/*
3221 		 * If the tuplesort has overshot *and* we didn't see a match earlier,
3222 		 * then this tuple is missing from the index, so insert it.
3223 		 */
3224 		if ((tuplesort_empty ||
3225 			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
3226 			!in_index[root_offnum - 1])
3227 		{
3228 			MemoryContextReset(econtext->ecxt_per_tuple_memory);
3229 
3230 			/* Set up for predicate or expression evaluation */
3231 			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3232 
3233 			/*
3234 			 * In a partial index, discard tuples that don't satisfy the
3235 			 * predicate.
3236 			 */
3237 			if (predicate != NULL)
3238 			{
3239 				if (!ExecQual(predicate, econtext))
3240 					continue;
3241 			}
3242 
3243 			/*
3244 			 * For the current heap tuple, extract all the attributes we use
3245 			 * in this index, and note which are null.  This also performs
3246 			 * evaluation of any expressions needed.
3247 			 */
3248 			FormIndexDatum(indexInfo,
3249 						   slot,
3250 						   estate,
3251 						   values,
3252 						   isnull);
3253 
3254 			/*
3255 			 * You'd think we should go ahead and build the index tuple here,
3256 			 * but some index AMs want to do further processing on the data
3257 			 * first. So pass the values[] and isnull[] arrays, instead.
3258 			 */
3259 
3260 			/*
3261 			 * If the tuple is already committed dead, you might think we
3262 			 * could suppress uniqueness checking, but this is no longer true
3263 			 * in the presence of HOT, because the insert is actually a proxy
3264 			 * for a uniqueness check on the whole HOT-chain.  That is, the
3265 			 * tuple we have here could be dead because it was already
3266 			 * HOT-updated, and if so the updating transaction will not have
3267 			 * thought it should insert index entries.  The index AM will
3268 			 * check the whole HOT-chain and correctly detect a conflict if
3269 			 * there is one.
3270 			 */
3271 
3272 			index_insert(indexRelation,
3273 						 values,
3274 						 isnull,
3275 						 &rootTuple,
3276 						 heapRelation,
3277 						 indexInfo->ii_Unique ?
3278 						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
3279 						 indexInfo);
3280 
3281 			state->tups_inserted += 1;
3282 		}
3283 	}
3284 
3285 	heap_endscan(scan);
3286 
3287 	ExecDropSingleTupleTableSlot(slot);
3288 
3289 	FreeExecutorState(estate);
3290 
3291 	/* These may have been pointing to the now-gone estate */
3292 	indexInfo->ii_ExpressionsState = NIL;
3293 	indexInfo->ii_PredicateState = NULL;
3294 }
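
#ifdef NOT_USED
/*
 * Simplified model of the merge performed above: walk two TID streams that
 * are both in heap order (the seqscan and the sorted index TIDs) and insert
 * every heap TID that the index stream does not contain.  HOT-chain root
 * lookup, the in_index[] look-back, partial-index predicates and expression
 * evaluation are deliberately omitted, and the example_* stream functions
 * are hypothetical placeholders, not part of this file.
 */
static bool example_next_heap_tid(ItemPointer tid);
static bool example_next_index_tid(ItemPointer tid);
static void example_insert_missing(ItemPointer tid);

static void
example_merge_streams(void)
{
	ItemPointerData heaptid;
	ItemPointerData indextid;
	bool		have_index_tid = example_next_index_tid(&indextid);

	while (example_next_heap_tid(&heaptid))
	{
		/* advance the index stream until it reaches or passes the heap TID */
		while (have_index_tid &&
			   ItemPointerCompare(&indextid, &heaptid) < 0)
			have_index_tid = example_next_index_tid(&indextid);

		/* if the index stream skipped past it, the heap TID is missing */
		if (!have_index_tid ||
			ItemPointerCompare(&indextid, &heaptid) > 0)
			example_insert_missing(&heaptid);
	}
}
#endif							/* NOT_USED */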
3295 
3296 
3297 /*
3298  * index_set_state_flags - adjust pg_index state flags
3299  *
3300  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3301  * flags that denote the index's state.
3302  *
3303  * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3304  * tuple, so other sessions will hear about the update as soon as we commit.
3305  */
3306 void
3307 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3308 {
3309 	Relation	pg_index;
3310 	HeapTuple	indexTuple;
3311 	Form_pg_index indexForm;
3312 
3313 	/* Open pg_index and fetch a writable copy of the index's tuple */
3314 	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3315 
3316 	indexTuple = SearchSysCacheCopy1(INDEXRELID,
3317 									 ObjectIdGetDatum(indexId));
3318 	if (!HeapTupleIsValid(indexTuple))
3319 		elog(ERROR, "cache lookup failed for index %u", indexId);
3320 	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3321 
3322 	/* Perform the requested state change on the copy */
3323 	switch (action)
3324 	{
3325 		case INDEX_CREATE_SET_READY:
3326 			/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3327 			Assert(indexForm->indislive);
3328 			Assert(!indexForm->indisready);
3329 			Assert(!indexForm->indisvalid);
3330 			indexForm->indisready = true;
3331 			break;
3332 		case INDEX_CREATE_SET_VALID:
3333 			/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3334 			Assert(indexForm->indislive);
3335 			Assert(indexForm->indisready);
3336 			Assert(!indexForm->indisvalid);
3337 			indexForm->indisvalid = true;
3338 			break;
3339 		case INDEX_DROP_CLEAR_VALID:
3340 
3341 			/*
3342 			 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3343 			 *
3344 			 * If indisready == true we leave it set so the index still gets
3345 			 * maintained by active transactions.  We only need to ensure that
3346 			 * indisvalid is false.  (We don't assert that either is initially
3347 			 * true, though, since we want to be able to retry a DROP INDEX
3348 			 * CONCURRENTLY that failed partway through.)
3349 			 *
3350 			 * Note: the CLUSTER logic assumes that indisclustered cannot be
3351 			 * set on any invalid index, so clear that flag too.
3352 			 */
3353 			indexForm->indisvalid = false;
3354 			indexForm->indisclustered = false;
3355 			break;
3356 		case INDEX_DROP_SET_DEAD:
3357 
3358 			/*
3359 			 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3360 			 *
3361 			 * We clear both indisready and indislive, because we not only
3362 			 * want to stop updates, we want to prevent sessions from touching
3363 			 * the index at all.
3364 			 */
3365 			Assert(!indexForm->indisvalid);
3366 			indexForm->indisready = false;
3367 			indexForm->indislive = false;
3368 			break;
3369 	}
3370 
3371 	/* ... and update it */
3372 	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3373 
3374 	heap_close(pg_index, RowExclusiveLock);
3375 }
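
#ifdef NOT_USED
/*
 * Sketch of the state-flag transitions that CREATE INDEX CONCURRENTLY
 * drives through index_set_state_flags().  Each call happens in its own
 * transaction; the intervening builds, waits, and validate_index() call are
 * only indicated by comments, since that sequencing lives in DefineIndex
 * and is not reproduced here.
 */
static void
example_concurrent_build_flags(Oid indexId)
{
	/* initial catalog entry: indislive = true, indisready/indisvalid = false */

	/* ... first transaction commits, wait for lockers, build the index ... */

	/* let all sessions start maintaining the index on writes */
	index_set_state_flags(indexId, INDEX_CREATE_SET_READY);

	/* ... commit, wait again, run validate_index(), wait out old snapshots ... */

	/* finally let queries use the index */
	index_set_state_flags(indexId, INDEX_CREATE_SET_VALID);
}
#endif							/* NOT_USED */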
3376 
3377 
3378 /*
3379  * IndexGetRelation: given an index's relation OID, get the OID of the
3380  * relation it is an index on.  Uses the system cache.
3381  */
3382 Oid
3383 IndexGetRelation(Oid indexId, bool missing_ok)
3384 {
3385 	HeapTuple	tuple;
3386 	Form_pg_index index;
3387 	Oid			result;
3388 
3389 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3390 	if (!HeapTupleIsValid(tuple))
3391 	{
3392 		if (missing_ok)
3393 			return InvalidOid;
3394 		elog(ERROR, "cache lookup failed for index %u", indexId);
3395 	}
3396 	index = (Form_pg_index) GETSTRUCT(tuple);
3397 	Assert(index->indexrelid == indexId);
3398 
3399 	result = index->indrelid;
3400 	ReleaseSysCache(tuple);
3401 	return result;
3402 }
3403 
3404 /*
3405  * reindex_index - This routine is used to recreate a single index
3406  */
3407 void
3408 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3409 			  int options)
3410 {
3411 	Relation	iRel,
3412 				heapRelation;
3413 	Oid			heapId;
3414 	IndexInfo  *indexInfo;
3415 	volatile bool skipped_constraint = false;
3416 	PGRUsage	ru0;
3417 
3418 	pg_rusage_init(&ru0);
3419 
3420 	/*
3421 	 * Open and lock the parent heap relation.  ShareLock is sufficient since
3422 	 * we only need to be sure no schema or data changes are going on.
3423 	 */
3424 	heapId = IndexGetRelation(indexId, false);
3425 	heapRelation = heap_open(heapId, ShareLock);
3426 
3427 	/*
3428 	 * Open the target index relation and get an exclusive lock on it, to
3429 	 * ensure that no one else is touching this particular index.
3430 	 */
3431 	iRel = index_open(indexId, AccessExclusiveLock);
3432 
3433 	/*
3434 	 * Don't allow reindex on temp tables of other backends ... their local
3435 	 * buffer manager is not going to cope.
3436 	 */
3437 	if (RELATION_IS_OTHER_TEMP(iRel))
3438 		ereport(ERROR,
3439 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3440 				 errmsg("cannot reindex temporary tables of other sessions")));
3441 
3442 	/*
3443 	 * Also check for active uses of the index in the current transaction; we
3444 	 * don't want to reindex underneath an open indexscan.
3445 	 */
3446 	CheckTableNotInUse(iRel, "REINDEX INDEX");
3447 
3448 	/*
3449 	 * All predicate locks on the index are about to be made invalid. Promote
3450 	 * them to relation locks on the heap.
3451 	 */
3452 	TransferPredicateLocksToHeapRelation(iRel);
3453 
3454 	/* Fetch info needed for index_build */
3455 	indexInfo = BuildIndexInfo(iRel);
3456 
3457 	/* If requested, skip checking uniqueness/exclusion constraints */
3458 	if (skip_constraint_checks)
3459 	{
3460 		if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3461 			skipped_constraint = true;
3462 		indexInfo->ii_Unique = false;
3463 		indexInfo->ii_ExclusionOps = NULL;
3464 		indexInfo->ii_ExclusionProcs = NULL;
3465 		indexInfo->ii_ExclusionStrats = NULL;
3466 	}
3467 
3468 	/* Suppress use of the target index while rebuilding it */
3469 	SetReindexProcessing(heapId, indexId);
3470 
3471 	/* Create a new physical relation for the index */
3472 	RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
3473 							  InvalidMultiXactId);
3474 
3475 	/* Initialize the index and rebuild */
3476 	/* Note: we do not need to re-establish pkey setting */
3477 	index_build(heapRelation, iRel, indexInfo, false, true);
3478 
3479 	/* Re-allow use of target index */
3480 	ResetReindexProcessing();
3481 
3482 	/*
3483 	 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3484 	 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3485 	 * and we didn't skip a uniqueness check, we can now mark it valid.  This
3486 	 * allows REINDEX to be used to clean up in such cases.
3487 	 *
3488 	 * We can also reset indcheckxmin, because we have now done a
3489 	 * non-concurrent index build, *except* in the case where index_build
3490 	 * found some still-broken HOT chains. If it did, and we don't have to
3491 	 * change any of the other flags, we just leave indcheckxmin alone (note
3492 	 * that index_build won't have changed it, because this is a reindex).
3493 	 * This is okay and desirable because not updating the tuple leaves the
3494 	 * index's usability horizon (recorded as the tuple's xmin value) the same
3495 	 * as it was.
3496 	 *
3497 	 * But, if the index was invalid/not-ready/dead and there were broken HOT
3498 	 * chains, we had better force indcheckxmin true, because the normal
3499 	 * argument that the HOT chains couldn't conflict with the index is
3500 	 * suspect for an invalid index.  (A conflict is definitely possible if
3501 	 * the index was dead.  It probably shouldn't happen otherwise, but let's
3502 	 * be conservative.)  In this case advancing the usability horizon is
3503 	 * appropriate.
3504 	 *
3505 	 * Another reason for avoiding unnecessary updates here is that while
3506 	 * reindexing pg_index itself, we must not try to update tuples in it.
3507 	 * pg_index's indexes should always have these flags in their clean state,
3508 	 * so that won't happen.
3509 	 *
3510 	 * If early pruning/vacuuming is enabled for the heap relation, the
3511 	 * usability horizon must be advanced to the current transaction on every
3512 	 * build or rebuild.  pg_index is OK in this regard because catalog tables
3513 	 * are not subject to early cleanup.
3514 	 */
3515 	if (!skipped_constraint)
3516 	{
3517 		Relation	pg_index;
3518 		HeapTuple	indexTuple;
3519 		Form_pg_index indexForm;
3520 		bool		index_bad;
3521 		bool		early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3522 
3523 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3524 
3525 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
3526 										 ObjectIdGetDatum(indexId));
3527 		if (!HeapTupleIsValid(indexTuple))
3528 			elog(ERROR, "cache lookup failed for index %u", indexId);
3529 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3530 
3531 		index_bad = (!indexForm->indisvalid ||
3532 					 !indexForm->indisready ||
3533 					 !indexForm->indislive);
3534 		if (index_bad ||
3535 			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3536 			early_pruning_enabled)
3537 		{
3538 			if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3539 				indexForm->indcheckxmin = false;
3540 			else if (index_bad || early_pruning_enabled)
3541 				indexForm->indcheckxmin = true;
3542 			indexForm->indisvalid = true;
3543 			indexForm->indisready = true;
3544 			indexForm->indislive = true;
3545 			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3546 
3547 			/*
3548 			 * Invalidate the relcache for the table, so that after we commit
3549 			 * all sessions will refresh the table's index list.  This ensures
3550 			 * that if anyone misses seeing the pg_index row during this
3551 			 * update, they'll refresh their list before attempting any update
3552 			 * on the table.
3553 			 */
3554 			CacheInvalidateRelcache(heapRelation);
3555 		}
3556 
3557 		heap_close(pg_index, RowExclusiveLock);
3558 	}
3559 
3560 	/* Log what we did */
3561 	if (options & REINDEXOPT_VERBOSE)
3562 		ereport(INFO,
3563 				(errmsg("index \"%s\" was reindexed",
3564 						get_rel_name(indexId)),
3565 				 errdetail_internal("%s",
3566 									pg_rusage_show(&ru0))));
3567 
3568 	/* Close rels, but keep locks */
3569 	index_close(iRel, NoLock);
3570 	heap_close(heapRelation, NoLock);
3571 }
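
#ifdef NOT_USED
/*
 * Illustrative use of reindex_index() to repair an index left invalid by a
 * failed CREATE INDEX CONCURRENTLY, as discussed above.  Constraint checks
 * are not skipped, so the index can be marked valid again afterwards.
 * Obtaining the persistence via get_rel_persistence() is an assumption
 * about the caller, not a copy of the real REINDEX code path.
 */
static void
example_repair_invalid_index(Oid indexId)
{
	char		persistence = get_rel_persistence(indexId);

	reindex_index(indexId,
				  false,		/* don't skip constraint checks */
				  persistence,
				  REINDEXOPT_VERBOSE);
}
#endif							/* NOT_USED */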
3572 
3573 /*
3574  * reindex_relation - This routine is used to recreate all indexes
3575  * of a relation (and optionally its toast relation too, if any).
3576  *
3577  * "flags" is a bitmask that can include any combination of these bits:
3578  *
3579  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3580  *
3581  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3582  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3583  * indexes are inconsistent with it.  This makes things tricky if the relation
3584  * is a system catalog that we might consult during the reindexing.  To deal
3585  * with that case, we mark all of the indexes as pending rebuild so that they
3586  * won't be trusted until rebuilt.  The caller is required to call us *without*
3587  * having made the rebuilt table visible by doing CommandCounterIncrement;
3588  * we'll do CCI after having collected the index list.  (This way we can still
3589  * use catalog indexes while collecting the list.)
3590  *
3591  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3592  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
3593  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
3594  * rebuild an index if constraint inconsistency is suspected.  For optimal
3595  * performance, other callers should include the flag only after transforming
3596  * the data in a manner that risks a change in constraint validity.
3597  *
3598  * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3599  * rebuilt indexes to unlogged.
3600  *
3601  * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3602  * rebuilt indexes to permanent.
3603  *
3604  * Returns true if any indexes were rebuilt (including toast table's index
3605  * when relevant).  Note that a CommandCounterIncrement will occur after each
3606  * index rebuild.
3607  */
3608 bool
3609 reindex_relation(Oid relid, int flags, int options)
3610 {
3611 	Relation	rel;
3612 	Oid			toast_relid;
3613 	List	   *indexIds;
3614 	char		persistence;
3615 	bool		result;
3616 	ListCell   *indexId;
3617 
3618 	/*
3619 	 * Open and lock the relation.  ShareLock is sufficient since we only need
3620 	 * to prevent schema and data changes in it.  The lock level used here
3621 	 * should match ReindexTable().
3622 	 */
3623 	rel = heap_open(relid, ShareLock);
3624 
3625 	toast_relid = rel->rd_rel->reltoastrelid;
3626 
3627 	/*
3628 	 * Get the list of index OIDs for this relation.  (We trust to the
3629 	 * relcache to get this with a sequential scan if ignoring system
3630 	 * indexes.)
3631 	 */
3632 	indexIds = RelationGetIndexList(rel);
3633 
3634 	if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3635 	{
3636 		/* Suppress use of all the indexes until they are rebuilt */
3637 		SetReindexPending(indexIds);
3638 
3639 		/*
3640 		 * Make the new heap contents visible --- now things might be
3641 		 * inconsistent!
3642 		 */
3643 		CommandCounterIncrement();
3644 	}
3645 
3646 	/*
3647 	 * Compute persistence of indexes: same as that of owning rel, unless
3648 	 * caller specified otherwise.
3649 	 */
3650 	if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
3651 		persistence = RELPERSISTENCE_UNLOGGED;
3652 	else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
3653 		persistence = RELPERSISTENCE_PERMANENT;
3654 	else
3655 		persistence = rel->rd_rel->relpersistence;
3656 
3657 	/* Reindex all the indexes. */
3658 	foreach(indexId, indexIds)
3659 	{
3660 		Oid			indexOid = lfirst_oid(indexId);
3661 
3662 		reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
3663 					  persistence, options);
3664 
3665 		CommandCounterIncrement();
3666 
3667 		/* Index should no longer be in the pending list */
3668 		Assert(!ReindexIsProcessingIndex(indexOid));
3669 	}
3670 
3671 	/*
3672 	 * Close rel, but continue to hold the lock.
3673 	 */
3674 	heap_close(rel, NoLock);
3675 
3676 	result = (indexIds != NIL);
3677 
3678 	/*
3679 	 * If the relation has a secondary toast rel, reindex that too while we
3680 	 * still hold the lock on the master table.
3681 	 */
3682 	if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
3683 		result |= reindex_relation(toast_relid, flags, options);
3684 
3685 	return result;
3686 }
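
#ifdef NOT_USED
/*
 * Illustrative combination of the flag bits described above: rebuild all
 * indexes of a table, including its toast table's index, and recheck
 * unique/exclusion constraints.  The OID argument and the absence of
 * REINDEXOPT_VERBOSE are arbitrary choices for the example.
 */
static void
example_reindex_table(Oid tableOid)
{
	bool		rebuilt;

	rebuilt = reindex_relation(tableOid,
							   REINDEX_REL_PROCESS_TOAST |
							   REINDEX_REL_CHECK_CONSTRAINTS,
							   0);

	if (!rebuilt)
		elog(DEBUG1, "relation %u has no indexes to rebuild", tableOid);
}
#endif							/* NOT_USED */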
3687 
3688 
3689 /* ----------------------------------------------------------------
3690  *		System index reindexing support
3691  *
3692  * When we are busy reindexing a system index, this code provides support
3693  * for preventing catalog lookups from using that index.  We also make use
3694  * of this to catch attempted uses of user indexes during reindexing of
3695  * those indexes.
3696  * ----------------------------------------------------------------
3697  */
3698 
3699 static Oid	currentlyReindexedHeap = InvalidOid;
3700 static Oid	currentlyReindexedIndex = InvalidOid;
3701 static List *pendingReindexedIndexes = NIL;
3702 static int	reindexingNestLevel = 0;
3703 
3704 /*
3705  * ReindexIsProcessingHeap
3706  *		True if heap specified by OID is currently being reindexed.
3707  */
3708 bool
3709 ReindexIsProcessingHeap(Oid heapOid)
3710 {
3711 	return heapOid == currentlyReindexedHeap;
3712 }
3713 
3714 /*
3715  * ReindexIsCurrentlyProcessingIndex
3716  *		True if index specified by OID is currently being reindexed.
3717  */
3718 static bool
3719 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
3720 {
3721 	return indexOid == currentlyReindexedIndex;
3722 }
3723 
3724 /*
3725  * ReindexIsProcessingIndex
3726  *		True if index specified by OID is currently being reindexed,
3727  *		or should be treated as invalid because it is awaiting reindex.
3728  */
3729 bool
3730 ReindexIsProcessingIndex(Oid indexOid)
3731 {
3732 	return indexOid == currentlyReindexedIndex ||
3733 		list_member_oid(pendingReindexedIndexes, indexOid);
3734 }
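
#ifdef NOT_USED
/*
 * Sketch of the kind of test catalog-scan code performs before trusting an
 * index: if the index is currently being reindexed, or is pending rebuild,
 * fall back to a heap scan instead.  This is modeled loosely on the check
 * made when starting a system-table scan; it is not the actual caller.
 */
static Relation
example_open_index_if_usable(Oid indexId)
{
	if (ReindexIsProcessingIndex(indexId))
		return NULL;			/* caller must scan the heap instead */

	return index_open(indexId, AccessShareLock);
}
#endif							/* NOT_USED */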
3735 
3736 /*
3737  * SetReindexProcessing
3738  *		Set flag that specified heap/index are being reindexed.
3739  */
3740 static void
3741 SetReindexProcessing(Oid heapOid, Oid indexOid)
3742 {
3743 	Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
3744 	/* Reindexing is not re-entrant. */
3745 	if (OidIsValid(currentlyReindexedHeap))
3746 		elog(ERROR, "cannot reindex while reindexing");
3747 	currentlyReindexedHeap = heapOid;
3748 	currentlyReindexedIndex = indexOid;
3749 	/* Index is no longer "pending" reindex. */
3750 	RemoveReindexPending(indexOid);
3751 	/* This may have been set already, but in case it isn't, do so now. */
3752 	reindexingNestLevel = GetCurrentTransactionNestLevel();
3753 }
3754 
3755 /*
3756  * ResetReindexProcessing
3757  *		Unset reindexing status.
3758  */
3759 static void
3760 ResetReindexProcessing(void)
3761 {
3762 	currentlyReindexedHeap = InvalidOid;
3763 	currentlyReindexedIndex = InvalidOid;
3764 	/* reindexingNestLevel remains set till end of (sub)transaction */
3765 }
3766 
3767 /*
3768  * SetReindexPending
3769  *		Mark the given indexes as pending reindex.
3770  *
3771  * NB: we assume that the current memory context stays valid throughout.
3772  */
3773 static void
3774 SetReindexPending(List *indexes)
3775 {
3776 	/* Reindexing is not re-entrant. */
3777 	if (pendingReindexedIndexes)
3778 		elog(ERROR, "cannot reindex while reindexing");
3779 	pendingReindexedIndexes = list_copy(indexes);
3780 	reindexingNestLevel = GetCurrentTransactionNestLevel();
3781 }
3782 
3783 /*
3784  * RemoveReindexPending
3785  *		Remove the given index from the pending list.
3786  */
3787 static void
3788 RemoveReindexPending(Oid indexOid)
3789 {
3790 	pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
3791 											  indexOid);
3792 }
3793 
3794 /*
3795  * ResetReindexState
3796  *		Clear all reindexing state during (sub)transaction abort.
3797  */
3798 void
3799 ResetReindexState(int nestLevel)
3800 {
3801 	/*
3802 	 * Because reindexing is not re-entrant, we don't need to cope with nested
3803 	 * reindexing states.  We just need to avoid messing up the outer-level
3804 	 * state in case a subtransaction fails within a REINDEX.  So checking the
3805 	 * current nest level against that of the reindex operation is sufficient.
3806 	 */
3807 	if (reindexingNestLevel >= nestLevel)
3808 	{
3809 		currentlyReindexedHeap = InvalidOid;
3810 		currentlyReindexedIndex = InvalidOid;
3811 
3812 		/*
3813 		 * We needn't try to release the contents of pendingReindexedIndexes;
3814 		 * that list should be in a transaction-lifespan context, so it will
3815 		 * go away automatically.
3816 		 */
3817 		pendingReindexedIndexes = NIL;
3818 
3819 		reindexingNestLevel = 0;
3820 	}
3821 }
3822