1 /*-------------------------------------------------------------------------
2  *
3  * index.c
4  *	  code to create and destroy POSTGRES index relations
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/catalog/index.c
12  *
13  *
14  * INTERFACE ROUTINES
15  *		index_create()			- Create a cataloged index relation
16  *		index_drop()			- Removes index relation from catalogs
17  *		BuildIndexInfo()		- Prepare to insert index tuples
18  *		FormIndexDatum()		- Construct datum vector for one index tuple
19  *
20  *-------------------------------------------------------------------------
21  */
22 #include "postgres.h"
23 
24 #include <unistd.h>
25 
26 #include "access/amapi.h"
27 #include "access/multixact.h"
28 #include "access/relscan.h"
29 #include "access/reloptions.h"
30 #include "access/sysattr.h"
31 #include "access/transam.h"
32 #include "access/visibilitymap.h"
33 #include "access/xact.h"
34 #include "bootstrap/bootstrap.h"
35 #include "catalog/binary_upgrade.h"
36 #include "catalog/catalog.h"
37 #include "catalog/dependency.h"
38 #include "catalog/heap.h"
39 #include "catalog/index.h"
40 #include "catalog/objectaccess.h"
41 #include "catalog/pg_am.h"
42 #include "catalog/pg_collation.h"
43 #include "catalog/pg_constraint.h"
44 #include "catalog/pg_depend.h"
45 #include "catalog/pg_inherits.h"
46 #include "catalog/pg_operator.h"
47 #include "catalog/pg_opclass.h"
48 #include "catalog/pg_tablespace.h"
49 #include "catalog/pg_trigger.h"
50 #include "catalog/pg_type.h"
51 #include "catalog/storage.h"
52 #include "commands/tablecmds.h"
53 #include "commands/event_trigger.h"
54 #include "commands/trigger.h"
55 #include "executor/executor.h"
56 #include "miscadmin.h"
57 #include "nodes/makefuncs.h"
58 #include "nodes/nodeFuncs.h"
59 #include "optimizer/clauses.h"
60 #include "optimizer/planner.h"
61 #include "parser/parser.h"
62 #include "rewrite/rewriteManip.h"
63 #include "storage/bufmgr.h"
64 #include "storage/lmgr.h"
65 #include "storage/predicate.h"
66 #include "storage/procarray.h"
67 #include "storage/smgr.h"
68 #include "utils/builtins.h"
69 #include "utils/fmgroids.h"
70 #include "utils/guc.h"
71 #include "utils/inval.h"
72 #include "utils/lsyscache.h"
73 #include "utils/memutils.h"
74 #include "utils/pg_rusage.h"
75 #include "utils/syscache.h"
76 #include "utils/tuplesort.h"
77 #include "utils/snapmgr.h"
78 #include "utils/tqual.h"
79 
80 
/*
 * Potentially set by pg_upgrade_support functions: when valid, index_create()
 * consumes this as the preassigned pg_class OID (and relfilenode) for the
 * next index created, then resets it to InvalidOid.
 */
Oid			binary_upgrade_next_index_pg_class_oid = InvalidOid;
83 
/*
 * State info for the validate_index bulkdelete callback: collects all live
 * index TIDs into a tuplesort so they can be merge-joined against the heap.
 */
typedef struct
{
	Tuplesortstate *tuplesort;	/* for sorting the index TIDs */
	/* statistics (for debug purposes only): */
	double		htups,			/* # heap tuples scanned */
				itups,			/* # index tuples collected */
				tups_inserted;	/* # tuples inserted during validation */
} v_i_state;
93 
/*
 * Pointer-free representation of variables used when reindexing system
 * catalogs; we use this to propagate those values to parallel workers.
 * (Must be flat/pointer-free so it can be copied through shared memory.)
 */
typedef struct
{
	Oid			currentlyReindexedHeap;		/* heap being reindexed, or InvalidOid */
	Oid			currentlyReindexedIndex;	/* index being rebuilt, or InvalidOid */
	int			numPendingReindexedIndexes; /* length of array below */
	Oid			pendingReindexedIndexes[FLEXIBLE_ARRAY_MEMBER];
} SerializedReindexState;
105 
106 /* non-export function prototypes */
107 static bool relationHasPrimaryKey(Relation rel);
108 static TupleDesc ConstructTupleDescriptor(Relation heapRelation,
109 						 IndexInfo *indexInfo,
110 						 List *indexColNames,
111 						 Oid accessMethodObjectId,
112 						 Oid *collationObjectId,
113 						 Oid *classObjectId);
114 static void InitializeAttributeOids(Relation indexRelation,
115 						int numatts, Oid indexoid);
116 static void AppendAttributeTuples(Relation indexRelation, int numatts);
117 static void UpdateIndexRelation(Oid indexoid, Oid heapoid,
118 					Oid parentIndexId,
119 					IndexInfo *indexInfo,
120 					Oid *collationOids,
121 					Oid *classOids,
122 					int16 *coloptions,
123 					bool primary,
124 					bool isexclusion,
125 					bool immediate,
126 					bool isvalid,
127 					bool isready);
128 static void index_update_stats(Relation rel,
129 				   bool hasindex,
130 				   double reltuples);
131 static void IndexCheckExclusion(Relation heapRelation,
132 					Relation indexRelation,
133 					IndexInfo *indexInfo);
134 static inline int64 itemptr_encode(ItemPointer itemptr);
135 static inline void itemptr_decode(ItemPointer itemptr, int64 encoded);
136 static bool validate_index_callback(ItemPointer itemptr, void *opaque);
137 static void validate_index_heapscan(Relation heapRelation,
138 						Relation indexRelation,
139 						IndexInfo *indexInfo,
140 						Snapshot snapshot,
141 						v_i_state *state);
142 static bool ReindexIsCurrentlyProcessingIndex(Oid indexOid);
143 static void SetReindexProcessing(Oid heapOid, Oid indexOid);
144 static void ResetReindexProcessing(void);
145 static void SetReindexPending(List *indexes);
146 static void RemoveReindexPending(Oid indexOid);
147 
148 
149 /*
150  * relationHasPrimaryKey
151  *		See whether an existing relation has a primary key.
152  *
153  * Caller must have suitable lock on the relation.
154  *
155  * Note: we intentionally do not check IndexIsValid here; that's because this
156  * is used to enforce the rule that there can be only one indisprimary index,
157  * and we want that to be true even if said index is invalid.
158  */
159 static bool
relationHasPrimaryKey(Relation rel)160 relationHasPrimaryKey(Relation rel)
161 {
162 	bool		result = false;
163 	List	   *indexoidlist;
164 	ListCell   *indexoidscan;
165 
166 	/*
167 	 * Get the list of index OIDs for the table from the relcache, and look up
168 	 * each one in the pg_index syscache until we find one marked primary key
169 	 * (hopefully there isn't more than one such).
170 	 */
171 	indexoidlist = RelationGetIndexList(rel);
172 
173 	foreach(indexoidscan, indexoidlist)
174 	{
175 		Oid			indexoid = lfirst_oid(indexoidscan);
176 		HeapTuple	indexTuple;
177 
178 		indexTuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexoid));
179 		if (!HeapTupleIsValid(indexTuple))	/* should not happen */
180 			elog(ERROR, "cache lookup failed for index %u", indexoid);
181 		result = ((Form_pg_index) GETSTRUCT(indexTuple))->indisprimary;
182 		ReleaseSysCache(indexTuple);
183 		if (result)
184 			break;
185 	}
186 
187 	list_free(indexoidlist);
188 
189 	return result;
190 }
191 
192 /*
193  * index_check_primary_key
194  *		Apply special checks needed before creating a PRIMARY KEY index
195  *
196  * This processing used to be in DefineIndex(), but has been split out
197  * so that it can be applied during ALTER TABLE ADD PRIMARY KEY USING INDEX.
198  *
199  * We check for a pre-existing primary key, and that all columns of the index
200  * are simple column references (not expressions), and that all those
201  * columns are marked NOT NULL.  If they aren't (which can only happen during
202  * ALTER TABLE ADD CONSTRAINT, since the parser forces such columns to be
203  * created NOT NULL during CREATE TABLE), do an ALTER SET NOT NULL to mark
204  * them so --- or fail if they are not in fact nonnull.
205  *
206  * As of PG v10, the SET NOT NULL is applied to child tables as well, so
207  * that the behavior is like a manual SET NOT NULL.
208  *
209  * Caller had better have at least ShareLock on the table, else the not-null
210  * checking isn't trustworthy.
211  */
212 void
index_check_primary_key(Relation heapRel,IndexInfo * indexInfo,bool is_alter_table,IndexStmt * stmt)213 index_check_primary_key(Relation heapRel,
214 						IndexInfo *indexInfo,
215 						bool is_alter_table,
216 						IndexStmt *stmt)
217 {
218 	List	   *cmds;
219 	int			i;
220 
221 	/*
222 	 * If ALTER TABLE and CREATE TABLE .. PARTITION OF, check that there isn't
223 	 * already a PRIMARY KEY.  In CREATE TABLE for an ordinary relations, we
224 	 * have faith that the parser rejected multiple pkey clauses; and CREATE
225 	 * INDEX doesn't have a way to say PRIMARY KEY, so it's no problem either.
226 	 */
227 	if ((is_alter_table || heapRel->rd_rel->relispartition) &&
228 		relationHasPrimaryKey(heapRel))
229 	{
230 		ereport(ERROR,
231 				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
232 				 errmsg("multiple primary keys for table \"%s\" are not allowed",
233 						RelationGetRelationName(heapRel))));
234 	}
235 
236 	/*
237 	 * Check that all of the attributes in a primary key are marked as not
238 	 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
239 	 */
240 	cmds = NIL;
241 	for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
242 	{
243 		AttrNumber	attnum = indexInfo->ii_IndexAttrNumbers[i];
244 		HeapTuple	atttuple;
245 		Form_pg_attribute attform;
246 
247 		if (attnum == 0)
248 			ereport(ERROR,
249 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
250 					 errmsg("primary keys cannot be expressions")));
251 
252 		/* System attributes are never null, so no need to check */
253 		if (attnum < 0)
254 			continue;
255 
256 		atttuple = SearchSysCache2(ATTNUM,
257 								   ObjectIdGetDatum(RelationGetRelid(heapRel)),
258 								   Int16GetDatum(attnum));
259 		if (!HeapTupleIsValid(atttuple))
260 			elog(ERROR, "cache lookup failed for attribute %d of relation %u",
261 				 attnum, RelationGetRelid(heapRel));
262 		attform = (Form_pg_attribute) GETSTRUCT(atttuple);
263 
264 		if (!attform->attnotnull)
265 		{
266 			/* Add a subcommand to make this one NOT NULL */
267 			AlterTableCmd *cmd = makeNode(AlterTableCmd);
268 
269 			cmd->subtype = AT_SetNotNull;
270 			cmd->name = pstrdup(NameStr(attform->attname));
271 			cmds = lappend(cmds, cmd);
272 		}
273 
274 		ReleaseSysCache(atttuple);
275 	}
276 
277 	/*
278 	 * XXX: possible future improvement: when being called from ALTER TABLE,
279 	 * it would be more efficient to merge this with the outer ALTER TABLE, so
280 	 * as to avoid two scans.  But that seems to complicate DefineIndex's API
281 	 * unduly.
282 	 */
283 	if (cmds)
284 	{
285 		EventTriggerAlterTableStart((Node *) stmt);
286 		AlterTableInternal(RelationGetRelid(heapRel), cmds, true);
287 		EventTriggerAlterTableEnd();
288 	}
289 }
290 
/*
 *		ConstructTupleDescriptor
 *
 * Build an index tuple descriptor for a new index
 *
 * One attribute is produced per index column: for simple columns we copy
 * the parent table's pg_attribute entry, for expression columns we build
 * one from the expression's result type.  Afterwards the attribute type
 * may be overridden by the opclass's opckeytype or the AM's amkeytype.
 * The attrelid fields are left invalid; InitializeAttributeOids() fixes
 * them once the index's OID is known.
 */
static TupleDesc
ConstructTupleDescriptor(Relation heapRelation,
						 IndexInfo *indexInfo,
						 List *indexColNames,
						 Oid accessMethodObjectId,
						 Oid *collationObjectId,
						 Oid *classObjectId)
{
	int			numatts = indexInfo->ii_NumIndexAttrs;
	int			numkeyatts = indexInfo->ii_NumIndexKeyAttrs;
	ListCell   *colnames_item = list_head(indexColNames);
	ListCell   *indexpr_item = list_head(indexInfo->ii_Expressions);
	IndexAmRoutine *amroutine;
	TupleDesc	heapTupDesc;
	TupleDesc	indexTupDesc;
	int			natts;			/* #atts in heap rel --- for error checks */
	int			i;

	/* We need access to the index AM's API struct */
	amroutine = GetIndexAmRoutineByAmId(accessMethodObjectId, false);

	/* ... and to the table's tuple descriptor */
	heapTupDesc = RelationGetDescr(heapRelation);
	natts = RelationGetForm(heapRelation)->relnatts;

	/*
	 * allocate the new tuple descriptor
	 */
	indexTupDesc = CreateTemplateTupleDesc(numatts, false);

	/*
	 * For simple index columns, we copy the pg_attribute row from the parent
	 * relation and modify it as necessary.  For expressions we have to cons
	 * up a pg_attribute row the hard way.
	 */
	for (i = 0; i < numatts; i++)
	{
		AttrNumber	atnum = indexInfo->ii_IndexAttrNumbers[i];
		Form_pg_attribute to = TupleDescAttr(indexTupDesc, i);
		HeapTuple	tuple;
		Form_pg_type typeTup;
		Form_pg_opclass opclassTup;
		Oid			keyType;

		if (atnum != 0)
		{
			/* Simple index column */
			Form_pg_attribute from;

			if (atnum < 0)
			{
				/*
				 * here we are indexing on a system attribute (-1...-n)
				 */
				from = SystemAttributeDefinition(atnum,
												 heapRelation->rd_rel->relhasoids);
			}
			else
			{
				/*
				 * here we are indexing on a normal attribute (1...n)
				 */
				if (atnum > natts)	/* safety check */
					elog(ERROR, "invalid column number %d", atnum);
				from = TupleDescAttr(heapTupDesc,
									 AttrNumberGetAttrOffset(atnum));
			}

			/*
			 * now that we've determined the "from", let's copy the tuple desc
			 * data...
			 */
			memcpy(to, from, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			/*
			 * Fix the stuff that should not be the same as the underlying
			 * attr
			 */
			to->attnum = i + 1;

			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->attnotnull = false;
			to->atthasdef = false;
			to->atthasmissing = false;
			to->attidentity = '\0';
			to->attislocal = true;
			to->attinhcount = 0;
			/* included (non-key) columns never carry a collation */
			to->attcollation = (i < numkeyatts) ?
				collationObjectId[i] : InvalidOid;
		}
		else
		{
			/* Expressional index */
			Node	   *indexkey;

			MemSet(to, 0, ATTRIBUTE_FIXED_PART_SIZE);

			/*
			 * Set the attribute name as specified by caller.
			 */
			if (colnames_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in colnames list");
			namestrcpy(&to->attname, (const char *) lfirst(colnames_item));
			colnames_item = lnext(colnames_item);

			if (indexpr_item == NULL)	/* shouldn't happen */
				elog(ERROR, "too few entries in indexprs list");
			indexkey = (Node *) lfirst(indexpr_item);
			indexpr_item = lnext(indexpr_item);

			/*
			 * Lookup the expression type in pg_type for the type length etc.
			 */
			keyType = exprType(indexkey);
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			/*
			 * Assign some of the attributes values. Leave the rest as 0.
			 */
			to->attnum = i + 1;
			to->atttypid = keyType;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attstorage = typeTup->typstorage;
			to->attalign = typeTup->typalign;
			to->attstattarget = -1;
			to->attcacheoff = -1;
			to->atttypmod = exprTypmod(indexkey);
			to->attislocal = true;
			/* included (non-key) columns never carry a collation */
			to->attcollation = (i < numkeyatts) ?
				collationObjectId[i] : InvalidOid;

			ReleaseSysCache(tuple);

			/*
			 * Make sure the expression yields a type that's safe to store in
			 * an index.  We need this defense because we have index opclasses
			 * for pseudo-types such as "record", and the actually stored type
			 * had better be safe; eg, a named composite type is okay, an
			 * anonymous record type is not.  The test is the same as for
			 * whether a table column is of a safe type (which is why we
			 * needn't check for the non-expression case).
			 */
			CheckAttributeType(NameStr(to->attname),
							   to->atttypid, to->attcollation,
							   NIL, false);
		}

		/*
		 * We do not yet have the correct relation OID for the index, so just
		 * set it invalid for now.  InitializeAttributeOids() will fix it
		 * later.
		 */
		to->attrelid = InvalidOid;

		/*
		 * Check the opclass and index AM to see if either provides a keytype
		 * (overriding the attribute type).  Opclass (if exists) takes
		 * precedence.
		 */
		keyType = amroutine->amkeytype;

		/*
		 * Code below is concerned to the opclasses which are not used with
		 * the included columns.
		 */
		if (i < indexInfo->ii_NumIndexKeyAttrs)
		{
			tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(classObjectId[i]));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for opclass %u",
					 classObjectId[i]);
			opclassTup = (Form_pg_opclass) GETSTRUCT(tuple);
			if (OidIsValid(opclassTup->opckeytype))
				keyType = opclassTup->opckeytype;

			/*
			 * If keytype is specified as ANYELEMENT, and opcintype is
			 * ANYARRAY, then the attribute type must be an array (else it'd
			 * not have matched this opclass); use its element type.
			 */
			if (keyType == ANYELEMENTOID && opclassTup->opcintype == ANYARRAYOID)
			{
				keyType = get_base_element_type(to->atttypid);
				if (!OidIsValid(keyType))
					elog(ERROR, "could not get element type of array type %u",
						 to->atttypid);
			}

			ReleaseSysCache(tuple);
		}

		/*
		 * If a key type different from the heap value is specified, update
		 * the type-related fields in the index tupdesc.
		 */
		if (OidIsValid(keyType) && keyType != to->atttypid)
		{
			tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(keyType));
			if (!HeapTupleIsValid(tuple))
				elog(ERROR, "cache lookup failed for type %u", keyType);
			typeTup = (Form_pg_type) GETSTRUCT(tuple);

			to->atttypid = keyType;
			to->atttypmod = -1;
			to->attlen = typeTup->typlen;
			to->attbyval = typeTup->typbyval;
			to->attalign = typeTup->typalign;
			to->attstorage = typeTup->typstorage;

			ReleaseSysCache(tuple);
		}
	}

	pfree(amroutine);

	return indexTupDesc;
}
526 
527 /* ----------------------------------------------------------------
528  *		InitializeAttributeOids
529  * ----------------------------------------------------------------
530  */
531 static void
InitializeAttributeOids(Relation indexRelation,int numatts,Oid indexoid)532 InitializeAttributeOids(Relation indexRelation,
533 						int numatts,
534 						Oid indexoid)
535 {
536 	TupleDesc	tupleDescriptor;
537 	int			i;
538 
539 	tupleDescriptor = RelationGetDescr(indexRelation);
540 
541 	for (i = 0; i < numatts; i += 1)
542 		TupleDescAttr(tupleDescriptor, i)->attrelid = indexoid;
543 }
544 
545 /* ----------------------------------------------------------------
546  *		AppendAttributeTuples
547  * ----------------------------------------------------------------
548  */
549 static void
AppendAttributeTuples(Relation indexRelation,int numatts)550 AppendAttributeTuples(Relation indexRelation, int numatts)
551 {
552 	Relation	pg_attribute;
553 	CatalogIndexState indstate;
554 	TupleDesc	indexTupDesc;
555 	int			i;
556 
557 	/*
558 	 * open the attribute relation and its indexes
559 	 */
560 	pg_attribute = heap_open(AttributeRelationId, RowExclusiveLock);
561 
562 	indstate = CatalogOpenIndexes(pg_attribute);
563 
564 	/*
565 	 * insert data from new index's tupdesc into pg_attribute
566 	 */
567 	indexTupDesc = RelationGetDescr(indexRelation);
568 
569 	for (i = 0; i < numatts; i++)
570 	{
571 		Form_pg_attribute attr = TupleDescAttr(indexTupDesc, i);
572 
573 		/*
574 		 * There used to be very grotty code here to set these fields, but I
575 		 * think it's unnecessary.  They should be set already.
576 		 */
577 		Assert(attr->attnum == i + 1);
578 		Assert(attr->attcacheoff == -1);
579 
580 		InsertPgAttributeTuple(pg_attribute, attr, indstate);
581 	}
582 
583 	CatalogCloseIndexes(indstate);
584 
585 	heap_close(pg_attribute, RowExclusiveLock);
586 }
587 
/* ----------------------------------------------------------------
 *		UpdateIndexRelation
 *
 * Construct and insert a new entry in the pg_index catalog
 *
 * The per-column arrays (collationOids, classOids, coloptions) cover only
 * the key columns (ii_NumIndexKeyAttrs entries), while indkey covers all
 * columns including INCLUDE columns.  Expressions and the partial-index
 * predicate, if any, are stored as their nodeToString text form; NULL is
 * stored when absent.
 * ----------------------------------------------------------------
 */
static void
UpdateIndexRelation(Oid indexoid,
					Oid heapoid,
					Oid parentIndexOid,
					IndexInfo *indexInfo,
					Oid *collationOids,
					Oid *classOids,
					int16 *coloptions,
					bool primary,
					bool isexclusion,
					bool immediate,
					bool isvalid,
					bool isready)
{
	int2vector *indkey;
	oidvector  *indcollation;
	oidvector  *indclass;
	int2vector *indoption;
	Datum		exprsDatum;
	Datum		predDatum;
	Datum		values[Natts_pg_index];
	bool		nulls[Natts_pg_index];
	Relation	pg_index;
	HeapTuple	tuple;
	int			i;

	/*
	 * Copy the index key, opclass, and indoption info into arrays (should we
	 * make the caller pass them like this to start with?)
	 */
	indkey = buildint2vector(NULL, indexInfo->ii_NumIndexAttrs);
	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
		indkey->values[i] = indexInfo->ii_IndexAttrNumbers[i];
	indcollation = buildoidvector(collationOids, indexInfo->ii_NumIndexKeyAttrs);
	indclass = buildoidvector(classOids, indexInfo->ii_NumIndexKeyAttrs);
	indoption = buildint2vector(coloptions, indexInfo->ii_NumIndexKeyAttrs);

	/*
	 * Convert the index expressions (if any) to a text datum
	 */
	if (indexInfo->ii_Expressions != NIL)
	{
		char	   *exprsString;

		exprsString = nodeToString(indexInfo->ii_Expressions);
		exprsDatum = CStringGetTextDatum(exprsString);
		pfree(exprsString);
	}
	else
		exprsDatum = (Datum) 0;

	/*
	 * Convert the index predicate (if any) to a text datum.  Note we convert
	 * implicit-AND format to normal explicit-AND for storage.
	 */
	if (indexInfo->ii_Predicate != NIL)
	{
		char	   *predString;

		predString = nodeToString(make_ands_explicit(indexInfo->ii_Predicate));
		predDatum = CStringGetTextDatum(predString);
		pfree(predString);
	}
	else
		predDatum = (Datum) 0;

	/*
	 * open the system catalog index relation
	 */
	pg_index = heap_open(IndexRelationId, RowExclusiveLock);

	/*
	 * Build a pg_index tuple
	 */
	MemSet(nulls, false, sizeof(nulls));

	values[Anum_pg_index_indexrelid - 1] = ObjectIdGetDatum(indexoid);
	values[Anum_pg_index_indrelid - 1] = ObjectIdGetDatum(heapoid);
	values[Anum_pg_index_indnatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexAttrs);
	values[Anum_pg_index_indnkeyatts - 1] = Int16GetDatum(indexInfo->ii_NumIndexKeyAttrs);
	values[Anum_pg_index_indisunique - 1] = BoolGetDatum(indexInfo->ii_Unique);
	values[Anum_pg_index_indisprimary - 1] = BoolGetDatum(primary);
	values[Anum_pg_index_indisexclusion - 1] = BoolGetDatum(isexclusion);
	values[Anum_pg_index_indimmediate - 1] = BoolGetDatum(immediate);
	values[Anum_pg_index_indisclustered - 1] = BoolGetDatum(false);
	values[Anum_pg_index_indisvalid - 1] = BoolGetDatum(isvalid);
	values[Anum_pg_index_indcheckxmin - 1] = BoolGetDatum(false);
	values[Anum_pg_index_indisready - 1] = BoolGetDatum(isready);
	values[Anum_pg_index_indislive - 1] = BoolGetDatum(true);
	values[Anum_pg_index_indisreplident - 1] = BoolGetDatum(false);
	values[Anum_pg_index_indkey - 1] = PointerGetDatum(indkey);
	values[Anum_pg_index_indcollation - 1] = PointerGetDatum(indcollation);
	values[Anum_pg_index_indclass - 1] = PointerGetDatum(indclass);
	values[Anum_pg_index_indoption - 1] = PointerGetDatum(indoption);
	values[Anum_pg_index_indexprs - 1] = exprsDatum;
	if (exprsDatum == (Datum) 0)
		nulls[Anum_pg_index_indexprs - 1] = true;
	values[Anum_pg_index_indpred - 1] = predDatum;
	if (predDatum == (Datum) 0)
		nulls[Anum_pg_index_indpred - 1] = true;

	tuple = heap_form_tuple(RelationGetDescr(pg_index), values, nulls);

	/*
	 * insert the tuple into the pg_index catalog
	 */
	CatalogTupleInsert(pg_index, tuple);

	/*
	 * close the relation and free the tuple
	 */
	heap_close(pg_index, RowExclusiveLock);
	heap_freetuple(tuple);
}
708 
709 
710 /*
711  * index_create
712  *
713  * heapRelation: table to build index on (suitably locked by caller)
714  * indexRelationName: what it say
715  * indexRelationId: normally, pass InvalidOid to let this routine
716  *		generate an OID for the index.  During bootstrap this may be
717  *		nonzero to specify a preselected OID.
718  * parentIndexRelid: if creating an index partition, the OID of the
719  *		parent index; otherwise InvalidOid.
720  * parentConstraintId: if creating a constraint on a partition, the OID
721  *		of the constraint in the parent; otherwise InvalidOid.
722  * relFileNode: normally, pass InvalidOid to get new storage.  May be
723  *		nonzero to attach an existing valid build.
724  * indexInfo: same info executor uses to insert into the index
725  * indexColNames: column names to use for index (List of char *)
726  * accessMethodObjectId: OID of index AM to use
727  * tableSpaceId: OID of tablespace to use
728  * collationObjectId: array of collation OIDs, one per index column
729  * classObjectId: array of index opclass OIDs, one per index column
730  * coloptions: array of per-index-column indoption settings
731  * reloptions: AM-specific options
732  * flags: bitmask that can include any combination of these bits:
733  *		INDEX_CREATE_IS_PRIMARY
734  *			the index is a primary key
735  *		INDEX_CREATE_ADD_CONSTRAINT:
736  *			invoke index_constraint_create also
737  *		INDEX_CREATE_SKIP_BUILD:
738  *			skip the index_build() step for the moment; caller must do it
739  *			later (typically via reindex_index())
740  *		INDEX_CREATE_CONCURRENT:
741  *			do not lock the table against writers.  The index will be
742  *			marked "invalid" and the caller must take additional steps
743  *			to fix it up.
744  *		INDEX_CREATE_IF_NOT_EXISTS:
745  *			do not throw an error if a relation with the same name
746  *			already exists.
747  *		INDEX_CREATE_PARTITIONED:
748  *			create a partitioned index (table must be partitioned)
749  * constr_flags: flags passed to index_constraint_create
750  *		(only if INDEX_CREATE_ADD_CONSTRAINT is set)
751  * allow_system_table_mods: allow table to be a system catalog
752  * is_internal: if true, post creation hook for new index
753  * constraintId: if not NULL, receives OID of created constraint
754  *
755  * Returns the OID of the created index.
756  */
757 Oid
index_create(Relation heapRelation,const char * indexRelationName,Oid indexRelationId,Oid parentIndexRelid,Oid parentConstraintId,Oid relFileNode,IndexInfo * indexInfo,List * indexColNames,Oid accessMethodObjectId,Oid tableSpaceId,Oid * collationObjectId,Oid * classObjectId,int16 * coloptions,Datum reloptions,bits16 flags,bits16 constr_flags,bool allow_system_table_mods,bool is_internal,Oid * constraintId)758 index_create(Relation heapRelation,
759 			 const char *indexRelationName,
760 			 Oid indexRelationId,
761 			 Oid parentIndexRelid,
762 			 Oid parentConstraintId,
763 			 Oid relFileNode,
764 			 IndexInfo *indexInfo,
765 			 List *indexColNames,
766 			 Oid accessMethodObjectId,
767 			 Oid tableSpaceId,
768 			 Oid *collationObjectId,
769 			 Oid *classObjectId,
770 			 int16 *coloptions,
771 			 Datum reloptions,
772 			 bits16 flags,
773 			 bits16 constr_flags,
774 			 bool allow_system_table_mods,
775 			 bool is_internal,
776 			 Oid *constraintId)
777 {
778 	Oid			heapRelationId = RelationGetRelid(heapRelation);
779 	Relation	pg_class;
780 	Relation	indexRelation;
781 	TupleDesc	indexTupDesc;
782 	bool		shared_relation;
783 	bool		mapped_relation;
784 	bool		is_exclusion;
785 	Oid			namespaceId;
786 	int			i;
787 	char		relpersistence;
788 	bool		isprimary = (flags & INDEX_CREATE_IS_PRIMARY) != 0;
789 	bool		invalid = (flags & INDEX_CREATE_INVALID) != 0;
790 	bool		concurrent = (flags & INDEX_CREATE_CONCURRENT) != 0;
791 	bool		partitioned = (flags & INDEX_CREATE_PARTITIONED) != 0;
792 	char		relkind;
793 
794 	/* constraint flags can only be set when a constraint is requested */
795 	Assert((constr_flags == 0) ||
796 		   ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0));
797 	/* partitioned indexes must never be "built" by themselves */
798 	Assert(!partitioned || (flags & INDEX_CREATE_SKIP_BUILD));
799 
800 	relkind = partitioned ? RELKIND_PARTITIONED_INDEX : RELKIND_INDEX;
801 	is_exclusion = (indexInfo->ii_ExclusionOps != NULL);
802 
803 	pg_class = heap_open(RelationRelationId, RowExclusiveLock);
804 
805 	/*
806 	 * The index will be in the same namespace as its parent table, and is
807 	 * shared across databases if and only if the parent is.  Likewise, it
808 	 * will use the relfilenode map if and only if the parent does; and it
809 	 * inherits the parent's relpersistence.
810 	 */
811 	namespaceId = RelationGetNamespace(heapRelation);
812 	shared_relation = heapRelation->rd_rel->relisshared;
813 	mapped_relation = RelationIsMapped(heapRelation);
814 	relpersistence = heapRelation->rd_rel->relpersistence;
815 
816 	/*
817 	 * check parameters
818 	 */
819 	if (indexInfo->ii_NumIndexAttrs < 1)
820 		elog(ERROR, "must index at least one column");
821 
822 	if (!allow_system_table_mods &&
823 		IsSystemRelation(heapRelation) &&
824 		IsNormalProcessingMode())
825 		ereport(ERROR,
826 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
827 				 errmsg("user-defined indexes on system catalog tables are not supported")));
828 
829 	/*
830 	 * concurrent index build on a system catalog is unsafe because we tend to
831 	 * release locks before committing in catalogs
832 	 */
833 	if (concurrent &&
834 		IsSystemRelation(heapRelation))
835 		ereport(ERROR,
836 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
837 				 errmsg("concurrent index creation on system catalog tables is not supported")));
838 
839 	/*
840 	 * This case is currently not supported, but there's no way to ask for it
841 	 * in the grammar anyway, so it can't happen.
842 	 */
843 	if (concurrent && is_exclusion)
844 		ereport(ERROR,
845 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
846 				 errmsg_internal("concurrent index creation for exclusion constraints is not supported")));
847 
848 	/*
849 	 * We cannot allow indexing a shared relation after initdb (because
850 	 * there's no way to make the entry in other databases' pg_class).
851 	 */
852 	if (shared_relation && !IsBootstrapProcessingMode())
853 		ereport(ERROR,
854 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
855 				 errmsg("shared indexes cannot be created after initdb")));
856 
857 	/*
858 	 * Shared relations must be in pg_global, too (last-ditch check)
859 	 */
860 	if (shared_relation && tableSpaceId != GLOBALTABLESPACE_OID)
861 		elog(ERROR, "shared relations must be placed in pg_global tablespace");
862 
863 	/*
864 	 * Check for duplicate name (both as to the index, and as to the
865 	 * associated constraint if any).  Such cases would fail on the relevant
866 	 * catalogs' unique indexes anyway, but we prefer to give a friendlier
867 	 * error message.
868 	 */
869 	if (get_relname_relid(indexRelationName, namespaceId))
870 	{
871 		if ((flags & INDEX_CREATE_IF_NOT_EXISTS) != 0)
872 		{
873 			ereport(NOTICE,
874 					(errcode(ERRCODE_DUPLICATE_TABLE),
875 					 errmsg("relation \"%s\" already exists, skipping",
876 							indexRelationName)));
877 			heap_close(pg_class, RowExclusiveLock);
878 			return InvalidOid;
879 		}
880 
881 		ereport(ERROR,
882 				(errcode(ERRCODE_DUPLICATE_TABLE),
883 				 errmsg("relation \"%s\" already exists",
884 						indexRelationName)));
885 	}
886 
887 	if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0 &&
888 		ConstraintNameIsUsed(CONSTRAINT_RELATION, heapRelationId,
889 							 indexRelationName))
890 	{
891 		/*
892 		 * INDEX_CREATE_IF_NOT_EXISTS does not apply here, since the
893 		 * conflicting constraint is not an index.
894 		 */
895 		ereport(ERROR,
896 				(errcode(ERRCODE_DUPLICATE_OBJECT),
897 				 errmsg("constraint \"%s\" for relation \"%s\" already exists",
898 						indexRelationName, RelationGetRelationName(heapRelation))));
899 	}
900 
901 	/*
902 	 * construct tuple descriptor for index tuples
903 	 */
904 	indexTupDesc = ConstructTupleDescriptor(heapRelation,
905 											indexInfo,
906 											indexColNames,
907 											accessMethodObjectId,
908 											collationObjectId,
909 											classObjectId);
910 
911 	/*
912 	 * Allocate an OID for the index, unless we were told what to use.
913 	 *
914 	 * The OID will be the relfilenode as well, so make sure it doesn't
915 	 * collide with either pg_class OIDs or existing physical files.
916 	 */
917 	if (!OidIsValid(indexRelationId))
918 	{
919 		/* Use binary-upgrade override for pg_class.oid/relfilenode? */
920 		if (IsBinaryUpgrade)
921 		{
922 			if (!OidIsValid(binary_upgrade_next_index_pg_class_oid))
923 				ereport(ERROR,
924 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
925 						 errmsg("pg_class index OID value not set when in binary upgrade mode")));
926 
927 			indexRelationId = binary_upgrade_next_index_pg_class_oid;
928 			binary_upgrade_next_index_pg_class_oid = InvalidOid;
929 		}
930 		else
931 		{
932 			indexRelationId =
933 				GetNewRelFileNode(tableSpaceId, pg_class, relpersistence);
934 		}
935 	}
936 
937 	/*
938 	 * create the index relation's relcache entry and, if necessary, the
939 	 * physical disk file. (If we fail further down, it's the smgr's
940 	 * responsibility to remove the disk file again, if any.)
941 	 */
942 	indexRelation = heap_create(indexRelationName,
943 								namespaceId,
944 								tableSpaceId,
945 								indexRelationId,
946 								relFileNode,
947 								indexTupDesc,
948 								relkind,
949 								relpersistence,
950 								shared_relation,
951 								mapped_relation,
952 								allow_system_table_mods);
953 
954 	Assert(indexRelationId == RelationGetRelid(indexRelation));
955 
956 	/*
957 	 * Obtain exclusive lock on it.  Although no other transactions can see it
958 	 * until we commit, this prevents deadlock-risk complaints from lock
959 	 * manager in cases such as CLUSTER.
960 	 */
961 	LockRelation(indexRelation, AccessExclusiveLock);
962 
963 	/*
964 	 * Fill in fields of the index's pg_class entry that are not set correctly
965 	 * by heap_create.
966 	 *
967 	 * XXX should have a cleaner way to create cataloged indexes
968 	 */
969 	indexRelation->rd_rel->relowner = heapRelation->rd_rel->relowner;
970 	indexRelation->rd_rel->relam = accessMethodObjectId;
971 	indexRelation->rd_rel->relhasoids = false;
972 	indexRelation->rd_rel->relispartition = OidIsValid(parentIndexRelid);
973 
974 	/*
975 	 * store index's pg_class entry
976 	 */
977 	InsertPgClassTuple(pg_class, indexRelation,
978 					   RelationGetRelid(indexRelation),
979 					   (Datum) 0,
980 					   reloptions);
981 
982 	/* done with pg_class */
983 	heap_close(pg_class, RowExclusiveLock);
984 
985 	/*
986 	 * now update the object id's of all the attribute tuple forms in the
987 	 * index relation's tuple descriptor
988 	 */
989 	InitializeAttributeOids(indexRelation,
990 							indexInfo->ii_NumIndexAttrs,
991 							indexRelationId);
992 
993 	/*
994 	 * append ATTRIBUTE tuples for the index
995 	 */
996 	AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);
997 
998 	/* ----------------
999 	 *	  update pg_index
1000 	 *	  (append INDEX tuple)
1001 	 *
1002 	 *	  Note that this stows away a representation of "predicate".
1003 	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
1004 	 * ----------------
1005 	 */
1006 	UpdateIndexRelation(indexRelationId, heapRelationId, parentIndexRelid,
1007 						indexInfo,
1008 						collationObjectId, classObjectId, coloptions,
1009 						isprimary, is_exclusion,
1010 						(constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) == 0,
1011 						!concurrent && !invalid,
1012 						!concurrent);
1013 
1014 	/*
1015 	 * Register relcache invalidation on the indexes' heap relation, to
1016 	 * maintain consistency of its index list
1017 	 */
1018 	CacheInvalidateRelcache(heapRelation);
1019 
1020 	/* update pg_inherits, if needed */
1021 	if (OidIsValid(parentIndexRelid))
1022 		StoreSingleInheritance(indexRelationId, parentIndexRelid, 1);
1023 
1024 	/*
1025 	 * Register constraint and dependencies for the index.
1026 	 *
1027 	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
1028 	 * entry.  The index will be linked to the constraint, which in turn is
1029 	 * linked to the table.  If it's not a CONSTRAINT, we need to make a
1030 	 * dependency directly on the table.
1031 	 *
1032 	 * We don't need a dependency on the namespace, because there'll be an
1033 	 * indirect dependency via our parent table.
1034 	 *
1035 	 * During bootstrap we can't register any dependencies, and we don't try
1036 	 * to make a constraint either.
1037 	 */
1038 	if (!IsBootstrapProcessingMode())
1039 	{
1040 		ObjectAddress myself,
1041 					referenced;
1042 
1043 		myself.classId = RelationRelationId;
1044 		myself.objectId = indexRelationId;
1045 		myself.objectSubId = 0;
1046 
1047 		if ((flags & INDEX_CREATE_ADD_CONSTRAINT) != 0)
1048 		{
1049 			char		constraintType;
1050 			ObjectAddress localaddr;
1051 
1052 			if (isprimary)
1053 				constraintType = CONSTRAINT_PRIMARY;
1054 			else if (indexInfo->ii_Unique)
1055 				constraintType = CONSTRAINT_UNIQUE;
1056 			else if (is_exclusion)
1057 				constraintType = CONSTRAINT_EXCLUSION;
1058 			else
1059 			{
1060 				elog(ERROR, "constraint must be PRIMARY, UNIQUE or EXCLUDE");
1061 				constraintType = 0; /* keep compiler quiet */
1062 			}
1063 
1064 			localaddr = index_constraint_create(heapRelation,
1065 												indexRelationId,
1066 												parentConstraintId,
1067 												indexInfo,
1068 												indexRelationName,
1069 												constraintType,
1070 												constr_flags,
1071 												allow_system_table_mods,
1072 												is_internal);
1073 			if (constraintId)
1074 				*constraintId = localaddr.objectId;
1075 		}
1076 		else
1077 		{
1078 			bool		have_simple_col = false;
1079 			DependencyType deptype;
1080 
1081 			deptype = OidIsValid(parentIndexRelid) ? DEPENDENCY_INTERNAL_AUTO : DEPENDENCY_AUTO;
1082 
1083 			/* Create auto dependencies on simply-referenced columns */
1084 			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1085 			{
1086 				if (indexInfo->ii_IndexAttrNumbers[i] != 0)
1087 				{
1088 					referenced.classId = RelationRelationId;
1089 					referenced.objectId = heapRelationId;
1090 					referenced.objectSubId = indexInfo->ii_IndexAttrNumbers[i];
1091 
1092 					recordDependencyOn(&myself, &referenced, deptype);
1093 
1094 					have_simple_col = true;
1095 				}
1096 			}
1097 
1098 			/*
1099 			 * If there are no simply-referenced columns, give the index an
1100 			 * auto dependency on the whole table.  In most cases, this will
1101 			 * be redundant, but it might not be if the index expressions and
1102 			 * predicate contain no Vars or only whole-row Vars.
1103 			 */
1104 			if (!have_simple_col)
1105 			{
1106 				referenced.classId = RelationRelationId;
1107 				referenced.objectId = heapRelationId;
1108 				referenced.objectSubId = 0;
1109 
1110 				recordDependencyOn(&myself, &referenced, deptype);
1111 			}
1112 		}
1113 
1114 		/* Store dependency on parent index, if any */
1115 		if (OidIsValid(parentIndexRelid))
1116 		{
1117 			referenced.classId = RelationRelationId;
1118 			referenced.objectId = parentIndexRelid;
1119 			referenced.objectSubId = 0;
1120 
1121 			recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL_AUTO);
1122 		}
1123 
1124 		/* Store dependency on collations */
1125 		/* The default collation is pinned, so don't bother recording it */
1126 		for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1127 		{
1128 			if (OidIsValid(collationObjectId[i]) &&
1129 				collationObjectId[i] != DEFAULT_COLLATION_OID)
1130 			{
1131 				referenced.classId = CollationRelationId;
1132 				referenced.objectId = collationObjectId[i];
1133 				referenced.objectSubId = 0;
1134 
1135 				recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1136 			}
1137 		}
1138 
1139 		/* Store dependency on operator classes */
1140 		for (i = 0; i < indexInfo->ii_NumIndexKeyAttrs; i++)
1141 		{
1142 			referenced.classId = OperatorClassRelationId;
1143 			referenced.objectId = classObjectId[i];
1144 			referenced.objectSubId = 0;
1145 
1146 			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
1147 		}
1148 
1149 		/* Store dependencies on anything mentioned in index expressions */
1150 		if (indexInfo->ii_Expressions)
1151 		{
1152 			recordDependencyOnSingleRelExpr(&myself,
1153 											(Node *) indexInfo->ii_Expressions,
1154 											heapRelationId,
1155 											DEPENDENCY_NORMAL,
1156 											DEPENDENCY_AUTO, false);
1157 		}
1158 
1159 		/* Store dependencies on anything mentioned in predicate */
1160 		if (indexInfo->ii_Predicate)
1161 		{
1162 			recordDependencyOnSingleRelExpr(&myself,
1163 											(Node *) indexInfo->ii_Predicate,
1164 											heapRelationId,
1165 											DEPENDENCY_NORMAL,
1166 											DEPENDENCY_AUTO, false);
1167 		}
1168 	}
1169 	else
1170 	{
1171 		/* Bootstrap mode - assert we weren't asked for constraint support */
1172 		Assert((flags & INDEX_CREATE_ADD_CONSTRAINT) == 0);
1173 	}
1174 
1175 	/* Post creation hook for new index */
1176 	InvokeObjectPostCreateHookArg(RelationRelationId,
1177 								  indexRelationId, 0, is_internal);
1178 
1179 	/*
1180 	 * Advance the command counter so that we can see the newly-entered
1181 	 * catalog tuples for the index.
1182 	 */
1183 	CommandCounterIncrement();
1184 
1185 	/*
1186 	 * In bootstrap mode, we have to fill in the index strategy structure with
1187 	 * information from the catalogs.  If we aren't bootstrapping, then the
1188 	 * relcache entry has already been rebuilt thanks to sinval update during
1189 	 * CommandCounterIncrement.
1190 	 */
1191 	if (IsBootstrapProcessingMode())
1192 		RelationInitIndexAccessInfo(indexRelation);
1193 	else
1194 		Assert(indexRelation->rd_indexcxt != NULL);
1195 
1196 	indexRelation->rd_index->indnkeyatts = indexInfo->ii_NumIndexKeyAttrs;
1197 
1198 	/*
1199 	 * If this is bootstrap (initdb) time, then we don't actually fill in the
1200 	 * index yet.  We'll be creating more indexes and classes later, so we
1201 	 * delay filling them in until just before we're done with bootstrapping.
1202 	 * Similarly, if the caller specified to skip the build then filling the
1203 	 * index is delayed till later (ALTER TABLE can save work in some cases
1204 	 * with this).  Otherwise, we call the AM routine that constructs the
1205 	 * index.
1206 	 */
1207 	if (IsBootstrapProcessingMode())
1208 	{
1209 		index_register(heapRelationId, indexRelationId, indexInfo);
1210 	}
1211 	else if ((flags & INDEX_CREATE_SKIP_BUILD) != 0)
1212 	{
1213 		/*
1214 		 * Caller is responsible for filling the index later on.  However,
1215 		 * we'd better make sure that the heap relation is correctly marked as
1216 		 * having an index.
1217 		 */
1218 		index_update_stats(heapRelation,
1219 						   true,
1220 						   -1.0);
1221 		/* Make the above update visible */
1222 		CommandCounterIncrement();
1223 	}
1224 	else
1225 	{
1226 		index_build(heapRelation, indexRelation, indexInfo, isprimary, false,
1227 					true);
1228 	}
1229 
1230 	/*
1231 	 * Close the index; but we keep the lock that we acquired above until end
1232 	 * of transaction.  Closing the heap is caller's responsibility.
1233 	 */
1234 	index_close(indexRelation, NoLock);
1235 
1236 	return indexRelationId;
1237 }
1238 
1239 /*
1240  * index_constraint_create
1241  *
1242  * Set up a constraint associated with an index.  Return the new constraint's
1243  * address.
1244  *
1245  * heapRelation: table owning the index (must be suitably locked by caller)
1246  * indexRelationId: OID of the index
1247  * parentConstraintId: if constraint is on a partition, the OID of the
1248  *		constraint in the parent.
1249  * indexInfo: same info executor uses to insert into the index
 * constraintName: what it says (generally, should match name of index)
1251  * constraintType: one of CONSTRAINT_PRIMARY, CONSTRAINT_UNIQUE, or
1252  *		CONSTRAINT_EXCLUSION
1253  * flags: bitmask that can include any combination of these bits:
1254  *		INDEX_CONSTR_CREATE_MARK_AS_PRIMARY: index is a PRIMARY KEY
1255  *		INDEX_CONSTR_CREATE_DEFERRABLE: constraint is DEFERRABLE
1256  *		INDEX_CONSTR_CREATE_INIT_DEFERRED: constraint is INITIALLY DEFERRED
1257  *		INDEX_CONSTR_CREATE_UPDATE_INDEX: update the pg_index row
1258  *		INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS: remove existing dependencies
1259  *			of index on table's columns
1260  * allow_system_table_mods: allow table to be a system catalog
1261  * is_internal: index is constructed due to internal process
1262  */
ObjectAddress
index_constraint_create(Relation heapRelation,
						Oid indexRelationId,
						Oid parentConstraintId,
						IndexInfo *indexInfo,
						const char *constraintName,
						char constraintType,
						bits16 constr_flags,
						bool allow_system_table_mods,
						bool is_internal)
{
	Oid			namespaceId = RelationGetNamespace(heapRelation);
	ObjectAddress myself,
				referenced;
	Oid			conOid;
	bool		deferrable;
	bool		initdeferred;
	bool		mark_as_primary;
	bool		islocal;
	bool		noinherit;
	int			inhcount;

	/* Decode the constr_flags bits that drive behavior below */
	deferrable = (constr_flags & INDEX_CONSTR_CREATE_DEFERRABLE) != 0;
	initdeferred = (constr_flags & INDEX_CONSTR_CREATE_INIT_DEFERRED) != 0;
	mark_as_primary = (constr_flags & INDEX_CONSTR_CREATE_MARK_AS_PRIMARY) != 0;

	/* constraint creation support doesn't work while bootstrapping */
	Assert(!IsBootstrapProcessingMode());

	/* enforce system-table restriction */
	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/* primary/unique constraints shouldn't have any expressions */
	if (indexInfo->ii_Expressions &&
		constraintType != CONSTRAINT_EXCLUSION)
		elog(ERROR, "constraints cannot have index expressions");

	/*
	 * If we're manufacturing a constraint for a pre-existing index, we need
	 * to get rid of the existing auto dependencies for the index (the ones
	 * that index_create() would have made instead of calling this function).
	 *
	 * Note: this code would not necessarily do the right thing if the index
	 * has any expressions or predicate, but we'd never be turning such an
	 * index into a UNIQUE or PRIMARY KEY constraint.
	 */
	if (constr_flags & INDEX_CONSTR_CREATE_REMOVE_OLD_DEPS)
		deleteDependencyRecordsForClass(RelationRelationId, indexRelationId,
										RelationRelationId, DEPENDENCY_AUTO);

	/*
	 * A constraint on a partition inherits from the parent's constraint;
	 * otherwise it is locally defined and non-inheritable.
	 */
	if (OidIsValid(parentConstraintId))
	{
		islocal = false;
		inhcount = 1;
		noinherit = false;
	}
	else
	{
		islocal = true;
		inhcount = 0;
		noinherit = true;
	}

	/*
	 * Construct a pg_constraint entry.
	 */
	conOid = CreateConstraintEntry(constraintName,
								   namespaceId,
								   constraintType,
								   deferrable,
								   initdeferred,
								   true,	/* constraint is validated */
								   parentConstraintId,
								   RelationGetRelid(heapRelation),
								   indexInfo->ii_IndexAttrNumbers,
								   indexInfo->ii_NumIndexKeyAttrs,
								   indexInfo->ii_NumIndexAttrs,
								   InvalidOid,	/* no domain */
								   indexRelationId, /* index OID */
								   InvalidOid,	/* no foreign key */
								   NULL,	/* FK-only fields below are all unused */
								   NULL,
								   NULL,
								   NULL,
								   0,
								   ' ',
								   ' ',
								   ' ',
								   indexInfo->ii_ExclusionOps,
								   NULL,	/* no check constraint */
								   NULL,
								   NULL,
								   islocal,
								   inhcount,
								   noinherit,
								   is_internal);

	/*
	 * Register the index as internally dependent on the constraint.
	 *
	 * Note that the constraint has a dependency on the table, so we don't
	 * need (or want) any direct dependency from the index to the table.
	 */
	myself.classId = RelationRelationId;
	myself.objectId = indexRelationId;
	myself.objectSubId = 0;

	referenced.classId = ConstraintRelationId;
	referenced.objectId = conOid;
	referenced.objectSubId = 0;

	recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * Also, if this is a constraint on a partition, mark it as depending on
	 * the constraint in the parent.
	 */
	if (OidIsValid(parentConstraintId))
	{
		ObjectAddress parentConstr;

		ObjectAddressSet(parentConstr, ConstraintRelationId, parentConstraintId);
		recordDependencyOn(&referenced, &parentConstr, DEPENDENCY_INTERNAL_AUTO);
	}

	/*
	 * If the constraint is deferrable, create the deferred uniqueness
	 * checking trigger.  (The trigger will be given an internal dependency on
	 * the constraint by CreateTrigger.)
	 */
	if (deferrable)
	{
		CreateTrigStmt *trigger;

		trigger = makeNode(CreateTrigStmt);
		trigger->trigname = (constraintType == CONSTRAINT_PRIMARY) ?
			"PK_ConstraintTrigger" :
			"Unique_ConstraintTrigger";
		trigger->relation = NULL;
		trigger->funcname = SystemFuncName("unique_key_recheck");
		trigger->args = NIL;
		trigger->row = true;
		trigger->timing = TRIGGER_TYPE_AFTER;
		trigger->events = TRIGGER_TYPE_INSERT | TRIGGER_TYPE_UPDATE;
		trigger->columns = NIL;
		trigger->whenClause = NULL;
		trigger->isconstraint = true;
		trigger->deferrable = true;
		trigger->initdeferred = initdeferred;
		trigger->constrrel = NULL;

		(void) CreateTrigger(trigger, NULL, RelationGetRelid(heapRelation),
							 InvalidOid, conOid, indexRelationId, InvalidOid,
							 InvalidOid, NULL, true, false);
	}

	/*
	 * If needed, mark the index as primary and/or deferred in pg_index.
	 *
	 * Note: When making an existing index into a constraint, caller must have
	 * a table lock that prevents concurrent table updates; otherwise, there
	 * is a risk that concurrent readers of the table will miss seeing this
	 * index at all.
	 */
	if ((constr_flags & INDEX_CONSTR_CREATE_UPDATE_INDEX) &&
		(mark_as_primary || deferrable))
	{
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;
		bool		dirty = false;	/* did we actually change the tuple? */

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexRelationId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexRelationId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		if (mark_as_primary && !indexForm->indisprimary)
		{
			indexForm->indisprimary = true;
			dirty = true;
		}

		/* a deferrable constraint means uniqueness is not checked immediately */
		if (deferrable && indexForm->indimmediate)
		{
			indexForm->indimmediate = false;
			dirty = true;
		}

		if (dirty)
		{
			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

			InvokeObjectPostAlterHookArg(IndexRelationId, indexRelationId, 0,
										 InvalidOid, is_internal);
		}

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/* return the address of the new pg_constraint entry */
	return referenced;
}
1474 
1475 /*
1476  *		index_drop
1477  *
1478  * NOTE: this routine should now only be called through performDeletion(),
1479  * else associated dependencies won't be cleaned up.
1480  */
void
index_drop(Oid indexId, bool concurrent)
{
	Oid			heapId;
	Relation	userHeapRelation;
	Relation	userIndexRelation;
	Relation	indexRelation;
	HeapTuple	tuple;
	bool		hasexprs;	/* does the index have expression columns? */
	LockRelId	heaprelid,
				indexrelid;
	LOCKTAG		heaplocktag;
	LOCKMODE	lockmode;

	/*
	 * A temporary relation uses a non-concurrent DROP.  Other backends can't
	 * access a temporary relation, so there's no harm in grabbing a stronger
	 * lock (see comments in RemoveRelations), and a non-concurrent DROP is
	 * more efficient.
	 */
	Assert(get_rel_persistence(indexId) != RELPERSISTENCE_TEMP ||
		   !concurrent);

	/*
	 * To drop an index safely, we must grab exclusive lock on its parent
	 * table.  Exclusive lock on the index alone is insufficient because
	 * another backend might be about to execute a query on the parent table.
	 * If it relies on a previously cached list of index OIDs, then it could
	 * attempt to access the just-dropped index.  We must therefore take a
	 * table lock strong enough to prevent all queries on the table from
	 * proceeding until we commit and send out a shared-cache-inval notice
	 * that will make them update their index lists.
	 *
	 * In the concurrent case we avoid this requirement by disabling index use
	 * in multiple steps and waiting out any transactions that might be using
	 * the index, so we don't need exclusive lock on the parent table. Instead
	 * we take ShareUpdateExclusiveLock, to ensure that two sessions aren't
	 * doing CREATE/DROP INDEX CONCURRENTLY on the same index.  (We will get
	 * AccessExclusiveLock on the index below, once we're sure nobody else is
	 * using it.)
	 */
	heapId = IndexGetRelation(indexId, false);
	lockmode = concurrent ? ShareUpdateExclusiveLock : AccessExclusiveLock;
	userHeapRelation = heap_open(heapId, lockmode);
	userIndexRelation = index_open(indexId, lockmode);

	/*
	 * We might still have open queries using it in our own session, which the
	 * above locking won't prevent, so test explicitly.
	 */
	CheckTableNotInUse(userIndexRelation, "DROP INDEX");

	/*
	 * Drop Index Concurrently is more or less the reverse process of Create
	 * Index Concurrently.
	 *
	 * First we unset indisvalid so queries starting afterwards don't use the
	 * index to answer queries anymore.  We have to keep indisready = true so
	 * transactions that are still scanning the index can continue to see
	 * valid index contents.  For instance, if they are using READ COMMITTED
	 * mode, and another transaction makes changes and commits, they need to
	 * see those new tuples in the index.
	 *
	 * After all transactions that could possibly have used the index for
	 * queries end, we can unset indisready and indislive, then wait till
	 * nobody could be touching it anymore.  (Note: we need indislive because
	 * this state must be distinct from the initial state during CREATE INDEX
	 * CONCURRENTLY, which has indislive true while indisready and indisvalid
	 * are false.  That's because in that state, transactions must examine the
	 * index for HOT-safety decisions, while in this state we don't want them
	 * to open it at all.)
	 *
	 * Since all predicate locks on the index are about to be made invalid, we
	 * must promote them to predicate locks on the heap.  In the
	 * non-concurrent case we can just do that now.  In the concurrent case
	 * it's a bit trickier.  The predicate locks must be moved when there are
	 * no index scans in progress on the index and no more can subsequently
	 * start, so that no new predicate locks can be made on the index.  Also,
	 * they must be moved before heap inserts stop maintaining the index, else
	 * the conflict with the predicate lock on the index gap could be missed
	 * before the lock on the heap relation is in place to detect a conflict
	 * based on the heap tuple insert.
	 */
	if (concurrent)
	{
		/*
		 * We must commit our transaction in order to make the first pg_index
		 * state update visible to other sessions.  If the DROP machinery has
		 * already performed any other actions (removal of other objects,
		 * pg_depend entries, etc), the commit would make those actions
		 * permanent, which would leave us with inconsistent catalog state if
		 * we fail partway through the following sequence.  Since DROP INDEX
		 * CONCURRENTLY is restricted to dropping just one index that has no
		 * dependencies, we should get here before anything's been done ---
		 * but let's check that to be sure.  We can verify that the current
		 * transaction has not executed any transactional updates by checking
		 * that no XID has been assigned.
		 */
		if (GetTopTransactionIdIfAny() != InvalidTransactionId)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DROP INDEX CONCURRENTLY must be first action in transaction")));

		/*
		 * Mark index invalid by updating its pg_index entry
		 */
		index_set_state_flags(indexId, INDEX_DROP_CLEAR_VALID);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh any cached plans that might reference the
		 * index.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/* save lockrelid and locktag for below, then close but keep locks */
		heaprelid = userHeapRelation->rd_lockInfo.lockRelId;
		SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
		indexrelid = userIndexRelation->rd_lockInfo.lockRelId;

		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * We must commit our current transaction so that the indisvalid
		 * update becomes visible to other transactions; then start another.
		 * Note that any previously-built data structures are lost in the
		 * commit.  The only data we keep past here are the relation IDs.
		 *
		 * Before committing, get a session-level lock on the table, to ensure
		 * that neither it nor the index can be dropped before we finish. This
		 * cannot block, even if someone else is waiting for access, because
		 * we already have the same lock within our transaction.
		 */
		LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		LockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);

		PopActiveSnapshot();
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Now we must wait until no running transaction could be using the
		 * index for a query.  Use AccessExclusiveLock here to check for
		 * running transactions that hold locks of any kind on the table. Note
		 * we do not need to worry about xacts that open the table for reading
		 * after this point; they will see the index as invalid when they open
		 * the relation.
		 *
		 * Note: the reason we use actual lock acquisition here, rather than
		 * just checking the ProcArray and sleeping, is that deadlock is
		 * possible if one of the transactions in question is blocked trying
		 * to acquire an exclusive lock on our table.  The lock code will
		 * detect deadlock and error out properly.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * No more predicate locks will be acquired on this index, and we're
		 * about to stop doing inserts into the index which could show
		 * conflicts with existing predicate locks, so now is the time to move
		 * them to the heap relation.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, ShareUpdateExclusiveLock);
		TransferPredicateLocksToHeapRelation(userIndexRelation);

		/*
		 * Now we are sure that nobody uses the index for queries; they just
		 * might have it open for updating it.  So now we can unset indisready
		 * and indislive, then wait till nobody could be using it at all
		 * anymore.
		 */
		index_set_state_flags(indexId, INDEX_DROP_SET_DEAD);

		/*
		 * Invalidate the relcache for the table, so that after this commit
		 * all sessions will refresh the table's index list.  Forgetting just
		 * the index's relcache entry is not enough.
		 */
		CacheInvalidateRelcache(userHeapRelation);

		/*
		 * Close the relations again, though still holding session lock.
		 */
		heap_close(userHeapRelation, NoLock);
		index_close(userIndexRelation, NoLock);

		/*
		 * Again, commit the transaction to make the pg_index update visible
		 * to other sessions.
		 */
		CommitTransactionCommand();
		StartTransactionCommand();

		/*
		 * Wait till every transaction that saw the old index state has
		 * finished.
		 */
		WaitForLockers(heaplocktag, AccessExclusiveLock);

		/*
		 * Re-open relations to allow us to complete our actions.
		 *
		 * At this point, nothing should be accessing the index, but let's
		 * leave nothing to chance and grab AccessExclusiveLock on the index
		 * before the physical deletion.
		 */
		userHeapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
		userIndexRelation = index_open(indexId, AccessExclusiveLock);
	}
	else
	{
		/* Not concurrent, so just transfer predicate locks and we're good */
		TransferPredicateLocksToHeapRelation(userIndexRelation);
	}

	/*
	 * Schedule physical removal of the files (if any)
	 *
	 * Partitioned indexes have no storage of their own, so skip them.
	 */
	if (userIndexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
		RelationDropStorage(userIndexRelation);

	/*
	 * Close and flush the index's relcache entry, to ensure relcache doesn't
	 * try to rebuild it while we're deleting catalog entries. We keep the
	 * lock though.
	 */
	index_close(userIndexRelation, NoLock);

	RelationForgetRelation(indexId);

	/*
	 * fix INDEX relation, and check for expressional index
	 */
	indexRelation = heap_open(IndexRelationId, RowExclusiveLock);

	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "cache lookup failed for index %u", indexId);

	/* remember this before we delete the pg_index tuple */
	hasexprs = !heap_attisnull(tuple, Anum_pg_index_indexprs,
							   RelationGetDescr(indexRelation));

	CatalogTupleDelete(indexRelation, &tuple->t_self);

	ReleaseSysCache(tuple);
	heap_close(indexRelation, RowExclusiveLock);

	/*
	 * if it has any expression columns, we might have stored statistics about
	 * them.
	 */
	if (hasexprs)
		RemoveStatistics(indexId, 0);

	/*
	 * fix ATTRIBUTE relation
	 */
	DeleteAttributeTuples(indexId);

	/*
	 * fix RELATION relation
	 */
	DeleteRelationTuple(indexId);

	/*
	 * fix INHERITS relation
	 */
	DeleteInheritsTuple(indexId, InvalidOid);

	/*
	 * We are presently too lazy to attempt to compute the new correct value
	 * of relhasindex (the next VACUUM will fix it if necessary). So there is
	 * no need to update the pg_class tuple for the owning relation. But we
	 * must send out a shared-cache-inval notice on the owning relation to
	 * ensure other backends update their relcache lists of indexes.  (In the
	 * concurrent case, this is redundant but harmless.)
	 */
	CacheInvalidateRelcache(userHeapRelation);

	/*
	 * Close owning rel, but keep lock
	 */
	heap_close(userHeapRelation, NoLock);

	/*
	 * Release the session locks before we go.
	 */
	if (concurrent)
	{
		UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
		UnlockRelationIdForSession(&indexrelid, ShareUpdateExclusiveLock);
	}
}
1776 
1777 /* ----------------------------------------------------------------
1778  *						index_build support
1779  * ----------------------------------------------------------------
1780  */
1781 
1782 /* ----------------
1783  *		BuildIndexInfo
1784  *			Construct an IndexInfo record for an open index
1785  *
1786  * IndexInfo stores the information about the index that's needed by
1787  * FormIndexDatum, which is used for both index_build() and later insertion
1788  * of individual index tuples.  Normally we build an IndexInfo for an index
1789  * just once per command, and then use it for (potentially) many tuples.
1790  * ----------------
1791  */
1792 IndexInfo *
BuildIndexInfo(Relation index)1793 BuildIndexInfo(Relation index)
1794 {
1795 	IndexInfo  *ii = makeNode(IndexInfo);
1796 	Form_pg_index indexStruct = index->rd_index;
1797 	int			i;
1798 	int			numAtts;
1799 
1800 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1801 	numAtts = indexStruct->indnatts;
1802 	if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
1803 		elog(ERROR, "invalid indnatts %d for index %u",
1804 			 numAtts, RelationGetRelid(index));
1805 	ii->ii_NumIndexAttrs = numAtts;
1806 	ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts;
1807 	Assert(ii->ii_NumIndexKeyAttrs != 0);
1808 	Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs);
1809 
1810 	for (i = 0; i < numAtts; i++)
1811 		ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
1812 
1813 	/* fetch any expressions needed for expressional indexes */
1814 	ii->ii_Expressions = RelationGetIndexExpressions(index);
1815 	ii->ii_ExpressionsState = NIL;
1816 
1817 	/* fetch index predicate if any */
1818 	ii->ii_Predicate = RelationGetIndexPredicate(index);
1819 	ii->ii_PredicateState = NULL;
1820 
1821 	/* fetch exclusion constraint info if any */
1822 	if (indexStruct->indisexclusion)
1823 	{
1824 		RelationGetExclusionInfo(index,
1825 								 &ii->ii_ExclusionOps,
1826 								 &ii->ii_ExclusionProcs,
1827 								 &ii->ii_ExclusionStrats);
1828 	}
1829 	else
1830 	{
1831 		ii->ii_ExclusionOps = NULL;
1832 		ii->ii_ExclusionProcs = NULL;
1833 		ii->ii_ExclusionStrats = NULL;
1834 	}
1835 
1836 	/* other info */
1837 	ii->ii_Unique = indexStruct->indisunique;
1838 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1839 	/* assume not doing speculative insertion for now */
1840 	ii->ii_UniqueOps = NULL;
1841 	ii->ii_UniqueProcs = NULL;
1842 	ii->ii_UniqueStrats = NULL;
1843 
1844 	/* initialize index-build state to default */
1845 	ii->ii_Concurrent = false;
1846 	ii->ii_BrokenHotChain = false;
1847 	ii->ii_ParallelWorkers = 0;
1848 
1849 	/* set up for possible use by index AM */
1850 	ii->ii_Am = index->rd_rel->relam;
1851 	ii->ii_AmCache = NULL;
1852 	ii->ii_Context = CurrentMemoryContext;
1853 
1854 	return ii;
1855 }
1856 
1857 /* ----------------
1858  *		BuildDummyIndexInfo
1859  *			Construct a dummy IndexInfo record for an open index
1860  *
1861  * This differs from the real BuildIndexInfo in that it will never run any
1862  * user-defined code that might exist in index expressions or predicates.
1863  * Instead of the real index expressions, we return null constants that have
1864  * the right types/typmods/collations.  Predicates and exclusion clauses are
1865  * just ignored.  This is sufficient for the purpose of truncating an index,
1866  * since we will not need to actually evaluate the expressions or predicates;
1867  * the only thing that's likely to be done with the data is construction of
1868  * a tupdesc describing the index's rowtype.
1869  * ----------------
1870  */
1871 IndexInfo *
BuildDummyIndexInfo(Relation index)1872 BuildDummyIndexInfo(Relation index)
1873 {
1874 	IndexInfo  *ii = makeNode(IndexInfo);
1875 	Form_pg_index indexStruct = index->rd_index;
1876 	int			i;
1877 	int			numAtts;
1878 
1879 	/* check the number of keys, and copy attr numbers into the IndexInfo */
1880 	numAtts = indexStruct->indnatts;
1881 	if (numAtts < 1 || numAtts > INDEX_MAX_KEYS)
1882 		elog(ERROR, "invalid indnatts %d for index %u",
1883 			 numAtts, RelationGetRelid(index));
1884 	ii->ii_NumIndexAttrs = numAtts;
1885 	ii->ii_NumIndexKeyAttrs = indexStruct->indnkeyatts;
1886 	Assert(ii->ii_NumIndexKeyAttrs != 0);
1887 	Assert(ii->ii_NumIndexKeyAttrs <= ii->ii_NumIndexAttrs);
1888 
1889 	for (i = 0; i < numAtts; i++)
1890 		ii->ii_IndexAttrNumbers[i] = indexStruct->indkey.values[i];
1891 
1892 	/* fetch dummy expressions for expressional indexes */
1893 	ii->ii_Expressions = RelationGetDummyIndexExpressions(index);
1894 	ii->ii_ExpressionsState = NIL;
1895 
1896 	/* pretend there is no predicate */
1897 	ii->ii_Predicate = NIL;
1898 	ii->ii_PredicateState = NULL;
1899 
1900 	/* We ignore the exclusion constraint if any */
1901 	ii->ii_ExclusionOps = NULL;
1902 	ii->ii_ExclusionProcs = NULL;
1903 	ii->ii_ExclusionStrats = NULL;
1904 
1905 	/* other info */
1906 	ii->ii_Unique = indexStruct->indisunique;
1907 	ii->ii_ReadyForInserts = IndexIsReady(indexStruct);
1908 	/* assume not doing speculative insertion for now */
1909 	ii->ii_UniqueOps = NULL;
1910 	ii->ii_UniqueProcs = NULL;
1911 	ii->ii_UniqueStrats = NULL;
1912 
1913 	/* initialize index-build state to default */
1914 	ii->ii_Concurrent = false;
1915 	ii->ii_BrokenHotChain = false;
1916 	ii->ii_ParallelWorkers = 0;
1917 
1918 	/* set up for possible use by index AM */
1919 	ii->ii_Am = index->rd_rel->relam;
1920 	ii->ii_AmCache = NULL;
1921 	ii->ii_Context = CurrentMemoryContext;
1922 
1923 	return ii;
1924 }
1925 
/*
 * CompareIndexInfo
 *		Return whether the properties of two indexes (in different tables)
 *		indicate that they have the "same" definitions.
 *
 * info1 and info2 belong to different relations; attmap translates info2's
 * attribute numbers into info1's numbering (attmap[attno2 - 1] == attno1).
 *
 * Note: passing collations and opfamilies separately is a kludge.  Adding
 * them to IndexInfo may result in better coding here and elsewhere.
 *
 * Use convert_tuples_by_name_map(index2, index1) to build the attmap.
 */
bool
CompareIndexInfo(IndexInfo *info1, IndexInfo *info2,
				 Oid *collations1, Oid *collations2,
				 Oid *opfamilies1, Oid *opfamilies2,
				 AttrNumber *attmap, int maplen)
{
	int			i;

	/* a unique index is never equivalent to a non-unique one */
	if (info1->ii_Unique != info2->ii_Unique)
		return false;

	/* indexes are only equivalent if they have the same access method */
	if (info1->ii_Am != info2->ii_Am)
		return false;

	/* and same number of attributes */
	if (info1->ii_NumIndexAttrs != info2->ii_NumIndexAttrs)
		return false;

	/* and same number of key attributes */
	if (info1->ii_NumIndexKeyAttrs != info2->ii_NumIndexKeyAttrs)
		return false;

	/*
	 * and columns match through the attribute map (actual attribute numbers
	 * might differ!)  Note that this implies that index columns that are
	 * expressions appear in the same positions.  We will next compare the
	 * expressions themselves.
	 */
	for (i = 0; i < info1->ii_NumIndexAttrs; i++)
	{
		/* the map must be able to translate every column we reference */
		if (maplen < info2->ii_IndexAttrNumbers[i])
			elog(ERROR, "incorrect attribute map");

		/* ignore expressions at this stage (attnum 0 marks an expression) */
		if ((info1->ii_IndexAttrNumbers[i] != InvalidAttrNumber) &&
			(attmap[info2->ii_IndexAttrNumbers[i] - 1] !=
			 info1->ii_IndexAttrNumbers[i]))
			return false;

		/* collation and opfamily is not valid for including columns */
		if (i >= info1->ii_NumIndexKeyAttrs)
			continue;

		if (collations1[i] != collations2[i])
			return false;
		if (opfamilies1[i] != opfamilies2[i])
			return false;
	}

	/*
	 * For expression indexes: either both are expression indexes, or neither
	 * is; if they are, make sure the expressions match.
	 */
	if ((info1->ii_Expressions != NIL) != (info2->ii_Expressions != NIL))
		return false;
	if (info1->ii_Expressions != NIL)
	{
		bool		found_whole_row;
		Node	   *mapped;

		/* rewrite info2's expressions into info1's attribute numbering */
		mapped = map_variable_attnos((Node *) info2->ii_Expressions,
									 1, 0, attmap, maplen,
									 InvalidOid, &found_whole_row);
		if (found_whole_row)
		{
			/*
			 * we could throw an error here, but seems out of scope for this
			 * routine.
			 */
			return false;
		}

		if (!equal(info1->ii_Expressions, mapped))
			return false;
	}

	/* Partial index predicates must be identical, if they exist */
	if ((info1->ii_Predicate == NULL) != (info2->ii_Predicate == NULL))
		return false;
	if (info1->ii_Predicate != NULL)
	{
		bool		found_whole_row;
		Node	   *mapped;

		/* rewrite info2's predicate into info1's attribute numbering */
		mapped = map_variable_attnos((Node *) info2->ii_Predicate,
									 1, 0, attmap, maplen,
									 InvalidOid, &found_whole_row);
		if (found_whole_row)
		{
			/*
			 * we could throw an error here, but seems out of scope for this
			 * routine.
			 */
			return false;
		}
		if (!equal(info1->ii_Predicate, mapped))
			return false;
	}

	/* No support currently for comparing exclusion indexes. */
	if (info1->ii_ExclusionOps != NULL || info2->ii_ExclusionOps != NULL)
		return false;

	return true;
}
2042 
2043 /* ----------------
2044  *		BuildSpeculativeIndexInfo
2045  *			Add extra state to IndexInfo record
2046  *
2047  * For unique indexes, we usually don't want to add info to the IndexInfo for
2048  * checking uniqueness, since the B-Tree AM handles that directly.  However,
2049  * in the case of speculative insertion, additional support is required.
2050  *
2051  * Do this processing here rather than in BuildIndexInfo() to not incur the
2052  * overhead in the common non-speculative cases.
2053  * ----------------
2054  */
2055 void
BuildSpeculativeIndexInfo(Relation index,IndexInfo * ii)2056 BuildSpeculativeIndexInfo(Relation index, IndexInfo *ii)
2057 {
2058 	int			indnkeyatts;
2059 	int			i;
2060 
2061 	indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
2062 
2063 	/*
2064 	 * fetch info for checking unique indexes
2065 	 */
2066 	Assert(ii->ii_Unique);
2067 
2068 	if (index->rd_rel->relam != BTREE_AM_OID)
2069 		elog(ERROR, "unexpected non-btree speculative unique index");
2070 
2071 	ii->ii_UniqueOps = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2072 	ii->ii_UniqueProcs = (Oid *) palloc(sizeof(Oid) * indnkeyatts);
2073 	ii->ii_UniqueStrats = (uint16 *) palloc(sizeof(uint16) * indnkeyatts);
2074 
2075 	/*
2076 	 * We have to look up the operator's strategy number.  This provides a
2077 	 * cross-check that the operator does match the index.
2078 	 */
2079 	/* We need the func OIDs and strategy numbers too */
2080 	for (i = 0; i < indnkeyatts; i++)
2081 	{
2082 		ii->ii_UniqueStrats[i] = BTEqualStrategyNumber;
2083 		ii->ii_UniqueOps[i] =
2084 			get_opfamily_member(index->rd_opfamily[i],
2085 								index->rd_opcintype[i],
2086 								index->rd_opcintype[i],
2087 								ii->ii_UniqueStrats[i]);
2088 		if (!OidIsValid(ii->ii_UniqueOps[i]))
2089 			elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
2090 				 ii->ii_UniqueStrats[i], index->rd_opcintype[i],
2091 				 index->rd_opcintype[i], index->rd_opfamily[i]);
2092 		ii->ii_UniqueProcs[i] = get_opcode(ii->ii_UniqueOps[i]);
2093 	}
2094 }
2095 
/* ----------------
 *		FormIndexDatum
 *			Construct values[] and isnull[] arrays for a new index tuple.
 *
 *	indexInfo		Info about the index
 *	slot			Heap tuple for which we must prepare an index entry
 *	estate			executor state for evaluating any index expressions
 *	values			Array of index Datums (output area)
 *	isnull			Array of is-null indicators (output area)
 *
 * When there are no index expressions, estate may be NULL.  Otherwise it
 * must be supplied, *and* the ecxt_scantuple slot of its per-tuple expr
 * context must point to the heap tuple passed in.
 *
 * Notice we don't actually call index_form_tuple() here; we just prepare
 * its input arrays values[] and isnull[].  This is because the index AM
 * may wish to alter the data before storage.
 * ----------------
 */
void
FormIndexDatum(IndexInfo *indexInfo,
			   TupleTableSlot *slot,
			   EState *estate,
			   Datum *values,
			   bool *isnull)
{
	ListCell   *indexpr_item;
	int			i;

	if (indexInfo->ii_Expressions != NIL &&
		indexInfo->ii_ExpressionsState == NIL)
	{
		/* First time through, set up expression evaluation state */
		indexInfo->ii_ExpressionsState =
			ExecPrepareExprList(indexInfo->ii_Expressions, estate);
		/* Check caller has set up context correctly */
		Assert(GetPerTupleExprContext(estate)->ecxt_scantuple == slot);
	}
	indexpr_item = list_head(indexInfo->ii_ExpressionsState);

	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
	{
		int			keycol = indexInfo->ii_IndexAttrNumbers[i];
		Datum		iDatum;
		bool		isNull;

		if (keycol != 0)
		{
			/*
			 * Plain index column; get the value we need directly from the
			 * heap tuple.
			 */
			iDatum = slot_getattr(slot, keycol, &isNull);
		}
		else
		{
			/*
			 * Index expression --- need to evaluate it.  Attnum 0 marks an
			 * expression column; expressions are consumed from
			 * ii_ExpressionsState in column order.
			 */
			if (indexpr_item == NULL)
				elog(ERROR, "wrong number of index expressions");
			iDatum = ExecEvalExprSwitchContext((ExprState *) lfirst(indexpr_item),
											   GetPerTupleExprContext(estate),
											   &isNull);
			indexpr_item = lnext(indexpr_item);
		}
		values[i] = iDatum;
		isnull[i] = isNull;
	}

	/* every prepared expression should have been consumed exactly once */
	if (indexpr_item != NULL)
		elog(ERROR, "wrong number of index expressions");
}
2169 
2170 
/*
 * index_update_stats --- update pg_class entry after CREATE INDEX or REINDEX
 *
 * This routine updates the pg_class row of either an index or its parent
 * relation after CREATE INDEX or REINDEX.  Its rather bizarre API is designed
 * to ensure we can do all the necessary work in just one update.
 *
 * hasindex: set relhasindex to this value
 * reltuples: if >= 0, set reltuples to this value; else no change
 *
 * If reltuples >= 0, relpages and relallvisible are also updated (using
 * RelationGetNumberOfBlocks() and visibilitymap_count()).
 *
 * NOTE: an important side-effect of this operation is that an SI invalidation
 * message is sent out to all backends --- including me --- causing relcache
 * entries to be flushed or updated with the new data.  This must happen even
 * if we find that no change is needed in the pg_class row.  When updating
 * a heap entry, this ensures that other backends find out about the new
 * index.  When updating an index, it's important because some index AMs
 * expect a relcache flush to occur after REINDEX.
 */
static void
index_update_stats(Relation rel,
				   bool hasindex,
				   double reltuples)
{
	Oid			relid = RelationGetRelid(rel);
	Relation	pg_class;
	HeapTuple	tuple;
	Form_pg_class rd_rel;
	bool		dirty;

	/*
	 * We always update the pg_class row using a non-transactional,
	 * overwrite-in-place update.  There are several reasons for this:
	 *
	 * 1. In bootstrap mode, we have no choice --- UPDATE wouldn't work.
	 *
	 * 2. We could be reindexing pg_class itself, in which case we can't move
	 * its pg_class row because CatalogTupleInsert/CatalogTupleUpdate might
	 * not know about all the indexes yet (see reindex_relation).
	 *
	 * 3. Because we execute CREATE INDEX with just share lock on the parent
	 * rel (to allow concurrent index creations), an ordinary update could
	 * suffer a tuple-concurrently-updated failure against another CREATE
	 * INDEX committing at about the same time.  We can avoid that by having
	 * them both do nontransactional updates (we assume they will both be
	 * trying to change the pg_class row to the same thing, so it doesn't
	 * matter which goes first).
	 *
	 * It is safe to use a non-transactional update even though our
	 * transaction could still fail before committing.  Setting relhasindex
	 * true is safe even if there are no indexes (VACUUM will eventually fix
	 * it).  And of course the new relpages and reltuples counts are correct
	 * regardless.  However, we don't want to change relpages (or
	 * relallvisible) if the caller isn't providing an updated reltuples
	 * count, because that would bollix the reltuples/relpages ratio which is
	 * what's really important.
	 */

	pg_class = heap_open(RelationRelationId, RowExclusiveLock);

	/*
	 * Make a copy of the tuple to update.  Normally we use the syscache, but
	 * we can't rely on that during bootstrap or while reindexing pg_class
	 * itself.
	 */
	if (IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationRelationId))
	{
		/* don't assume syscache will work */
		HeapScanDesc pg_class_scan;
		ScanKeyData key[1];

		/* seqscan pg_class for the row whose OID matches relid */
		ScanKeyInit(&key[0],
					ObjectIdAttributeNumber,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan_catalog(pg_class, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
		/* copy the tuple so we can still modify it after ending the scan */
		tuple = heap_copytuple(tuple);
		heap_endscan(pg_class_scan);
	}
	else
	{
		/* normal case, use syscache */
		tuple = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relid));
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/* Should this be a more comprehensive test? */
	Assert(rd_rel->relkind != RELKIND_PARTITIONED_INDEX);

	/* Apply required updates, if any, to copied tuple */

	dirty = false;
	if (rd_rel->relhasindex != hasindex)
	{
		rd_rel->relhasindex = hasindex;
		dirty = true;
	}

	/* reltuples < 0 means "leave reltuples/relpages/relallvisible alone" */
	if (reltuples >= 0)
	{
		BlockNumber relpages = RelationGetNumberOfBlocks(rel);
		BlockNumber relallvisible;

		if (rd_rel->relkind != RELKIND_INDEX)
			visibilitymap_count(rel, &relallvisible, NULL);
		else					/* don't bother for indexes */
			relallvisible = 0;

		if (rd_rel->relpages != (int32) relpages)
		{
			rd_rel->relpages = (int32) relpages;
			dirty = true;
		}
		if (rd_rel->reltuples != (float4) reltuples)
		{
			rd_rel->reltuples = (float4) reltuples;
			dirty = true;
		}
		if (rd_rel->relallvisible != (int32) relallvisible)
		{
			rd_rel->relallvisible = (int32) relallvisible;
			dirty = true;
		}
	}

	/*
	 * If anything changed, write out the tuple
	 */
	if (dirty)
	{
		heap_inplace_update(pg_class, tuple);
		/* the above sends a cache inval message */
	}
	else
	{
		/* no need to change tuple, but force relcache inval anyway */
		CacheInvalidateRelcacheByTuple(tuple);
	}

	heap_freetuple(tuple);

	heap_close(pg_class, RowExclusiveLock);
}
2322 
2323 
/*
 * index_build - invoke access-method-specific index build procedure
 *
 * On entry, the index's catalog entries are valid, and its physical disk
 * file has been created but is empty.  We call the AM-specific build
 * procedure to fill in the index contents.  We then update the pg_class
 * entries of the index and heap relation as needed, using statistics
 * returned by ambuild as well as data passed by the caller.
 *
 * isprimary tells whether to mark the index as a primary-key index.
 * isreindex indicates we are recreating a previously-existing index.
 * parallel indicates if parallelism may be useful.
 *
 * Note: when reindexing an existing index, isprimary can be false even if
 * the index is a PK; it's already properly marked and need not be re-marked.
 *
 * Note: before Postgres 8.2, the passed-in heap and index Relations
 * were automatically closed by this routine.  This is no longer the case.
 * The caller opened 'em, and the caller should close 'em.
 */
void
index_build(Relation heapRelation,
			Relation indexRelation,
			IndexInfo *indexInfo,
			bool isprimary,
			bool isreindex,
			bool parallel)
{
	IndexBuildResult *stats;
	Oid			save_userid;
	int			save_sec_context;
	int			save_nestlevel;

	/*
	 * sanity checks: the index AM must supply both build callbacks
	 */
	Assert(RelationIsValid(indexRelation));
	Assert(PointerIsValid(indexRelation->rd_amroutine));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuild));
	Assert(PointerIsValid(indexRelation->rd_amroutine->ambuildempty));

	/*
	 * Determine worker process details for parallel CREATE INDEX.  Currently,
	 * only btree has support for parallel builds.
	 *
	 * Note that planner considers parallel safety for us.
	 */
	if (parallel && IsNormalProcessingMode() &&
		indexRelation->rd_rel->relam == BTREE_AM_OID)
		indexInfo->ii_ParallelWorkers =
			plan_create_index_workers(RelationGetRelid(heapRelation),
									  RelationGetRelid(indexRelation));

	if (indexInfo->ii_ParallelWorkers == 0)
		ereport(DEBUG1,
				(errmsg("building index \"%s\" on table \"%s\" serially",
						RelationGetRelationName(indexRelation),
						RelationGetRelationName(heapRelation))));
	else
		ereport(DEBUG1,
				(errmsg_plural("building index \"%s\" on table \"%s\" with request for %d parallel worker",
							   "building index \"%s\" on table \"%s\" with request for %d parallel workers",
							   indexInfo->ii_ParallelWorkers,
							   RelationGetRelationName(indexRelation),
							   RelationGetRelationName(heapRelation),
							   indexInfo->ii_ParallelWorkers)));

	/*
	 * Switch to the table owner's userid, so that any index functions are run
	 * as that user.  Also lock down security-restricted operations and
	 * arrange to make GUC variable changes local to this command.
	 */
	GetUserIdAndSecContext(&save_userid, &save_sec_context);
	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
	save_nestlevel = NewGUCNestLevel();

	/*
	 * Call the access method's build procedure
	 */
	stats = indexRelation->rd_amroutine->ambuild(heapRelation, indexRelation,
												 indexInfo);
	Assert(PointerIsValid(stats));

	/*
	 * If this is an unlogged index, we may need to write out an init fork for
	 * it -- but we must first check whether one already exists.  If, for
	 * example, an unlogged relation is truncated in the transaction that
	 * created it, or truncated twice in a subsequent transaction, the
	 * relfilenode won't change, and nothing needs to be done here.
	 *
	 * NOTE(review): smgrexists() is applied to rd_smgr before
	 * RelationOpenSmgr() is called below; this appears to rely on the smgr
	 * relation already having been opened during ambuild -- confirm.
	 */
	if (indexRelation->rd_rel->relpersistence == RELPERSISTENCE_UNLOGGED &&
		!smgrexists(indexRelation->rd_smgr, INIT_FORKNUM))
	{
		RelationOpenSmgr(indexRelation);
		smgrcreate(indexRelation->rd_smgr, INIT_FORKNUM, false);
		indexRelation->rd_amroutine->ambuildempty(indexRelation);
	}

	/*
	 * If we found any potentially broken HOT chains, mark the index as not
	 * being usable until the current transaction is below the event horizon.
	 * See src/backend/access/heap/README.HOT for discussion.  Also set this
	 * if early pruning/vacuuming is enabled for the heap relation.  While it
	 * might become safe to use the index earlier based on actual cleanup
	 * activity and other active transactions, the test for that would be much
	 * more complex and would require some form of blocking, so keep it simple
	 * and fast by just using the current transaction.
	 *
	 * However, when reindexing an existing index, we should do nothing here.
	 * Any HOT chains that are broken with respect to the index must predate
	 * the index's original creation, so there is no need to change the
	 * index's usability horizon.  Moreover, we *must not* try to change the
	 * index's pg_index entry while reindexing pg_index itself, and this
	 * optimization nicely prevents that.  The more complex rules needed for a
	 * reindex are handled separately after this function returns.
	 *
	 * We also need not set indcheckxmin during a concurrent index build,
	 * because we won't set indisvalid true until all transactions that care
	 * about the broken HOT chains or early pruning/vacuuming are gone.
	 *
	 * Therefore, this code path can only be taken during non-concurrent
	 * CREATE INDEX.  Thus the fact that heap_update will set the pg_index
	 * tuple's xmin doesn't matter, because that tuple was created in the
	 * current transaction anyway.  That also means we don't need to worry
	 * about any concurrent readers of the tuple; no other transaction can see
	 * it yet.
	 */
	if ((indexInfo->ii_BrokenHotChain || EarlyPruningEnabled(heapRelation)) &&
		!isreindex &&
		!indexInfo->ii_Concurrent)
	{
		Oid			indexId = RelationGetRelid(indexRelation);
		Relation	pg_index;
		HeapTuple	indexTuple;
		Form_pg_index indexForm;

		pg_index = heap_open(IndexRelationId, RowExclusiveLock);

		indexTuple = SearchSysCacheCopy1(INDEXRELID,
										 ObjectIdGetDatum(indexId));
		if (!HeapTupleIsValid(indexTuple))
			elog(ERROR, "cache lookup failed for index %u", indexId);
		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);

		/* If it's a new index, indcheckxmin shouldn't be set ... */
		Assert(!indexForm->indcheckxmin);

		indexForm->indcheckxmin = true;
		CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);

		heap_freetuple(indexTuple);
		heap_close(pg_index, RowExclusiveLock);
	}

	/*
	 * Update heap and index pg_class rows
	 */
	index_update_stats(heapRelation,
					   true,
					   stats->heap_tuples);

	index_update_stats(indexRelation,
					   false,
					   stats->index_tuples);

	/* Make the updated catalog row versions visible */
	CommandCounterIncrement();

	/*
	 * If it's for an exclusion constraint, make a second pass over the heap
	 * to verify that the constraint is satisfied.  We must not do this until
	 * the index is fully valid.  (Broken HOT chains shouldn't matter, though;
	 * see comments for IndexCheckExclusion.)
	 */
	if (indexInfo->ii_ExclusionOps != NULL)
		IndexCheckExclusion(heapRelation, indexRelation, indexInfo);

	/* Roll back any GUC changes executed by index functions */
	AtEOXact_GUC(false, save_nestlevel);

	/* Restore userid and security context */
	SetUserIdAndSecContext(save_userid, save_sec_context);
}
2508 
2509 
2510 /*
2511  * IndexBuildHeapScan - scan the heap relation to find tuples to be indexed
2512  *
2513  * This is called back from an access-method-specific index build procedure
2514  * after the AM has done whatever setup it needs.  The parent heap relation
2515  * is scanned to find tuples that should be entered into the index.  Each
2516  * such tuple is passed to the AM's callback routine, which does the right
2517  * things to add it to the new index.  After we return, the AM's index
2518  * build procedure does whatever cleanup it needs.
2519  *
2520  * The total count of live heap tuples is returned.  This is for updating
2521  * pg_class statistics.  (It's annoying not to be able to do that here, but we
2522  * want to merge that update with others; see index_update_stats.)  Note that
2523  * the index AM itself must keep track of the number of index tuples; we don't
2524  * do so here because the AM might reject some of the tuples for its own
2525  * reasons, such as being unable to store NULLs.
2526  *
2527  * A side effect is to set indexInfo->ii_BrokenHotChain to true if we detect
2528  * any potentially broken HOT chains.  Currently, we set this if there are
2529  * any RECENTLY_DEAD or DELETE_IN_PROGRESS entries in a HOT chain, without
2530  * trying very hard to detect whether they're really incompatible with the
2531  * chain tip.
2532  */
2533 double
IndexBuildHeapScan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,bool allow_sync,IndexBuildCallback callback,void * callback_state,HeapScanDesc scan)2534 IndexBuildHeapScan(Relation heapRelation,
2535 				   Relation indexRelation,
2536 				   IndexInfo *indexInfo,
2537 				   bool allow_sync,
2538 				   IndexBuildCallback callback,
2539 				   void *callback_state,
2540 				   HeapScanDesc scan)
2541 {
2542 	return IndexBuildHeapRangeScan(heapRelation, indexRelation,
2543 								   indexInfo, allow_sync,
2544 								   false,
2545 								   0, InvalidBlockNumber,
2546 								   callback, callback_state, scan);
2547 }
2548 
/*
 * As above, except that instead of scanning the complete heap, only the given
 * number of blocks are scanned.  Scan to end-of-rel can be signalled by
 * passing InvalidBlockNumber as numblocks.  Note that restricting the range
 * to scan cannot be done when requesting syncscan.
 *
 * When "anyvisible" mode is requested, all tuples visible to any transaction
 * are indexed and counted as live, including those inserted or deleted by
 * transactions that are still in progress.
 */
double
IndexBuildHeapRangeScan(Relation heapRelation,
						Relation indexRelation,
						IndexInfo *indexInfo,
						bool allow_sync,
						bool anyvisible,
						BlockNumber start_blockno,
						BlockNumber numblocks,
						IndexBuildCallback callback,
						void *callback_state,
						HeapScanDesc scan)
{
	bool		is_system_catalog;
	bool		checking_uniqueness;
	HeapTuple	heapTuple;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	double		reltuples;		/* running count of live heap tuples */
	ExprState  *predicate;
	TupleTableSlot *slot;
	EState	   *estate;
	ExprContext *econtext;
	Snapshot	snapshot;
	bool		need_unregister_snapshot = false;
	TransactionId OldestXmin;
	/* block whose HOT-chain root offsets are currently cached, if any */
	BlockNumber root_blkno = InvalidBlockNumber;
	OffsetNumber root_offsets[MaxHeapTuplesPerPage];

	/*
	 * sanity checks
	 */
	Assert(OidIsValid(indexRelation->rd_rel->relam));

	/* Remember if it's a system catalog */
	is_system_catalog = IsSystemRelation(heapRelation);

	/* See whether we're verifying uniqueness/exclusion properties */
	checking_uniqueness = (indexInfo->ii_Unique ||
						   indexInfo->ii_ExclusionOps != NULL);

	/*
	 * "Any visible" mode is not compatible with uniqueness checks; make sure
	 * only one of those is requested.
	 */
	Assert(!(anyvisible && checking_uniqueness));

	/*
	 * Need an EState for evaluation of index expressions and partial-index
	 * predicates.  Also a slot to hold the current tuple.
	 */
	estate = CreateExecutorState();
	econtext = GetPerTupleExprContext(estate);
	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/* Set up execution state for predicate, if any. */
	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);

	/*
	 * Prepare for scan of the base relation.  In a normal index build, we use
	 * SnapshotAny because we must retrieve all tuples and do our own time
	 * qual checks (because we have to index RECENTLY_DEAD tuples). In a
	 * concurrent build, or during bootstrap, we take a regular MVCC snapshot
	 * and index whatever's live according to that.
	 */
	OldestXmin = InvalidTransactionId;

	/* okay to ignore lazy VACUUMs here */
	if (!IsBootstrapProcessingMode() && !indexInfo->ii_Concurrent)
		OldestXmin = GetOldestXmin(heapRelation, PROCARRAY_FLAGS_VACUUM);

	if (!scan)
	{
		/*
		 * Serial index build.
		 *
		 * Must begin our own heap scan in this case.  We may also need to
		 * register a snapshot whose lifetime is under our direct control.
		 */
		if (!TransactionIdIsValid(OldestXmin))
		{
			snapshot = RegisterSnapshot(GetTransactionSnapshot());
			need_unregister_snapshot = true;
		}
		else
			snapshot = SnapshotAny;

		scan = heap_beginscan_strat(heapRelation,	/* relation */
									snapshot,	/* snapshot */
									0,	/* number of keys */
									NULL,	/* scan key */
									true,	/* buffer access strategy OK */
									allow_sync);	/* syncscan OK? */
	}
	else
	{
		/*
		 * Parallel index build.
		 *
		 * Parallel case never registers/unregisters own snapshot.  Snapshot
		 * is taken from parallel heap scan, and is SnapshotAny or an MVCC
		 * snapshot, based on same criteria as serial case.
		 */
		Assert(!IsBootstrapProcessingMode());
		Assert(allow_sync);
		snapshot = scan->rs_snapshot;
	}

	/*
	 * Must call GetOldestXmin() with SnapshotAny.  Should never call
	 * GetOldestXmin() with MVCC snapshot. (It's especially worth checking
	 * this for parallel builds, since ambuild routines that support parallel
	 * builds must work these details out for themselves.)
	 */
	Assert(snapshot == SnapshotAny || IsMVCCSnapshot(snapshot));
	Assert(snapshot == SnapshotAny ? TransactionIdIsValid(OldestXmin) :
		   !TransactionIdIsValid(OldestXmin));
	Assert(snapshot == SnapshotAny || !anyvisible);

	/* set our scan endpoints */
	if (!allow_sync)
		heap_setscanlimits(scan, start_blockno, numblocks);
	else
	{
		/* syncscan can only be requested on whole relation */
		Assert(start_blockno == 0);
		Assert(numblocks == InvalidBlockNumber);
	}

	reltuples = 0;

	/*
	 * Scan all tuples in the base relation.
	 */
	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		bool		tupleIsAlive;

		CHECK_FOR_INTERRUPTS();

		/*
		 * When dealing with a HOT-chain of updated tuples, we want to index
		 * the values of the live tuple (if any), but index it under the TID
		 * of the chain's root tuple.  This approach is necessary to preserve
		 * the HOT-chain structure in the heap. So we need to be able to find
		 * the root item offset for every tuple that's in a HOT-chain.  When
		 * first reaching a new page of the relation, call
		 * heap_get_root_tuples() to build a map of root item offsets on the
		 * page.
		 *
		 * It might look unsafe to use this information across buffer
		 * lock/unlock.  However, we hold ShareLock on the table so no
		 * ordinary insert/update/delete should occur; and we hold pin on the
		 * buffer continuously while visiting the page, so no pruning
		 * operation can occur either.
		 *
		 * In cases with only ShareUpdateExclusiveLock on the table, it's
		 * possible for some HOT tuples to appear that we didn't know about
		 * when we first read the page.  To handle that case, we re-obtain the
		 * list of root offsets when a HOT tuple points to a root item that we
		 * don't know about.
		 *
		 * Also, although our opinions about tuple liveness could change while
		 * we scan the page (due to concurrent transaction commits/aborts),
		 * the chain root locations won't, so this info doesn't need to be
		 * rebuilt after waiting for another transaction.
		 *
		 * Note the implied assumption that there is no more than one live
		 * tuple per HOT-chain --- else we could create more than one index
		 * entry pointing to the same root tuple.
		 */
		if (scan->rs_cblock != root_blkno)
		{
			Page		page = BufferGetPage(scan->rs_cbuf);

			/* share-lock the buffer while reading its line pointers */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
			heap_get_root_tuples(page, root_offsets);
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			root_blkno = scan->rs_cblock;
		}

		if (snapshot == SnapshotAny)
		{
			/* do our own time qual check */
			bool		indexIt;
			TransactionId xwait;

	recheck:

			/*
			 * We could possibly get away with not locking the buffer here,
			 * since caller should hold ShareLock on the relation, but let's
			 * be conservative about it.  (This remark is still correct even
			 * with HOT-pruning: our pin on the buffer prevents pruning.)
			 */
			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

			/*
			 * The criteria for counting a tuple as live in this block need to
			 * match what analyze.c's acquire_sample_rows() does, otherwise
			 * CREATE INDEX and ANALYZE may produce wildly different reltuples
			 * values, e.g. when there are many recently-dead tuples.
			 */
			switch (HeapTupleSatisfiesVacuum(heapTuple, OldestXmin,
											 scan->rs_cbuf))
			{
				case HEAPTUPLE_DEAD:
					/* Definitely dead, we can ignore it */
					indexIt = false;
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_LIVE:
					/* Normal case, index and unique-check it */
					indexIt = true;
					tupleIsAlive = true;
					/* Count it as live, too */
					reltuples += 1;
					break;
				case HEAPTUPLE_RECENTLY_DEAD:

					/*
					 * If tuple is recently deleted then we must index it
					 * anyway to preserve MVCC semantics.  (Pre-existing
					 * transactions could try to use the index after we finish
					 * building it, and may need to see such tuples.)
					 *
					 * However, if it was HOT-updated then we must only index
					 * the live tuple at the end of the HOT-chain.  Since this
					 * breaks semantics for pre-existing snapshots, mark the
					 * index as unusable for them.
					 *
					 * We don't count recently-dead tuples in reltuples, even
					 * if we index them; see acquire_sample_rows().
					 */
					if (HeapTupleIsHotUpdated(heapTuple))
					{
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
						indexIt = true;
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				case HEAPTUPLE_INSERT_IN_PROGRESS:

					/*
					 * In "anyvisible" mode, this tuple is visible and we
					 * don't need any further checks.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = true;
						reltuples += 1;
						break;
					}

					/*
					 * Since caller should hold ShareLock or better, normally
					 * the only way to see this is if it was inserted earlier
					 * in our own transaction.  However, it can happen in
					 * system catalogs, since we tend to release write lock
					 * before commit there.  Give a warning if neither case
					 * applies.
					 */
					xwait = HeapTupleHeaderGetXmin(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent insert in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, indexing
						 * such a tuple could lead to a bogus uniqueness
						 * failure.  In that case we wait for the inserting
						 * transaction to finish and check again.
						 */
						if (checking_uniqueness)
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}
					}
					else
					{
						/*
						 * For consistency with acquire_sample_rows(), count
						 * HEAPTUPLE_INSERT_IN_PROGRESS tuples as live only
						 * when inserted by our own transaction.
						 */
						reltuples += 1;
					}

					/*
					 * We must index such tuples, since if the index build
					 * commits then they're good.
					 */
					indexIt = true;
					tupleIsAlive = true;
					break;
				case HEAPTUPLE_DELETE_IN_PROGRESS:

					/*
					 * As with INSERT_IN_PROGRESS case, this is unexpected
					 * unless it's our own deletion or a system catalog; but
					 * in anyvisible mode, this tuple is visible.
					 */
					if (anyvisible)
					{
						indexIt = true;
						tupleIsAlive = false;
						reltuples += 1;
						break;
					}

					xwait = HeapTupleHeaderGetUpdateXid(heapTuple->t_data);
					if (!TransactionIdIsCurrentTransactionId(xwait))
					{
						if (!is_system_catalog)
							elog(WARNING, "concurrent delete in progress within table \"%s\"",
								 RelationGetRelationName(heapRelation));

						/*
						 * If we are performing uniqueness checks, assuming
						 * the tuple is dead could lead to missing a
						 * uniqueness violation.  In that case we wait for the
						 * deleting transaction to finish and check again.
						 *
						 * Also, if it's a HOT-updated tuple, we should not
						 * index it but rather the live tuple at the end of
						 * the HOT-chain.  However, the deleting transaction
						 * could abort, possibly leaving this tuple as live
						 * after all, in which case it has to be indexed. The
						 * only way to know what to do is to wait for the
						 * deleting transaction to finish and check again.
						 */
						if (checking_uniqueness ||
							HeapTupleIsHotUpdated(heapTuple))
						{
							/*
							 * Must drop the lock on the buffer before we wait
							 */
							LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
							XactLockTableWait(xwait, heapRelation,
											  &heapTuple->t_self,
											  XLTW_InsertIndexUnique);
							CHECK_FOR_INTERRUPTS();
							goto recheck;
						}

						/*
						 * Otherwise index it but don't check for uniqueness,
						 * the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;

						/*
						 * Count HEAPTUPLE_DELETE_IN_PROGRESS tuples as live,
						 * if they were not deleted by the current
						 * transaction.  That's what acquire_sample_rows()
						 * does, and we want the behavior to be consistent.
						 */
						reltuples += 1;
					}
					else if (HeapTupleIsHotUpdated(heapTuple))
					{
						/*
						 * It's a HOT-updated tuple deleted by our own xact.
						 * We can assume the deletion will commit (else the
						 * index contents don't matter), so treat the same as
						 * RECENTLY_DEAD HOT-updated tuples.
						 */
						indexIt = false;
						/* mark the index as unsafe for old snapshots */
						indexInfo->ii_BrokenHotChain = true;
					}
					else
					{
						/*
						 * It's a regular tuple deleted by our own xact. Index
						 * it, but don't check for uniqueness nor count in
						 * reltuples, the same as a RECENTLY_DEAD tuple.
						 */
						indexIt = true;
					}
					/* In any case, exclude the tuple from unique-checking */
					tupleIsAlive = false;
					break;
				default:
					elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
					indexIt = tupleIsAlive = false; /* keep compiler quiet */
					break;
			}

			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

			if (!indexIt)
				continue;
		}
		else
		{
			/* heap_getnext did the time qual check */
			tupleIsAlive = true;
			reltuples += 1;
		}

		MemoryContextReset(econtext->ecxt_per_tuple_memory);

		/* Set up for predicate or expression evaluation */
		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);

		/*
		 * In a partial index, discard tuples that don't satisfy the
		 * predicate.
		 */
		if (predicate != NULL)
		{
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * For the current heap tuple, extract all the attributes we use in
		 * this index, and note which are null.  This also performs evaluation
		 * of any expressions needed.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/*
		 * You'd think we should go ahead and build the index tuple here, but
		 * some index AMs want to do further processing on the data first.  So
		 * pass the values[] and isnull[] arrays, instead.
		 */

		if (HeapTupleIsHeapOnly(heapTuple))
		{
			/*
			 * For a heap-only tuple, pretend its TID is that of the root. See
			 * src/backend/access/heap/README.HOT for discussion.
			 */
			HeapTupleData rootTuple;
			OffsetNumber offnum;

			rootTuple = *heapTuple;
			offnum = ItemPointerGetOffsetNumber(&heapTuple->t_self);

			/*
			 * If a HOT tuple points to a root that we don't know
			 * about, obtain root items afresh.  If that still fails,
			 * report it as corruption.
			 */
			if (root_offsets[offnum - 1] == InvalidOffsetNumber)
			{
				Page	page = BufferGetPage(scan->rs_cbuf);

				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
				heap_get_root_tuples(page, root_offsets);
				LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
			}

			if (!OffsetNumberIsValid(root_offsets[offnum - 1]))
				ereport(ERROR,
						(errcode(ERRCODE_DATA_CORRUPTED),
						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
										 ItemPointerGetBlockNumber(&heapTuple->t_self),
										 offnum,
										 RelationGetRelationName(heapRelation))));

			ItemPointerSetOffsetNumber(&rootTuple.t_self,
									   root_offsets[offnum - 1]);

			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, &rootTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
		else
		{
			/* Call the AM's callback routine to process the tuple */
			callback(indexRelation, heapTuple, values, isnull, tupleIsAlive,
					 callback_state);
		}
	}

	heap_endscan(scan);

	/* we can now forget our snapshot, if set and registered by us */
	if (need_unregister_snapshot)
		UnregisterSnapshot(snapshot);

	ExecDropSingleTupleTableSlot(slot);

	FreeExecutorState(estate);

	/* These may have been pointing to the now-gone estate */
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_PredicateState = NULL;

	return reltuples;
}
3075 
3076 
3077 /*
3078  * IndexCheckExclusion - verify that a new exclusion constraint is satisfied
3079  *
3080  * When creating an exclusion constraint, we first build the index normally
3081  * and then rescan the heap to check for conflicts.  We assume that we only
3082  * need to validate tuples that are live according to an up-to-date snapshot,
3083  * and that these were correctly indexed even in the presence of broken HOT
3084  * chains.  This should be OK since we are holding at least ShareLock on the
3085  * table, meaning there can be no uncommitted updates from other transactions.
3086  * (Note: that wouldn't necessarily work for system catalogs, since many
3087  * operations release write lock early on the system catalogs.)
3088  */
3089 static void
IndexCheckExclusion(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo)3090 IndexCheckExclusion(Relation heapRelation,
3091 					Relation indexRelation,
3092 					IndexInfo *indexInfo)
3093 {
3094 	HeapScanDesc scan;
3095 	HeapTuple	heapTuple;
3096 	Datum		values[INDEX_MAX_KEYS];
3097 	bool		isnull[INDEX_MAX_KEYS];
3098 	ExprState  *predicate;
3099 	TupleTableSlot *slot;
3100 	EState	   *estate;
3101 	ExprContext *econtext;
3102 	Snapshot	snapshot;
3103 
3104 	/*
3105 	 * If we are reindexing the target index, mark it as no longer being
3106 	 * reindexed, to forestall an Assert in index_beginscan when we try to use
3107 	 * the index for probes.  This is OK because the index is now fully valid.
3108 	 */
3109 	if (ReindexIsCurrentlyProcessingIndex(RelationGetRelid(indexRelation)))
3110 		ResetReindexProcessing();
3111 
3112 	/*
3113 	 * Need an EState for evaluation of index expressions and partial-index
3114 	 * predicates.  Also a slot to hold the current tuple.
3115 	 */
3116 	estate = CreateExecutorState();
3117 	econtext = GetPerTupleExprContext(estate);
3118 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
3119 
3120 	/* Arrange for econtext's scan tuple to be the tuple under test */
3121 	econtext->ecxt_scantuple = slot;
3122 
3123 	/* Set up execution state for predicate, if any. */
3124 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3125 
3126 	/*
3127 	 * Scan all live tuples in the base relation.
3128 	 */
3129 	snapshot = RegisterSnapshot(GetLatestSnapshot());
3130 	scan = heap_beginscan_strat(heapRelation,	/* relation */
3131 								snapshot,	/* snapshot */
3132 								0,	/* number of keys */
3133 								NULL,	/* scan key */
3134 								true,	/* buffer access strategy OK */
3135 								true);	/* syncscan OK */
3136 
3137 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
3138 	{
3139 		CHECK_FOR_INTERRUPTS();
3140 
3141 		MemoryContextReset(econtext->ecxt_per_tuple_memory);
3142 
3143 		/* Set up for predicate or expression evaluation */
3144 		ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3145 
3146 		/*
3147 		 * In a partial index, ignore tuples that don't satisfy the predicate.
3148 		 */
3149 		if (predicate != NULL)
3150 		{
3151 			if (!ExecQual(predicate, econtext))
3152 				continue;
3153 		}
3154 
3155 		/*
3156 		 * Extract index column values, including computing expressions.
3157 		 */
3158 		FormIndexDatum(indexInfo,
3159 					   slot,
3160 					   estate,
3161 					   values,
3162 					   isnull);
3163 
3164 		/*
3165 		 * Check that this tuple has no conflicts.
3166 		 */
3167 		check_exclusion_constraint(heapRelation,
3168 								   indexRelation, indexInfo,
3169 								   &(heapTuple->t_self), values, isnull,
3170 								   estate, true);
3171 	}
3172 
3173 	heap_endscan(scan);
3174 	UnregisterSnapshot(snapshot);
3175 
3176 	ExecDropSingleTupleTableSlot(slot);
3177 
3178 	FreeExecutorState(estate);
3179 
3180 	/* These may have been pointing to the now-gone estate */
3181 	indexInfo->ii_ExpressionsState = NIL;
3182 	indexInfo->ii_PredicateState = NULL;
3183 }
3184 
3185 
3186 /*
3187  * validate_index - support code for concurrent index builds
3188  *
3189  * We do a concurrent index build by first inserting the catalog entry for the
3190  * index via index_create(), marking it not indisready and not indisvalid.
3191  * Then we commit our transaction and start a new one, then we wait for all
3192  * transactions that could have been modifying the table to terminate.  Now
3193  * we know that any subsequently-started transactions will see the index and
3194  * honor its constraints on HOT updates; so while existing HOT-chains might
3195  * be broken with respect to the index, no currently live tuple will have an
3196  * incompatible HOT update done to it.  We now build the index normally via
3197  * index_build(), while holding a weak lock that allows concurrent
3198  * insert/update/delete.  Also, we index only tuples that are valid
3199  * as of the start of the scan (see IndexBuildHeapScan), whereas a normal
3200  * build takes care to include recently-dead tuples.  This is OK because
3201  * we won't mark the index valid until all transactions that might be able
3202  * to see those tuples are gone.  The reason for doing that is to avoid
3203  * bogus unique-index failures due to concurrent UPDATEs (we might see
3204  * different versions of the same row as being valid when we pass over them,
3205  * if we used HeapTupleSatisfiesVacuum).  This leaves us with an index that
3206  * does not contain any tuples added to the table while we built the index.
3207  *
3208  * Next, we mark the index "indisready" (but still not "indisvalid") and
3209  * commit the second transaction and start a third.  Again we wait for all
3210  * transactions that could have been modifying the table to terminate.  Now
3211  * we know that any subsequently-started transactions will see the index and
3212  * insert their new tuples into it.  We then take a new reference snapshot
3213  * which is passed to validate_index().  Any tuples that are valid according
3214  * to this snap, but are not in the index, must be added to the index.
3215  * (Any tuples committed live after the snap will be inserted into the
3216  * index by their originating transaction.  Any tuples committed dead before
3217  * the snap need not be indexed, because we will wait out all transactions
3218  * that might care about them before we mark the index valid.)
3219  *
3220  * validate_index() works by first gathering all the TIDs currently in the
3221  * index, using a bulkdelete callback that just stores the TIDs and doesn't
3222  * ever say "delete it".  (This should be faster than a plain indexscan;
3223  * also, not all index AMs support full-index indexscan.)  Then we sort the
3224  * TIDs, and finally scan the table doing a "merge join" against the TID list
3225  * to see which tuples are missing from the index.  Thus we will ensure that
3226  * all tuples valid according to the reference snapshot are in the index.
3227  *
3228  * Building a unique index this way is tricky: we might try to insert a
3229  * tuple that is already dead or is in process of being deleted, and we
3230  * mustn't have a uniqueness failure against an updated version of the same
3231  * row.  We could try to check the tuple to see if it's already dead and tell
3232  * index_insert() not to do the uniqueness check, but that still leaves us
3233  * with a race condition against an in-progress update.  To handle that,
3234  * we expect the index AM to recheck liveness of the to-be-inserted tuple
3235  * before it declares a uniqueness error.
3236  *
3237  * After completing validate_index(), we wait until all transactions that
3238  * were alive at the time of the reference snapshot are gone; this is
3239  * necessary to be sure there are none left with a transaction snapshot
3240  * older than the reference (and hence possibly able to see tuples we did
3241  * not index).  Then we mark the index "indisvalid" and commit.  Subsequent
3242  * transactions will be able to use it for queries.
3243  *
3244  * Doing two full table scans is a brute-force strategy.  We could try to be
3245  * cleverer, eg storing new tuples in a special area of the table (perhaps
3246  * making the table append-only by setting use_fsm).  However that would
3247  * add yet more locking issues.
3248  */
3249 void
validate_index(Oid heapId,Oid indexId,Snapshot snapshot)3250 validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
3251 {
3252 	Relation	heapRelation,
3253 				indexRelation;
3254 	IndexInfo  *indexInfo;
3255 	IndexVacuumInfo ivinfo;
3256 	v_i_state	state;
3257 	Oid			save_userid;
3258 	int			save_sec_context;
3259 	int			save_nestlevel;
3260 
3261 	/* Open and lock the parent heap relation */
3262 	heapRelation = heap_open(heapId, ShareUpdateExclusiveLock);
3263 	/* And the target index relation */
3264 	indexRelation = index_open(indexId, RowExclusiveLock);
3265 
3266 	/*
3267 	 * Fetch info needed for index_insert.  (You might think this should be
3268 	 * passed in from DefineIndex, but its copy is long gone due to having
3269 	 * been built in a previous transaction.)
3270 	 */
3271 	indexInfo = BuildIndexInfo(indexRelation);
3272 
3273 	/* mark build is concurrent just for consistency */
3274 	indexInfo->ii_Concurrent = true;
3275 
3276 	/*
3277 	 * Switch to the table owner's userid, so that any index functions are run
3278 	 * as that user.  Also lock down security-restricted operations and
3279 	 * arrange to make GUC variable changes local to this command.
3280 	 */
3281 	GetUserIdAndSecContext(&save_userid, &save_sec_context);
3282 	SetUserIdAndSecContext(heapRelation->rd_rel->relowner,
3283 						   save_sec_context | SECURITY_RESTRICTED_OPERATION);
3284 	save_nestlevel = NewGUCNestLevel();
3285 
3286 	/*
3287 	 * Scan the index and gather up all the TIDs into a tuplesort object.
3288 	 */
3289 	ivinfo.index = indexRelation;
3290 	ivinfo.analyze_only = false;
3291 	ivinfo.estimated_count = true;
3292 	ivinfo.message_level = DEBUG2;
3293 	ivinfo.num_heap_tuples = heapRelation->rd_rel->reltuples;
3294 	ivinfo.strategy = NULL;
3295 
3296 	/*
3297 	 * Encode TIDs as int8 values for the sort, rather than directly sorting
3298 	 * item pointers.  This can be significantly faster, primarily because TID
3299 	 * is a pass-by-reference type on all platforms, whereas int8 is
3300 	 * pass-by-value on most platforms.
3301 	 */
3302 	state.tuplesort = tuplesort_begin_datum(INT8OID, Int8LessOperator,
3303 											InvalidOid, false,
3304 											maintenance_work_mem,
3305 											NULL, false);
3306 	state.htups = state.itups = state.tups_inserted = 0;
3307 
3308 	(void) index_bulk_delete(&ivinfo, NULL,
3309 							 validate_index_callback, (void *) &state);
3310 
3311 	/* Execute the sort */
3312 	tuplesort_performsort(state.tuplesort);
3313 
3314 	/*
3315 	 * Now scan the heap and "merge" it with the index
3316 	 */
3317 	validate_index_heapscan(heapRelation,
3318 							indexRelation,
3319 							indexInfo,
3320 							snapshot,
3321 							&state);
3322 
3323 	/* Done with tuplesort object */
3324 	tuplesort_end(state.tuplesort);
3325 
3326 	elog(DEBUG2,
3327 		 "validate_index found %.0f heap tuples, %.0f index tuples; inserted %.0f missing tuples",
3328 		 state.htups, state.itups, state.tups_inserted);
3329 
3330 	/* Roll back any GUC changes executed by index functions */
3331 	AtEOXact_GUC(false, save_nestlevel);
3332 
3333 	/* Restore userid and security context */
3334 	SetUserIdAndSecContext(save_userid, save_sec_context);
3335 
3336 	/* Close rels, but keep locks */
3337 	index_close(indexRelation, NoLock);
3338 	heap_close(heapRelation, NoLock);
3339 }
3340 
3341 /*
3342  * itemptr_encode - Encode ItemPointer as int64/int8
3343  *
3344  * This representation must produce values encoded as int64 that sort in the
3345  * same order as their corresponding original TID values would (using the
3346  * default int8 opclass to produce a result equivalent to the default TID
3347  * opclass).
3348  *
3349  * As noted in validate_index(), this can be significantly faster.
3350  */
3351 static inline int64
itemptr_encode(ItemPointer itemptr)3352 itemptr_encode(ItemPointer itemptr)
3353 {
3354 	BlockNumber block = ItemPointerGetBlockNumber(itemptr);
3355 	OffsetNumber offset = ItemPointerGetOffsetNumber(itemptr);
3356 	int64		encoded;
3357 
3358 	/*
3359 	 * Use the 16 least significant bits for the offset.  32 adjacent bits are
3360 	 * used for the block number.  Since remaining bits are unused, there
3361 	 * cannot be negative encoded values (We assume a two's complement
3362 	 * representation).
3363 	 */
3364 	encoded = ((uint64) block << 16) | (uint16) offset;
3365 
3366 	return encoded;
3367 }
3368 
3369 /*
3370  * itemptr_decode - Decode int64/int8 representation back to ItemPointer
3371  */
3372 static inline void
itemptr_decode(ItemPointer itemptr,int64 encoded)3373 itemptr_decode(ItemPointer itemptr, int64 encoded)
3374 {
3375 	BlockNumber block = (BlockNumber) (encoded >> 16);
3376 	OffsetNumber offset = (OffsetNumber) (encoded & 0xFFFF);
3377 
3378 	ItemPointerSet(itemptr, block, offset);
3379 }
3380 
3381 /*
3382  * validate_index_callback - bulkdelete callback to collect the index TIDs
3383  */
3384 static bool
validate_index_callback(ItemPointer itemptr,void * opaque)3385 validate_index_callback(ItemPointer itemptr, void *opaque)
3386 {
3387 	v_i_state  *state = (v_i_state *) opaque;
3388 	int64		encoded = itemptr_encode(itemptr);
3389 
3390 	tuplesort_putdatum(state->tuplesort, Int64GetDatum(encoded), false);
3391 	state->itups += 1;
3392 	return false;				/* never actually delete anything */
3393 }
3394 
3395 /*
3396  * validate_index_heapscan - second table scan for concurrent index build
3397  *
3398  * This has much code in common with IndexBuildHeapScan, but it's enough
3399  * different that it seems cleaner to have two routines not one.
3400  */
3401 static void
validate_index_heapscan(Relation heapRelation,Relation indexRelation,IndexInfo * indexInfo,Snapshot snapshot,v_i_state * state)3402 validate_index_heapscan(Relation heapRelation,
3403 						Relation indexRelation,
3404 						IndexInfo *indexInfo,
3405 						Snapshot snapshot,
3406 						v_i_state *state)
3407 {
3408 	HeapScanDesc scan;
3409 	HeapTuple	heapTuple;
3410 	Datum		values[INDEX_MAX_KEYS];
3411 	bool		isnull[INDEX_MAX_KEYS];
3412 	ExprState  *predicate;
3413 	TupleTableSlot *slot;
3414 	EState	   *estate;
3415 	ExprContext *econtext;
3416 	BlockNumber root_blkno = InvalidBlockNumber;
3417 	OffsetNumber root_offsets[MaxHeapTuplesPerPage];
3418 	bool		in_index[MaxHeapTuplesPerPage];
3419 
3420 	/* state variables for the merge */
3421 	ItemPointer indexcursor = NULL;
3422 	ItemPointerData decoded;
3423 	bool		tuplesort_empty = false;
3424 
3425 	/*
3426 	 * sanity checks
3427 	 */
3428 	Assert(OidIsValid(indexRelation->rd_rel->relam));
3429 
3430 	/*
3431 	 * Need an EState for evaluation of index expressions and partial-index
3432 	 * predicates.  Also a slot to hold the current tuple.
3433 	 */
3434 	estate = CreateExecutorState();
3435 	econtext = GetPerTupleExprContext(estate);
3436 	slot = MakeSingleTupleTableSlot(RelationGetDescr(heapRelation));
3437 
3438 	/* Arrange for econtext's scan tuple to be the tuple under test */
3439 	econtext->ecxt_scantuple = slot;
3440 
3441 	/* Set up execution state for predicate, if any. */
3442 	predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
3443 
3444 	/*
3445 	 * Prepare for scan of the base relation.  We need just those tuples
3446 	 * satisfying the passed-in reference snapshot.  We must disable syncscan
3447 	 * here, because it's critical that we read from block zero forward to
3448 	 * match the sorted TIDs.
3449 	 */
3450 	scan = heap_beginscan_strat(heapRelation,	/* relation */
3451 								snapshot,	/* snapshot */
3452 								0,	/* number of keys */
3453 								NULL,	/* scan key */
3454 								true,	/* buffer access strategy OK */
3455 								false); /* syncscan not OK */
3456 
3457 	/*
3458 	 * Scan all tuples matching the snapshot.
3459 	 */
3460 	while ((heapTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
3461 	{
3462 		ItemPointer heapcursor = &heapTuple->t_self;
3463 		ItemPointerData rootTuple;
3464 		OffsetNumber root_offnum;
3465 
3466 		CHECK_FOR_INTERRUPTS();
3467 
3468 		state->htups += 1;
3469 
3470 		/*
3471 		 * As commented in IndexBuildHeapScan, we should index heap-only
3472 		 * tuples under the TIDs of their root tuples; so when we advance onto
3473 		 * a new heap page, build a map of root item offsets on the page.
3474 		 *
3475 		 * This complicates merging against the tuplesort output: we will
3476 		 * visit the live tuples in order by their offsets, but the root
3477 		 * offsets that we need to compare against the index contents might be
3478 		 * ordered differently.  So we might have to "look back" within the
3479 		 * tuplesort output, but only within the current page.  We handle that
3480 		 * by keeping a bool array in_index[] showing all the
3481 		 * already-passed-over tuplesort output TIDs of the current page. We
3482 		 * clear that array here, when advancing onto a new heap page.
3483 		 */
3484 		if (scan->rs_cblock != root_blkno)
3485 		{
3486 			Page		page = BufferGetPage(scan->rs_cbuf);
3487 
3488 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);
3489 			heap_get_root_tuples(page, root_offsets);
3490 			LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
3491 
3492 			memset(in_index, 0, sizeof(in_index));
3493 
3494 			root_blkno = scan->rs_cblock;
3495 		}
3496 
3497 		/* Convert actual tuple TID to root TID */
3498 		rootTuple = *heapcursor;
3499 		root_offnum = ItemPointerGetOffsetNumber(heapcursor);
3500 
3501 		if (HeapTupleIsHeapOnly(heapTuple))
3502 		{
3503 			root_offnum = root_offsets[root_offnum - 1];
3504 			if (!OffsetNumberIsValid(root_offnum))
3505 				ereport(ERROR,
3506 						(errcode(ERRCODE_DATA_CORRUPTED),
3507 						 errmsg_internal("failed to find parent tuple for heap-only tuple at (%u,%u) in table \"%s\"",
3508 										 ItemPointerGetBlockNumber(heapcursor),
3509 										 ItemPointerGetOffsetNumber(heapcursor),
3510 										 RelationGetRelationName(heapRelation))));
3511 			ItemPointerSetOffsetNumber(&rootTuple, root_offnum);
3512 		}
3513 
3514 		/*
3515 		 * "merge" by skipping through the index tuples until we find or pass
3516 		 * the current root tuple.
3517 		 */
3518 		while (!tuplesort_empty &&
3519 			   (!indexcursor ||
3520 				ItemPointerCompare(indexcursor, &rootTuple) < 0))
3521 		{
3522 			Datum		ts_val;
3523 			bool		ts_isnull;
3524 
3525 			if (indexcursor)
3526 			{
3527 				/*
3528 				 * Remember index items seen earlier on the current heap page
3529 				 */
3530 				if (ItemPointerGetBlockNumber(indexcursor) == root_blkno)
3531 					in_index[ItemPointerGetOffsetNumber(indexcursor) - 1] = true;
3532 			}
3533 
3534 			tuplesort_empty = !tuplesort_getdatum(state->tuplesort, true,
3535 												  &ts_val, &ts_isnull, NULL);
3536 			Assert(tuplesort_empty || !ts_isnull);
3537 			if (!tuplesort_empty)
3538 			{
3539 				itemptr_decode(&decoded, DatumGetInt64(ts_val));
3540 				indexcursor = &decoded;
3541 
3542 				/* If int8 is pass-by-ref, free (encoded) TID Datum memory */
3543 #ifndef USE_FLOAT8_BYVAL
3544 				pfree(DatumGetPointer(ts_val));
3545 #endif
3546 			}
3547 			else
3548 			{
3549 				/* Be tidy */
3550 				indexcursor = NULL;
3551 			}
3552 		}
3553 
3554 		/*
3555 		 * If the tuplesort has overshot *and* we didn't see a match earlier,
3556 		 * then this tuple is missing from the index, so insert it.
3557 		 */
3558 		if ((tuplesort_empty ||
3559 			 ItemPointerCompare(indexcursor, &rootTuple) > 0) &&
3560 			!in_index[root_offnum - 1])
3561 		{
3562 			MemoryContextReset(econtext->ecxt_per_tuple_memory);
3563 
3564 			/* Set up for predicate or expression evaluation */
3565 			ExecStoreTuple(heapTuple, slot, InvalidBuffer, false);
3566 
3567 			/*
3568 			 * In a partial index, discard tuples that don't satisfy the
3569 			 * predicate.
3570 			 */
3571 			if (predicate != NULL)
3572 			{
3573 				if (!ExecQual(predicate, econtext))
3574 					continue;
3575 			}
3576 
3577 			/*
3578 			 * For the current heap tuple, extract all the attributes we use
3579 			 * in this index, and note which are null.  This also performs
3580 			 * evaluation of any expressions needed.
3581 			 */
3582 			FormIndexDatum(indexInfo,
3583 						   slot,
3584 						   estate,
3585 						   values,
3586 						   isnull);
3587 
3588 			/*
3589 			 * You'd think we should go ahead and build the index tuple here,
3590 			 * but some index AMs want to do further processing on the data
3591 			 * first. So pass the values[] and isnull[] arrays, instead.
3592 			 */
3593 
3594 			/*
3595 			 * If the tuple is already committed dead, you might think we
3596 			 * could suppress uniqueness checking, but this is no longer true
3597 			 * in the presence of HOT, because the insert is actually a proxy
3598 			 * for a uniqueness check on the whole HOT-chain.  That is, the
3599 			 * tuple we have here could be dead because it was already
3600 			 * HOT-updated, and if so the updating transaction will not have
3601 			 * thought it should insert index entries.  The index AM will
3602 			 * check the whole HOT-chain and correctly detect a conflict if
3603 			 * there is one.
3604 			 */
3605 
3606 			index_insert(indexRelation,
3607 						 values,
3608 						 isnull,
3609 						 &rootTuple,
3610 						 heapRelation,
3611 						 indexInfo->ii_Unique ?
3612 						 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
3613 						 indexInfo);
3614 
3615 			state->tups_inserted += 1;
3616 		}
3617 	}
3618 
3619 	heap_endscan(scan);
3620 
3621 	ExecDropSingleTupleTableSlot(slot);
3622 
3623 	FreeExecutorState(estate);
3624 
3625 	/* These may have been pointing to the now-gone estate */
3626 	indexInfo->ii_ExpressionsState = NIL;
3627 	indexInfo->ii_PredicateState = NULL;
3628 }
3629 
3630 
3631 /*
3632  * index_set_state_flags - adjust pg_index state flags
3633  *
3634  * This is used during CREATE/DROP INDEX CONCURRENTLY to adjust the pg_index
3635  * flags that denote the index's state.
3636  *
3637  * Note that CatalogTupleUpdate() sends a cache invalidation message for the
3638  * tuple, so other sessions will hear about the update as soon as we commit.
3639  */
3640 void
index_set_state_flags(Oid indexId,IndexStateFlagsAction action)3641 index_set_state_flags(Oid indexId, IndexStateFlagsAction action)
3642 {
3643 	Relation	pg_index;
3644 	HeapTuple	indexTuple;
3645 	Form_pg_index indexForm;
3646 
3647 	/* Open pg_index and fetch a writable copy of the index's tuple */
3648 	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3649 
3650 	indexTuple = SearchSysCacheCopy1(INDEXRELID,
3651 									 ObjectIdGetDatum(indexId));
3652 	if (!HeapTupleIsValid(indexTuple))
3653 		elog(ERROR, "cache lookup failed for index %u", indexId);
3654 	indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3655 
3656 	/* Perform the requested state change on the copy */
3657 	switch (action)
3658 	{
3659 		case INDEX_CREATE_SET_READY:
3660 			/* Set indisready during a CREATE INDEX CONCURRENTLY sequence */
3661 			Assert(indexForm->indislive);
3662 			Assert(!indexForm->indisready);
3663 			Assert(!indexForm->indisvalid);
3664 			indexForm->indisready = true;
3665 			break;
3666 		case INDEX_CREATE_SET_VALID:
3667 			/* Set indisvalid during a CREATE INDEX CONCURRENTLY sequence */
3668 			Assert(indexForm->indislive);
3669 			Assert(indexForm->indisready);
3670 			Assert(!indexForm->indisvalid);
3671 			indexForm->indisvalid = true;
3672 			break;
3673 		case INDEX_DROP_CLEAR_VALID:
3674 
3675 			/*
3676 			 * Clear indisvalid during a DROP INDEX CONCURRENTLY sequence
3677 			 *
3678 			 * If indisready == true we leave it set so the index still gets
3679 			 * maintained by active transactions.  We only need to ensure that
3680 			 * indisvalid is false.  (We don't assert that either is initially
3681 			 * true, though, since we want to be able to retry a DROP INDEX
3682 			 * CONCURRENTLY that failed partway through.)
3683 			 *
3684 			 * Note: the CLUSTER logic assumes that indisclustered cannot be
3685 			 * set on any invalid index, so clear that flag too.
3686 			 */
3687 			indexForm->indisvalid = false;
3688 			indexForm->indisclustered = false;
3689 			break;
3690 		case INDEX_DROP_SET_DEAD:
3691 
3692 			/*
3693 			 * Clear indisready/indislive during DROP INDEX CONCURRENTLY
3694 			 *
3695 			 * We clear both indisready and indislive, because we not only
3696 			 * want to stop updates, we want to prevent sessions from touching
3697 			 * the index at all.
3698 			 */
3699 			Assert(!indexForm->indisvalid);
3700 			indexForm->indisready = false;
3701 			indexForm->indislive = false;
3702 			break;
3703 	}
3704 
3705 	/* ... and update it */
3706 	CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3707 
3708 	heap_close(pg_index, RowExclusiveLock);
3709 }
3710 
3711 
3712 /*
3713  * IndexGetRelation: given an index's relation OID, get the OID of the
3714  * relation it is an index on.  Uses the system cache.
3715  */
3716 Oid
IndexGetRelation(Oid indexId,bool missing_ok)3717 IndexGetRelation(Oid indexId, bool missing_ok)
3718 {
3719 	HeapTuple	tuple;
3720 	Form_pg_index index;
3721 	Oid			result;
3722 
3723 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexId));
3724 	if (!HeapTupleIsValid(tuple))
3725 	{
3726 		if (missing_ok)
3727 			return InvalidOid;
3728 		elog(ERROR, "cache lookup failed for index %u", indexId);
3729 	}
3730 	index = (Form_pg_index) GETSTRUCT(tuple);
3731 	Assert(index->indexrelid == indexId);
3732 
3733 	result = index->indrelid;
3734 	ReleaseSysCache(tuple);
3735 	return result;
3736 }
3737 
3738 /*
3739  * reindex_index - This routine is used to recreate a single index
3740  */
3741 void
reindex_index(Oid indexId,bool skip_constraint_checks,char persistence,int options)3742 reindex_index(Oid indexId, bool skip_constraint_checks, char persistence,
3743 			  int options)
3744 {
3745 	Relation	iRel,
3746 				heapRelation;
3747 	Oid			heapId;
3748 	IndexInfo  *indexInfo;
3749 	volatile bool skipped_constraint = false;
3750 	PGRUsage	ru0;
3751 
3752 	pg_rusage_init(&ru0);
3753 
3754 	/*
3755 	 * Open and lock the parent heap relation.  ShareLock is sufficient since
3756 	 * we only need to be sure no schema or data changes are going on.
3757 	 */
3758 	heapId = IndexGetRelation(indexId, false);
3759 	heapRelation = heap_open(heapId, ShareLock);
3760 
3761 	/*
3762 	 * Open the target index relation and get an exclusive lock on it, to
3763 	 * ensure that no one else is touching this particular index.
3764 	 */
3765 	iRel = index_open(indexId, AccessExclusiveLock);
3766 
3767 	/*
3768 	 * The case of reindexing partitioned tables and indexes is handled
3769 	 * differently by upper layers, so this case shouldn't arise.
3770 	 */
3771 	if (iRel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
3772 		elog(ERROR, "unsupported relation kind for index \"%s\"",
3773 			 RelationGetRelationName(iRel));
3774 
3775 	/*
3776 	 * Don't allow reindex on temp tables of other backends ... their local
3777 	 * buffer manager is not going to cope.
3778 	 */
3779 	if (RELATION_IS_OTHER_TEMP(iRel))
3780 		ereport(ERROR,
3781 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3782 				 errmsg("cannot reindex temporary tables of other sessions")));
3783 
3784 	/*
3785 	 * Also check for active uses of the index in the current transaction; we
3786 	 * don't want to reindex underneath an open indexscan.
3787 	 */
3788 	CheckTableNotInUse(iRel, "REINDEX INDEX");
3789 
3790 	/*
3791 	 * All predicate locks on the index are about to be made invalid. Promote
3792 	 * them to relation locks on the heap.
3793 	 */
3794 	TransferPredicateLocksToHeapRelation(iRel);
3795 
3796 	/* Fetch info needed for index_build */
3797 	indexInfo = BuildIndexInfo(iRel);
3798 
3799 	/* If requested, skip checking uniqueness/exclusion constraints */
3800 	if (skip_constraint_checks)
3801 	{
3802 		if (indexInfo->ii_Unique || indexInfo->ii_ExclusionOps != NULL)
3803 			skipped_constraint = true;
3804 		indexInfo->ii_Unique = false;
3805 		indexInfo->ii_ExclusionOps = NULL;
3806 		indexInfo->ii_ExclusionProcs = NULL;
3807 		indexInfo->ii_ExclusionStrats = NULL;
3808 	}
3809 
3810 	/* Suppress use of the target index while rebuilding it */
3811 	SetReindexProcessing(heapId, indexId);
3812 
3813 	/* Create a new physical relation for the index */
3814 	RelationSetNewRelfilenode(iRel, persistence, InvalidTransactionId,
3815 							  InvalidMultiXactId);
3816 
3817 	/* Initialize the index and rebuild */
3818 	/* Note: we do not need to re-establish pkey setting */
3819 	index_build(heapRelation, iRel, indexInfo, false, true, true);
3820 
3821 	/* Re-allow use of target index */
3822 	ResetReindexProcessing();
3823 
3824 	/*
3825 	 * If the index is marked invalid/not-ready/dead (ie, it's from a failed
3826 	 * CREATE INDEX CONCURRENTLY, or a DROP INDEX CONCURRENTLY failed midway),
3827 	 * and we didn't skip a uniqueness check, we can now mark it valid.  This
3828 	 * allows REINDEX to be used to clean up in such cases.
3829 	 *
3830 	 * We can also reset indcheckxmin, because we have now done a
3831 	 * non-concurrent index build, *except* in the case where index_build
3832 	 * found some still-broken HOT chains. If it did, and we don't have to
3833 	 * change any of the other flags, we just leave indcheckxmin alone (note
3834 	 * that index_build won't have changed it, because this is a reindex).
3835 	 * This is okay and desirable because not updating the tuple leaves the
3836 	 * index's usability horizon (recorded as the tuple's xmin value) the same
3837 	 * as it was.
3838 	 *
3839 	 * But, if the index was invalid/not-ready/dead and there were broken HOT
3840 	 * chains, we had better force indcheckxmin true, because the normal
3841 	 * argument that the HOT chains couldn't conflict with the index is
3842 	 * suspect for an invalid index.  (A conflict is definitely possible if
3843 	 * the index was dead.  It probably shouldn't happen otherwise, but let's
3844 	 * be conservative.)  In this case advancing the usability horizon is
3845 	 * appropriate.
3846 	 *
3847 	 * Another reason for avoiding unnecessary updates here is that while
3848 	 * reindexing pg_index itself, we must not try to update tuples in it.
3849 	 * pg_index's indexes should always have these flags in their clean state,
3850 	 * so that won't happen.
3851 	 *
3852 	 * If early pruning/vacuuming is enabled for the heap relation, the
3853 	 * usability horizon must be advanced to the current transaction on every
3854 	 * build or rebuild.  pg_index is OK in this regard because catalog tables
3855 	 * are not subject to early cleanup.
3856 	 */
3857 	if (!skipped_constraint)
3858 	{
3859 		Relation	pg_index;
3860 		HeapTuple	indexTuple;
3861 		Form_pg_index indexForm;
3862 		bool		index_bad;
3863 		bool		early_pruning_enabled = EarlyPruningEnabled(heapRelation);
3864 
3865 		pg_index = heap_open(IndexRelationId, RowExclusiveLock);
3866 
3867 		indexTuple = SearchSysCacheCopy1(INDEXRELID,
3868 										 ObjectIdGetDatum(indexId));
3869 		if (!HeapTupleIsValid(indexTuple))
3870 			elog(ERROR, "cache lookup failed for index %u", indexId);
3871 		indexForm = (Form_pg_index) GETSTRUCT(indexTuple);
3872 
3873 		index_bad = (!indexForm->indisvalid ||
3874 					 !indexForm->indisready ||
3875 					 !indexForm->indislive);
3876 		if (index_bad ||
3877 			(indexForm->indcheckxmin && !indexInfo->ii_BrokenHotChain) ||
3878 			early_pruning_enabled)
3879 		{
3880 			if (!indexInfo->ii_BrokenHotChain && !early_pruning_enabled)
3881 				indexForm->indcheckxmin = false;
3882 			else if (index_bad || early_pruning_enabled)
3883 				indexForm->indcheckxmin = true;
3884 			indexForm->indisvalid = true;
3885 			indexForm->indisready = true;
3886 			indexForm->indislive = true;
3887 			CatalogTupleUpdate(pg_index, &indexTuple->t_self, indexTuple);
3888 
3889 			/*
3890 			 * Invalidate the relcache for the table, so that after we commit
3891 			 * all sessions will refresh the table's index list.  This ensures
3892 			 * that if anyone misses seeing the pg_index row during this
3893 			 * update, they'll refresh their list before attempting any update
3894 			 * on the table.
3895 			 */
3896 			CacheInvalidateRelcache(heapRelation);
3897 		}
3898 
3899 		heap_close(pg_index, RowExclusiveLock);
3900 	}
3901 
3902 	/* Log what we did */
3903 	if (options & REINDEXOPT_VERBOSE)
3904 		ereport(INFO,
3905 				(errmsg("index \"%s\" was reindexed",
3906 						get_rel_name(indexId)),
3907 				 errdetail_internal("%s",
3908 									pg_rusage_show(&ru0))));
3909 
3910 	/* Close rels, but keep locks */
3911 	index_close(iRel, NoLock);
3912 	heap_close(heapRelation, NoLock);
3913 }
3914 
3915 /*
3916  * reindex_relation - This routine is used to recreate all indexes
3917  * of a relation (and optionally its toast relation too, if any).
3918  *
3919  * "flags" is a bitmask that can include any combination of these bits:
3920  *
3921  * REINDEX_REL_PROCESS_TOAST: if true, process the toast table too (if any).
3922  *
3923  * REINDEX_REL_SUPPRESS_INDEX_USE: if true, the relation was just completely
3924  * rebuilt by an operation such as VACUUM FULL or CLUSTER, and therefore its
3925  * indexes are inconsistent with it.  This makes things tricky if the relation
3926  * is a system catalog that we might consult during the reindexing.  To deal
3927  * with that case, we mark all of the indexes as pending rebuild so that they
3928  * won't be trusted until rebuilt.  The caller is required to call us *without*
3929  * having made the rebuilt table visible by doing CommandCounterIncrement;
3930  * we'll do CCI after having collected the index list.  (This way we can still
3931  * use catalog indexes while collecting the list.)
3932  *
3933  * REINDEX_REL_CHECK_CONSTRAINTS: if true, recheck unique and exclusion
3934  * constraint conditions, else don't.  To avoid deadlocks, VACUUM FULL or
3935  * CLUSTER on a system catalog must omit this flag.  REINDEX should be used to
3936  * rebuild an index if constraint inconsistency is suspected.  For optimal
3937  * performance, other callers should include the flag only after transforming
3938  * the data in a manner that risks a change in constraint validity.
3939  *
3940  * REINDEX_REL_FORCE_INDEXES_UNLOGGED: if true, set the persistence of the
3941  * rebuilt indexes to unlogged.
3942  *
3943  * REINDEX_REL_FORCE_INDEXES_PERMANENT: if true, set the persistence of the
3944  * rebuilt indexes to permanent.
3945  *
3946  * Returns true if any indexes were rebuilt (including toast table's index
3947  * when relevant).  Note that a CommandCounterIncrement will occur after each
3948  * index rebuild.
3949  */
3950 bool
reindex_relation(Oid relid,int flags,int options)3951 reindex_relation(Oid relid, int flags, int options)
3952 {
3953 	Relation	rel;
3954 	Oid			toast_relid;
3955 	List	   *indexIds;
3956 	char		persistence;
3957 	bool		result;
3958 	ListCell   *indexId;
3959 
3960 	/*
3961 	 * Open and lock the relation.  ShareLock is sufficient since we only need
3962 	 * to prevent schema and data changes in it.  The lock level used here
3963 	 * should match ReindexTable().
3964 	 */
3965 	rel = heap_open(relid, ShareLock);
3966 
3967 	/*
3968 	 * This may be useful when implemented someday; but that day is not today.
3969 	 * For now, avoid erroring out when called in a multi-table context
3970 	 * (REINDEX SCHEMA) and happen to come across a partitioned table.  The
3971 	 * partitions may be reindexed on their own anyway.
3972 	 */
3973 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
3974 	{
3975 		ereport(WARNING,
3976 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3977 				 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
3978 						RelationGetRelationName(rel))));
3979 		heap_close(rel, ShareLock);
3980 		return false;
3981 	}
3982 
3983 	toast_relid = rel->rd_rel->reltoastrelid;
3984 
3985 	/*
3986 	 * Get the list of index OIDs for this relation.  (We trust to the
3987 	 * relcache to get this with a sequential scan if ignoring system
3988 	 * indexes.)
3989 	 */
3990 	indexIds = RelationGetIndexList(rel);
3991 
3992 	if (flags & REINDEX_REL_SUPPRESS_INDEX_USE)
3993 	{
3994 		/* Suppress use of all the indexes until they are rebuilt */
3995 		SetReindexPending(indexIds);
3996 
3997 		/*
3998 		 * Make the new heap contents visible --- now things might be
3999 		 * inconsistent!
4000 		 */
4001 		CommandCounterIncrement();
4002 	}
4003 
4004 	/*
4005 	 * Compute persistence of indexes: same as that of owning rel, unless
4006 	 * caller specified otherwise.
4007 	 */
4008 	if (flags & REINDEX_REL_FORCE_INDEXES_UNLOGGED)
4009 		persistence = RELPERSISTENCE_UNLOGGED;
4010 	else if (flags & REINDEX_REL_FORCE_INDEXES_PERMANENT)
4011 		persistence = RELPERSISTENCE_PERMANENT;
4012 	else
4013 		persistence = rel->rd_rel->relpersistence;
4014 
4015 	/* Reindex all the indexes. */
4016 	foreach(indexId, indexIds)
4017 	{
4018 		Oid			indexOid = lfirst_oid(indexId);
4019 
4020 		reindex_index(indexOid, !(flags & REINDEX_REL_CHECK_CONSTRAINTS),
4021 					  persistence, options);
4022 
4023 		CommandCounterIncrement();
4024 
4025 		/* Index should no longer be in the pending list */
4026 		Assert(!ReindexIsProcessingIndex(indexOid));
4027 	}
4028 
4029 	/*
4030 	 * Close rel, but continue to hold the lock.
4031 	 */
4032 	heap_close(rel, NoLock);
4033 
4034 	result = (indexIds != NIL);
4035 
4036 	/*
4037 	 * If the relation has a secondary toast rel, reindex that too while we
4038 	 * still hold the lock on the master table.
4039 	 */
4040 	if ((flags & REINDEX_REL_PROCESS_TOAST) && OidIsValid(toast_relid))
4041 		result |= reindex_relation(toast_relid, flags, options);
4042 
4043 	return result;
4044 }
4045 
4046 
4047 /* ----------------------------------------------------------------
4048  *		System index reindexing support
4049  *
4050  * When we are busy reindexing a system index, this code provides support
4051  * for preventing catalog lookups from using that index.  We also make use
4052  * of this to catch attempted uses of user indexes during reindexing of
4053  * those indexes.  This information is propagated to parallel workers;
4054  * attempting to change it during a parallel operation is not permitted.
4055  * ----------------------------------------------------------------
4056  */
4057 
static Oid	currentlyReindexedHeap = InvalidOid;	/* heap whose index is being rebuilt */
static Oid	currentlyReindexedIndex = InvalidOid;	/* index currently being rebuilt */
static List *pendingReindexedIndexes = NIL; /* indexes awaiting rebuild, treated as invalid */
static int	reindexingNestLevel = 0;	/* xact nest level at which reindex state was set */
4062 
4063 /*
4064  * ReindexIsProcessingHeap
4065  *		True if heap specified by OID is currently being reindexed.
4066  */
4067 bool
ReindexIsProcessingHeap(Oid heapOid)4068 ReindexIsProcessingHeap(Oid heapOid)
4069 {
4070 	return heapOid == currentlyReindexedHeap;
4071 }
4072 
4073 /*
4074  * ReindexIsCurrentlyProcessingIndex
4075  *		True if index specified by OID is currently being reindexed.
4076  */
4077 static bool
ReindexIsCurrentlyProcessingIndex(Oid indexOid)4078 ReindexIsCurrentlyProcessingIndex(Oid indexOid)
4079 {
4080 	return indexOid == currentlyReindexedIndex;
4081 }
4082 
4083 /*
4084  * ReindexIsProcessingIndex
4085  *		True if index specified by OID is currently being reindexed,
4086  *		or should be treated as invalid because it is awaiting reindex.
4087  */
4088 bool
ReindexIsProcessingIndex(Oid indexOid)4089 ReindexIsProcessingIndex(Oid indexOid)
4090 {
4091 	return indexOid == currentlyReindexedIndex ||
4092 		list_member_oid(pendingReindexedIndexes, indexOid);
4093 }
4094 
4095 /*
4096  * SetReindexProcessing
4097  *		Set flag that specified heap/index are being reindexed.
4098  */
4099 static void
SetReindexProcessing(Oid heapOid,Oid indexOid)4100 SetReindexProcessing(Oid heapOid, Oid indexOid)
4101 {
4102 	Assert(OidIsValid(heapOid) && OidIsValid(indexOid));
4103 	/* Reindexing is not re-entrant. */
4104 	if (OidIsValid(currentlyReindexedHeap))
4105 		elog(ERROR, "cannot reindex while reindexing");
4106 	currentlyReindexedHeap = heapOid;
4107 	currentlyReindexedIndex = indexOid;
4108 	/* Index is no longer "pending" reindex. */
4109 	RemoveReindexPending(indexOid);
4110 	/* This may have been set already, but in case it isn't, do so now. */
4111 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4112 }
4113 
4114 /*
4115  * ResetReindexProcessing
4116  *		Unset reindexing status.
4117  */
4118 static void
ResetReindexProcessing(void)4119 ResetReindexProcessing(void)
4120 {
4121 	currentlyReindexedHeap = InvalidOid;
4122 	currentlyReindexedIndex = InvalidOid;
4123 	/* reindexingNestLevel remains set till end of (sub)transaction */
4124 }
4125 
4126 /*
4127  * SetReindexPending
4128  *		Mark the given indexes as pending reindex.
4129  *
4130  * NB: we assume that the current memory context stays valid throughout.
4131  */
4132 static void
SetReindexPending(List * indexes)4133 SetReindexPending(List *indexes)
4134 {
4135 	/* Reindexing is not re-entrant. */
4136 	if (pendingReindexedIndexes)
4137 		elog(ERROR, "cannot reindex while reindexing");
4138 	if (IsInParallelMode())
4139 		elog(ERROR, "cannot modify reindex state during a parallel operation");
4140 	pendingReindexedIndexes = list_copy(indexes);
4141 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4142 }
4143 
4144 /*
4145  * RemoveReindexPending
4146  *		Remove the given index from the pending list.
4147  */
4148 static void
RemoveReindexPending(Oid indexOid)4149 RemoveReindexPending(Oid indexOid)
4150 {
4151 	if (IsInParallelMode())
4152 		elog(ERROR, "cannot modify reindex state during a parallel operation");
4153 	pendingReindexedIndexes = list_delete_oid(pendingReindexedIndexes,
4154 											  indexOid);
4155 }
4156 
4157 /*
4158  * ResetReindexState
4159  *		Clear all reindexing state during (sub)transaction abort.
4160  */
4161 void
ResetReindexState(int nestLevel)4162 ResetReindexState(int nestLevel)
4163 {
4164 	/*
4165 	 * Because reindexing is not re-entrant, we don't need to cope with nested
4166 	 * reindexing states.  We just need to avoid messing up the outer-level
4167 	 * state in case a subtransaction fails within a REINDEX.  So checking the
4168 	 * current nest level against that of the reindex operation is sufficient.
4169 	 */
4170 	if (reindexingNestLevel >= nestLevel)
4171 	{
4172 		currentlyReindexedHeap = InvalidOid;
4173 		currentlyReindexedIndex = InvalidOid;
4174 
4175 		/*
4176 		 * We needn't try to release the contents of pendingReindexedIndexes;
4177 		 * that list should be in a transaction-lifespan context, so it will
4178 		 * go away automatically.
4179 		 */
4180 		pendingReindexedIndexes = NIL;
4181 
4182 		reindexingNestLevel = 0;
4183 	}
4184 }
4185 
4186 /*
4187  * EstimateReindexStateSpace
4188  *		Estimate space needed to pass reindex state to parallel workers.
4189  */
4190 Size
EstimateReindexStateSpace(void)4191 EstimateReindexStateSpace(void)
4192 {
4193 	return offsetof(SerializedReindexState, pendingReindexedIndexes)
4194 		+ mul_size(sizeof(Oid), list_length(pendingReindexedIndexes));
4195 }
4196 
4197 /*
4198  * SerializeReindexState
4199  *		Serialize reindex state for parallel workers.
4200  */
4201 void
SerializeReindexState(Size maxsize,char * start_address)4202 SerializeReindexState(Size maxsize, char *start_address)
4203 {
4204 	SerializedReindexState *sistate = (SerializedReindexState *) start_address;
4205 	int			c = 0;
4206 	ListCell   *lc;
4207 
4208 	sistate->currentlyReindexedHeap = currentlyReindexedHeap;
4209 	sistate->currentlyReindexedIndex = currentlyReindexedIndex;
4210 	sistate->numPendingReindexedIndexes = list_length(pendingReindexedIndexes);
4211 	foreach(lc, pendingReindexedIndexes)
4212 		sistate->pendingReindexedIndexes[c++] = lfirst_oid(lc);
4213 }
4214 
4215 /*
4216  * RestoreReindexState
4217  *		Restore reindex state in a parallel worker.
4218  */
4219 void
RestoreReindexState(void * reindexstate)4220 RestoreReindexState(void *reindexstate)
4221 {
4222 	SerializedReindexState *sistate = (SerializedReindexState *) reindexstate;
4223 	int			c = 0;
4224 	MemoryContext oldcontext;
4225 
4226 	currentlyReindexedHeap = sistate->currentlyReindexedHeap;
4227 	currentlyReindexedIndex = sistate->currentlyReindexedIndex;
4228 
4229 	Assert(pendingReindexedIndexes == NIL);
4230 	oldcontext = MemoryContextSwitchTo(TopMemoryContext);
4231 	for (c = 0; c < sistate->numPendingReindexedIndexes; ++c)
4232 		pendingReindexedIndexes =
4233 			lappend_oid(pendingReindexedIndexes,
4234 						sistate->pendingReindexedIndexes[c]);
4235 	MemoryContextSwitchTo(oldcontext);
4236 
4237 	/* Note the worker has its own transaction nesting level */
4238 	reindexingNestLevel = GetCurrentTransactionNestLevel();
4239 }
4240