1 /*-------------------------------------------------------------------------
2  *
3  * indexcmds.c
4  *	  POSTGRES define and remove index code.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/commands/indexcmds.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
#include "postgres.h"

#include "access/amapi.h"
#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/reloptions.h"
#include "access/sysattr.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/catalog.h"
#include "catalog/index.h"
#include "catalog/indexing.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_constraint.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_opclass.h"
#include "catalog/pg_opfamily.h"
#include "catalog/pg_tablespace.h"
#include "catalog/pg_type.h"
#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "commands/event_trigger.h"
#include "commands/progress.h"
#include "commands/tablecmds.h"
#include "commands/tablespace.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/optimizer.h"
#include "parser/parse_coerce.h"
#include "parser/parse_func.h"
#include "parser/parse_oper.h"
#include "partitioning/partdesc.h"
#include "pgstat.h"
#include "rewrite/rewriteManip.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "storage/procarray.h"
#include "storage/sinvaladt.h"
#include "utils/acl.h"
#include "utils/builtins.h"
#include "utils/fmgroids.h"
#include "utils/inval.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/partcache.h"
#include "utils/pg_rusage.h"
#include "utils/regproc.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
68 
69 
70 /* non-export function prototypes */
71 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
72 static void CheckPredicate(Expr *predicate);
73 static void ComputeIndexAttrs(IndexInfo *indexInfo,
74 							  Oid *typeOidP,
75 							  Oid *collationOidP,
76 							  Oid *classOidP,
77 							  int16 *colOptionP,
78 							  List *attList,
79 							  List *exclusionOpNames,
80 							  Oid relId,
81 							  const char *accessMethodName, Oid accessMethodId,
82 							  bool amcanorder,
83 							  bool isconstraint);
84 static char *ChooseIndexName(const char *tabname, Oid namespaceId,
85 							 List *colnames, List *exclusionOpNames,
86 							 bool primary, bool isconstraint);
87 static char *ChooseIndexNameAddition(List *colnames);
88 static List *ChooseIndexColumnNames(List *indexElems);
89 static void ReindexIndex(RangeVar *indexRelation, ReindexParams *params,
90 						 bool isTopLevel);
91 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
92 											Oid relId, Oid oldRelId, void *arg);
93 static Oid	ReindexTable(RangeVar *relation, ReindexParams *params,
94 						 bool isTopLevel);
95 static void ReindexMultipleTables(const char *objectName,
96 								  ReindexObjectType objectKind, ReindexParams *params);
97 static void reindex_error_callback(void *args);
98 static void ReindexPartitions(Oid relid, ReindexParams *params,
99 							  bool isTopLevel);
100 static void ReindexMultipleInternal(List *relids,
101 									ReindexParams *params);
102 static bool ReindexRelationConcurrently(Oid relationOid,
103 										ReindexParams *params);
104 static void update_relispartition(Oid relationId, bool newval);
105 static inline void set_indexsafe_procflags(void);
106 
107 /*
108  * callback argument type for RangeVarCallbackForReindexIndex()
109  */
110 struct ReindexIndexCallbackState
111 {
112 	ReindexParams params;		/* options from statement */
113 	Oid			locked_table_oid;	/* tracks previously locked table */
114 };
115 
/*
 * Argument block consumed by reindex_error_callback().
 */
typedef struct ReindexErrorInfo
{
	char	   *relname;		/* relation name */
	char	   *relnamespace;	/* namespace name for relname */
	char		relkind;		/* relkind value for the relation */
} ReindexErrorInfo;
125 
126 /*
127  * CheckIndexCompatible
128  *		Determine whether an existing index definition is compatible with a
129  *		prospective index definition, such that the existing index storage
130  *		could become the storage of the new index, avoiding a rebuild.
131  *
132  * 'heapRelation': the relation the index would apply to.
133  * 'accessMethodName': name of the AM to use.
134  * 'attributeList': a list of IndexElem specifying columns and expressions
135  *		to index on.
136  * 'exclusionOpNames': list of names of exclusion-constraint operators,
137  *		or NIL if not an exclusion constraint.
138  *
139  * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
140  * any indexes that depended on a changing column from their pg_get_indexdef
141  * or pg_get_constraintdef definitions.  We omit some of the sanity checks of
142  * DefineIndex.  We assume that the old and new indexes have the same number
143  * of columns and that if one has an expression column or predicate, both do.
144  * Errors arising from the attribute list still apply.
145  *
146  * Most column type changes that can skip a table rewrite do not invalidate
147  * indexes.  We acknowledge this when all operator classes, collations and
148  * exclusion operators match.  Though we could further permit intra-opfamily
149  * changes for btree and hash indexes, that adds subtle complexity with no
 * concrete benefit for core types.  Note that INCLUDE columns aren't
 * checked by this function; for them it's enough that the table rewrite is
 * skipped.
153  *
154  * When a comparison or exclusion operator has a polymorphic input type, the
155  * actual input types must also match.  This defends against the possibility
156  * that operators could vary behavior in response to get_fn_expr_argtype().
157  * At present, this hazard is theoretical: check_exclusion_constraint() and
158  * all core index access methods decline to set fn_expr for such calls.
159  *
160  * We do not yet implement a test to verify compatibility of expression
161  * columns or predicates, so assume any such index is incompatible.
162  */
163 bool
CheckIndexCompatible(Oid oldId,const char * accessMethodName,List * attributeList,List * exclusionOpNames)164 CheckIndexCompatible(Oid oldId,
165 					 const char *accessMethodName,
166 					 List *attributeList,
167 					 List *exclusionOpNames)
168 {
169 	bool		isconstraint;
170 	Oid		   *typeObjectId;
171 	Oid		   *collationObjectId;
172 	Oid		   *classObjectId;
173 	Oid			accessMethodId;
174 	Oid			relationId;
175 	HeapTuple	tuple;
176 	Form_pg_index indexForm;
177 	Form_pg_am	accessMethodForm;
178 	IndexAmRoutine *amRoutine;
179 	bool		amcanorder;
180 	int16	   *coloptions;
181 	IndexInfo  *indexInfo;
182 	int			numberOfAttributes;
183 	int			old_natts;
184 	bool		isnull;
185 	bool		ret = true;
186 	oidvector  *old_indclass;
187 	oidvector  *old_indcollation;
188 	Relation	irel;
189 	int			i;
190 	Datum		d;
191 
192 	/* Caller should already have the relation locked in some way. */
193 	relationId = IndexGetRelation(oldId, false);
194 
195 	/*
196 	 * We can pretend isconstraint = false unconditionally.  It only serves to
197 	 * decide the text of an error message that should never happen for us.
198 	 */
199 	isconstraint = false;
200 
201 	numberOfAttributes = list_length(attributeList);
202 	Assert(numberOfAttributes > 0);
203 	Assert(numberOfAttributes <= INDEX_MAX_KEYS);
204 
205 	/* look up the access method */
206 	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
207 	if (!HeapTupleIsValid(tuple))
208 		ereport(ERROR,
209 				(errcode(ERRCODE_UNDEFINED_OBJECT),
210 				 errmsg("access method \"%s\" does not exist",
211 						accessMethodName)));
212 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
213 	accessMethodId = accessMethodForm->oid;
214 	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
215 	ReleaseSysCache(tuple);
216 
217 	amcanorder = amRoutine->amcanorder;
218 
219 	/*
220 	 * Compute the operator classes, collations, and exclusion operators for
221 	 * the new index, so we can test whether it's compatible with the existing
222 	 * one.  Note that ComputeIndexAttrs might fail here, but that's OK:
223 	 * DefineIndex would have called this function with the same arguments
224 	 * later on, and it would have failed then anyway.  Our attributeList
225 	 * contains only key attributes, thus we're filling ii_NumIndexAttrs and
226 	 * ii_NumIndexKeyAttrs with same value.
227 	 */
228 	indexInfo = makeIndexInfo(numberOfAttributes, numberOfAttributes,
229 							  accessMethodId, NIL, NIL, false, false, false);
230 	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
231 	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
232 	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
233 	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
234 	ComputeIndexAttrs(indexInfo,
235 					  typeObjectId, collationObjectId, classObjectId,
236 					  coloptions, attributeList,
237 					  exclusionOpNames, relationId,
238 					  accessMethodName, accessMethodId,
239 					  amcanorder, isconstraint);
240 
241 
242 	/* Get the soon-obsolete pg_index tuple. */
243 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
244 	if (!HeapTupleIsValid(tuple))
245 		elog(ERROR, "cache lookup failed for index %u", oldId);
246 	indexForm = (Form_pg_index) GETSTRUCT(tuple);
247 
248 	/*
249 	 * We don't assess expressions or predicates; assume incompatibility.
250 	 * Also, if the index is invalid for any reason, treat it as incompatible.
251 	 */
252 	if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
253 		  heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
254 		  indexForm->indisvalid))
255 	{
256 		ReleaseSysCache(tuple);
257 		return false;
258 	}
259 
260 	/* Any change in operator class or collation breaks compatibility. */
261 	old_natts = indexForm->indnkeyatts;
262 	Assert(old_natts == numberOfAttributes);
263 
264 	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
265 	Assert(!isnull);
266 	old_indcollation = (oidvector *) DatumGetPointer(d);
267 
268 	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
269 	Assert(!isnull);
270 	old_indclass = (oidvector *) DatumGetPointer(d);
271 
272 	ret = (memcmp(old_indclass->values, classObjectId,
273 				  old_natts * sizeof(Oid)) == 0 &&
274 		   memcmp(old_indcollation->values, collationObjectId,
275 				  old_natts * sizeof(Oid)) == 0);
276 
277 	ReleaseSysCache(tuple);
278 
279 	if (!ret)
280 		return false;
281 
282 	/* For polymorphic opcintype, column type changes break compatibility. */
283 	irel = index_open(oldId, AccessShareLock);	/* caller probably has a lock */
284 	for (i = 0; i < old_natts; i++)
285 	{
286 		if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
287 			TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
288 		{
289 			ret = false;
290 			break;
291 		}
292 	}
293 
294 	/* Any change in opclass options break compatibility. */
295 	if (ret)
296 	{
297 		Datum	   *opclassOptions = RelationGetIndexRawAttOptions(irel);
298 
299 		ret = CompareOpclassOptions(opclassOptions,
300 									indexInfo->ii_OpclassOptions, old_natts);
301 
302 		if (opclassOptions)
303 			pfree(opclassOptions);
304 	}
305 
306 	/* Any change in exclusion operator selections breaks compatibility. */
307 	if (ret && indexInfo->ii_ExclusionOps != NULL)
308 	{
309 		Oid		   *old_operators,
310 				   *old_procs;
311 		uint16	   *old_strats;
312 
313 		RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
314 		ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
315 					 old_natts * sizeof(Oid)) == 0;
316 
317 		/* Require an exact input type match for polymorphic operators. */
318 		if (ret)
319 		{
320 			for (i = 0; i < old_natts && ret; i++)
321 			{
322 				Oid			left,
323 							right;
324 
325 				op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
326 				if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
327 					TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
328 				{
329 					ret = false;
330 					break;
331 				}
332 			}
333 		}
334 	}
335 
336 	index_close(irel, NoLock);
337 	return ret;
338 }
339 
340 /*
341  * CompareOpclassOptions
342  *
343  * Compare per-column opclass options which are represented by arrays of text[]
344  * datums.  Both elements of arrays and array themselves can be NULL.
345  */
346 static bool
CompareOpclassOptions(Datum * opts1,Datum * opts2,int natts)347 CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts)
348 {
349 	int			i;
350 
351 	if (!opts1 && !opts2)
352 		return true;
353 
354 	for (i = 0; i < natts; i++)
355 	{
356 		Datum		opt1 = opts1 ? opts1[i] : (Datum) 0;
357 		Datum		opt2 = opts2 ? opts2[i] : (Datum) 0;
358 
359 		if (opt1 == (Datum) 0)
360 		{
361 			if (opt2 == (Datum) 0)
362 				continue;
363 			else
364 				return false;
365 		}
366 		else if (opt2 == (Datum) 0)
367 			return false;
368 
369 		/* Compare non-NULL text[] datums. */
370 		if (!DatumGetBool(DirectFunctionCall2(array_eq, opt1, opt2)))
371 			return false;
372 	}
373 
374 	return true;
375 }
376 
377 /*
378  * WaitForOlderSnapshots
379  *
380  * Wait for transactions that might have an older snapshot than the given xmin
381  * limit, because it might not contain tuples deleted just before it has
382  * been taken. Obtain a list of VXIDs of such transactions, and wait for them
383  * individually. This is used when building an index concurrently.
384  *
385  * We can exclude any running transactions that have xmin > the xmin given;
386  * their oldest snapshot must be newer than our xmin limit.
387  * We can also exclude any transactions that have xmin = zero, since they
388  * evidently have no live snapshot at all (and any one they might be in
389  * process of taking is certainly newer than ours).  Transactions in other
390  * DBs can be ignored too, since they'll never even be able to see the
391  * index being worked on.
392  *
393  * We can also exclude autovacuum processes and processes running manual
394  * lazy VACUUMs, because they won't be fazed by missing index entries
395  * either.  (Manual ANALYZEs, however, can't be excluded because they
396  * might be within transactions that are going to do arbitrary operations
397  * later.)  Processes running CREATE INDEX CONCURRENTLY or REINDEX CONCURRENTLY
398  * on indexes that are neither expressional nor partial are also safe to
399  * ignore, since we know that those processes won't examine any data
400  * outside the table they're indexing.
401  *
402  * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
403  * check for that.
404  *
405  * If a process goes idle-in-transaction with xmin zero, we do not need to
406  * wait for it anymore, per the above argument.  We do not have the
407  * infrastructure right now to stop waiting if that happens, but we can at
408  * least avoid the folly of waiting when it is idle at the time we would
409  * begin to wait.  We do this by repeatedly rechecking the output of
410  * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
411  * doesn't show up in the output, we know we can forget about it.
412  */
413 void
WaitForOlderSnapshots(TransactionId limitXmin,bool progress)414 WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
415 {
416 	int			n_old_snapshots;
417 	int			i;
418 	VirtualTransactionId *old_snapshots;
419 
420 	old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
421 										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
422 										  | PROC_IN_SAFE_IC,
423 										  &n_old_snapshots);
424 	if (progress)
425 		pgstat_progress_update_param(PROGRESS_WAITFOR_TOTAL, n_old_snapshots);
426 
427 	for (i = 0; i < n_old_snapshots; i++)
428 	{
429 		if (!VirtualTransactionIdIsValid(old_snapshots[i]))
430 			continue;			/* found uninteresting in previous cycle */
431 
432 		if (i > 0)
433 		{
434 			/* see if anything's changed ... */
435 			VirtualTransactionId *newer_snapshots;
436 			int			n_newer_snapshots;
437 			int			j;
438 			int			k;
439 
440 			newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
441 													true, false,
442 													PROC_IS_AUTOVACUUM | PROC_IN_VACUUM
443 													| PROC_IN_SAFE_IC,
444 													&n_newer_snapshots);
445 			for (j = i; j < n_old_snapshots; j++)
446 			{
447 				if (!VirtualTransactionIdIsValid(old_snapshots[j]))
448 					continue;	/* found uninteresting in previous cycle */
449 				for (k = 0; k < n_newer_snapshots; k++)
450 				{
451 					if (VirtualTransactionIdEquals(old_snapshots[j],
452 												   newer_snapshots[k]))
453 						break;
454 				}
455 				if (k >= n_newer_snapshots) /* not there anymore */
456 					SetInvalidVirtualTransactionId(old_snapshots[j]);
457 			}
458 			pfree(newer_snapshots);
459 		}
460 
461 		if (VirtualTransactionIdIsValid(old_snapshots[i]))
462 		{
463 			/* If requested, publish who we're going to wait for. */
464 			if (progress)
465 			{
466 				PGPROC	   *holder = BackendIdGetProc(old_snapshots[i].backendId);
467 
468 				if (holder)
469 					pgstat_progress_update_param(PROGRESS_WAITFOR_CURRENT_PID,
470 												 holder->pid);
471 			}
472 			VirtualXactLock(old_snapshots[i], true);
473 		}
474 
475 		if (progress)
476 			pgstat_progress_update_param(PROGRESS_WAITFOR_DONE, i + 1);
477 	}
478 }
479 
480 
481 /*
482  * DefineIndex
483  *		Creates a new index.
484  *
485  * 'relationId': the OID of the heap relation on which the index is to be
486  *		created
487  * 'stmt': IndexStmt describing the properties of the new index.
488  * 'indexRelationId': normally InvalidOid, but during bootstrap can be
489  *		nonzero to specify a preselected OID for the index.
490  * 'parentIndexId': the OID of the parent index; InvalidOid if not the child
491  *		of a partitioned index.
492  * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
493  *		the child of a constraint (only used when recursing)
494  * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
495  * 'check_rights': check for CREATE rights in namespace and tablespace.  (This
496  *		should be true except when ALTER is deleting/recreating an index.)
497  * 'check_not_in_use': check for table not already in use in current session.
498  *		This should be true unless caller is holding the table open, in which
499  *		case the caller had better have checked it earlier.
500  * 'skip_build': make the catalog entries but don't create the index files
501  * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
502  *
503  * Returns the object address of the created index.
504  */
505 ObjectAddress
DefineIndex(Oid relationId,IndexStmt * stmt,Oid indexRelationId,Oid parentIndexId,Oid parentConstraintId,bool is_alter_table,bool check_rights,bool check_not_in_use,bool skip_build,bool quiet)506 DefineIndex(Oid relationId,
507 			IndexStmt *stmt,
508 			Oid indexRelationId,
509 			Oid parentIndexId,
510 			Oid parentConstraintId,
511 			bool is_alter_table,
512 			bool check_rights,
513 			bool check_not_in_use,
514 			bool skip_build,
515 			bool quiet)
516 {
517 	bool		concurrent;
518 	char	   *indexRelationName;
519 	char	   *accessMethodName;
520 	Oid		   *typeObjectId;
521 	Oid		   *collationObjectId;
522 	Oid		   *classObjectId;
523 	Oid			accessMethodId;
524 	Oid			namespaceId;
525 	Oid			tablespaceId;
526 	Oid			createdConstraintId = InvalidOid;
527 	List	   *indexColNames;
528 	List	   *allIndexParams;
529 	Relation	rel;
530 	HeapTuple	tuple;
531 	Form_pg_am	accessMethodForm;
532 	IndexAmRoutine *amRoutine;
533 	bool		amcanorder;
534 	amoptions_function amoptions;
535 	bool		partitioned;
536 	bool		safe_index;
537 	Datum		reloptions;
538 	int16	   *coloptions;
539 	IndexInfo  *indexInfo;
540 	bits16		flags;
541 	bits16		constr_flags;
542 	int			numberOfAttributes;
543 	int			numberOfKeyAttributes;
544 	TransactionId limitXmin;
545 	ObjectAddress address;
546 	LockRelId	heaprelid;
547 	LOCKTAG		heaplocktag;
548 	LOCKMODE	lockmode;
549 	Snapshot	snapshot;
550 	int			save_nestlevel = -1;
551 	int			i;
552 
553 	/*
554 	 * Some callers need us to run with an empty default_tablespace; this is a
555 	 * necessary hack to be able to reproduce catalog state accurately when
556 	 * recreating indexes after table-rewriting ALTER TABLE.
557 	 */
558 	if (stmt->reset_default_tblspc)
559 	{
560 		save_nestlevel = NewGUCNestLevel();
561 		(void) set_config_option("default_tablespace", "",
562 								 PGC_USERSET, PGC_S_SESSION,
563 								 GUC_ACTION_SAVE, true, 0, false);
564 	}
565 
566 	/*
567 	 * Force non-concurrent build on temporary relations, even if CONCURRENTLY
568 	 * was requested.  Other backends can't access a temporary relation, so
569 	 * there's no harm in grabbing a stronger lock, and a non-concurrent DROP
570 	 * is more efficient.  Do this before any use of the concurrent option is
571 	 * done.
572 	 */
573 	if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP)
574 		concurrent = true;
575 	else
576 		concurrent = false;
577 
578 	/*
579 	 * Start progress report.  If we're building a partition, this was already
580 	 * done.
581 	 */
582 	if (!OidIsValid(parentIndexId))
583 	{
584 		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
585 									  relationId);
586 		pgstat_progress_update_param(PROGRESS_CREATEIDX_COMMAND,
587 									 concurrent ?
588 									 PROGRESS_CREATEIDX_COMMAND_CREATE_CONCURRENTLY :
589 									 PROGRESS_CREATEIDX_COMMAND_CREATE);
590 	}
591 
592 	/*
593 	 * No index OID to report yet
594 	 */
595 	pgstat_progress_update_param(PROGRESS_CREATEIDX_INDEX_OID,
596 								 InvalidOid);
597 
598 	/*
599 	 * count key attributes in index
600 	 */
601 	numberOfKeyAttributes = list_length(stmt->indexParams);
602 
603 	/*
604 	 * Calculate the new list of index columns including both key columns and
605 	 * INCLUDE columns.  Later we can determine which of these are key
606 	 * columns, and which are just part of the INCLUDE list by checking the
607 	 * list position.  A list item in a position less than ii_NumIndexKeyAttrs
608 	 * is part of the key columns, and anything equal to and over is part of
609 	 * the INCLUDE columns.
610 	 */
611 	allIndexParams = list_concat_copy(stmt->indexParams,
612 									  stmt->indexIncludingParams);
613 	numberOfAttributes = list_length(allIndexParams);
614 
615 	if (numberOfKeyAttributes <= 0)
616 		ereport(ERROR,
617 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
618 				 errmsg("must specify at least one column")));
619 	if (numberOfAttributes > INDEX_MAX_KEYS)
620 		ereport(ERROR,
621 				(errcode(ERRCODE_TOO_MANY_COLUMNS),
622 				 errmsg("cannot use more than %d columns in an index",
623 						INDEX_MAX_KEYS)));
624 
625 	/*
626 	 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
627 	 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
628 	 * (but not VACUUM).
629 	 *
630 	 * NB: Caller is responsible for making sure that relationId refers to the
631 	 * relation on which the index should be built; except in bootstrap mode,
632 	 * this will typically require the caller to have already locked the
633 	 * relation.  To avoid lock upgrade hazards, that lock should be at least
634 	 * as strong as the one we take here.
635 	 *
636 	 * NB: If the lock strength here ever changes, code that is run by
637 	 * parallel workers under the control of certain particular ambuild
638 	 * functions will need to be updated, too.
639 	 */
640 	lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
641 	rel = table_open(relationId, lockmode);
642 
643 	namespaceId = RelationGetNamespace(rel);
644 
645 	/* Ensure that it makes sense to index this kind of relation */
646 	switch (rel->rd_rel->relkind)
647 	{
648 		case RELKIND_RELATION:
649 		case RELKIND_MATVIEW:
650 		case RELKIND_PARTITIONED_TABLE:
651 			/* OK */
652 			break;
653 		case RELKIND_FOREIGN_TABLE:
654 
655 			/*
656 			 * Custom error message for FOREIGN TABLE since the term is close
657 			 * to a regular table and can confuse the user.
658 			 */
659 			ereport(ERROR,
660 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
661 					 errmsg("cannot create index on foreign table \"%s\"",
662 							RelationGetRelationName(rel))));
663 			break;
664 		default:
665 			ereport(ERROR,
666 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
667 					 errmsg("\"%s\" is not a table or materialized view",
668 							RelationGetRelationName(rel))));
669 			break;
670 	}
671 
672 	/*
673 	 * Establish behavior for partitioned tables, and verify sanity of
674 	 * parameters.
675 	 *
676 	 * We do not build an actual index in this case; we only create a few
677 	 * catalog entries.  The actual indexes are built by recursing for each
678 	 * partition.
679 	 */
680 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
681 	if (partitioned)
682 	{
683 		/*
684 		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
685 		 * the error is thrown also for temporary tables.  Seems better to be
686 		 * consistent, even though we could do it on temporary table because
687 		 * we're not actually doing it concurrently.
688 		 */
689 		if (stmt->concurrent)
690 			ereport(ERROR,
691 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
692 					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
693 							RelationGetRelationName(rel))));
694 		if (stmt->excludeOpNames)
695 			ereport(ERROR,
696 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
697 					 errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
698 							RelationGetRelationName(rel))));
699 	}
700 
701 	/*
702 	 * Don't try to CREATE INDEX on temp tables of other backends.
703 	 */
704 	if (RELATION_IS_OTHER_TEMP(rel))
705 		ereport(ERROR,
706 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
707 				 errmsg("cannot create indexes on temporary tables of other sessions")));
708 
709 	/*
710 	 * Unless our caller vouches for having checked this already, insist that
711 	 * the table not be in use by our own session, either.  Otherwise we might
712 	 * fail to make entries in the new index (for instance, if an INSERT or
713 	 * UPDATE is in progress and has already made its list of target indexes).
714 	 */
715 	if (check_not_in_use)
716 		CheckTableNotInUse(rel, "CREATE INDEX");
717 
718 	/*
719 	 * Verify we (still) have CREATE rights in the rel's namespace.
720 	 * (Presumably we did when the rel was created, but maybe not anymore.)
721 	 * Skip check if caller doesn't want it.  Also skip check if
722 	 * bootstrapping, since permissions machinery may not be working yet.
723 	 */
724 	if (check_rights && !IsBootstrapProcessingMode())
725 	{
726 		AclResult	aclresult;
727 
728 		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
729 										  ACL_CREATE);
730 		if (aclresult != ACLCHECK_OK)
731 			aclcheck_error(aclresult, OBJECT_SCHEMA,
732 						   get_namespace_name(namespaceId));
733 	}
734 
735 	/*
736 	 * Select tablespace to use.  If not specified, use default tablespace
737 	 * (which may in turn default to database's default).
738 	 */
739 	if (stmt->tableSpace)
740 	{
741 		tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
742 		if (partitioned && tablespaceId == MyDatabaseTableSpace)
743 			ereport(ERROR,
744 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
745 					 errmsg("cannot specify default tablespace for partitioned relations")));
746 	}
747 	else
748 	{
749 		tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence,
750 											partitioned);
751 		/* note InvalidOid is OK in this case */
752 	}
753 
754 	/* Check tablespace permissions */
755 	if (check_rights &&
756 		OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
757 	{
758 		AclResult	aclresult;
759 
760 		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
761 										   ACL_CREATE);
762 		if (aclresult != ACLCHECK_OK)
763 			aclcheck_error(aclresult, OBJECT_TABLESPACE,
764 						   get_tablespace_name(tablespaceId));
765 	}
766 
767 	/*
768 	 * Force shared indexes into the pg_global tablespace.  This is a bit of a
769 	 * hack but seems simpler than marking them in the BKI commands.  On the
770 	 * other hand, if it's not shared, don't allow it to be placed there.
771 	 */
772 	if (rel->rd_rel->relisshared)
773 		tablespaceId = GLOBALTABLESPACE_OID;
774 	else if (tablespaceId == GLOBALTABLESPACE_OID)
775 		ereport(ERROR,
776 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
777 				 errmsg("only shared relations can be placed in pg_global tablespace")));
778 
779 	/*
780 	 * Choose the index column names.
781 	 */
782 	indexColNames = ChooseIndexColumnNames(allIndexParams);
783 
784 	/*
785 	 * Select name for index if caller didn't specify
786 	 */
787 	indexRelationName = stmt->idxname;
788 	if (indexRelationName == NULL)
789 		indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
790 											namespaceId,
791 											indexColNames,
792 											stmt->excludeOpNames,
793 											stmt->primary,
794 											stmt->isconstraint);
795 
796 	/*
797 	 * look up the access method, verify it can handle the requested features
798 	 */
799 	accessMethodName = stmt->accessMethod;
800 	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
801 	if (!HeapTupleIsValid(tuple))
802 	{
803 		/*
804 		 * Hack to provide more-or-less-transparent updating of old RTREE
805 		 * indexes to GiST: if RTREE is requested and not found, use GIST.
806 		 */
807 		if (strcmp(accessMethodName, "rtree") == 0)
808 		{
809 			ereport(NOTICE,
810 					(errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
811 			accessMethodName = "gist";
812 			tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
813 		}
814 
815 		if (!HeapTupleIsValid(tuple))
816 			ereport(ERROR,
817 					(errcode(ERRCODE_UNDEFINED_OBJECT),
818 					 errmsg("access method \"%s\" does not exist",
819 							accessMethodName)));
820 	}
821 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
822 	accessMethodId = accessMethodForm->oid;
823 	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
824 
825 	pgstat_progress_update_param(PROGRESS_CREATEIDX_ACCESS_METHOD_OID,
826 								 accessMethodId);
827 
828 	if (stmt->unique && !amRoutine->amcanunique)
829 		ereport(ERROR,
830 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
831 				 errmsg("access method \"%s\" does not support unique indexes",
832 						accessMethodName)));
833 	if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
834 		ereport(ERROR,
835 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
836 				 errmsg("access method \"%s\" does not support included columns",
837 						accessMethodName)));
838 	if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol)
839 		ereport(ERROR,
840 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
841 				 errmsg("access method \"%s\" does not support multicolumn indexes",
842 						accessMethodName)));
843 	if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
844 		ereport(ERROR,
845 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
846 				 errmsg("access method \"%s\" does not support exclusion constraints",
847 						accessMethodName)));
848 
849 	amcanorder = amRoutine->amcanorder;
850 	amoptions = amRoutine->amoptions;
851 
852 	pfree(amRoutine);
853 	ReleaseSysCache(tuple);
854 
855 	/*
856 	 * Validate predicate, if given
857 	 */
858 	if (stmt->whereClause)
859 		CheckPredicate((Expr *) stmt->whereClause);
860 
861 	/*
862 	 * Parse AM-specific options, convert to text array form, validate.
863 	 */
864 	reloptions = transformRelOptions((Datum) 0, stmt->options,
865 									 NULL, NULL, false, false);
866 
867 	(void) index_reloptions(amoptions, reloptions, true);
868 
869 	/*
870 	 * Prepare arguments for index_create, primarily an IndexInfo structure.
871 	 * Note that predicates must be in implicit-AND format.  In a concurrent
872 	 * build, mark it not-ready-for-inserts.
873 	 */
874 	indexInfo = makeIndexInfo(numberOfAttributes,
875 							  numberOfKeyAttributes,
876 							  accessMethodId,
877 							  NIL,	/* expressions, NIL for now */
878 							  make_ands_implicit((Expr *) stmt->whereClause),
879 							  stmt->unique,
880 							  !concurrent,
881 							  concurrent);
882 
883 	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
884 	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
885 	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
886 	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
887 	ComputeIndexAttrs(indexInfo,
888 					  typeObjectId, collationObjectId, classObjectId,
889 					  coloptions, allIndexParams,
890 					  stmt->excludeOpNames, relationId,
891 					  accessMethodName, accessMethodId,
892 					  amcanorder, stmt->isconstraint);
893 
894 	/*
895 	 * Extra checks when creating a PRIMARY KEY index.
896 	 */
897 	if (stmt->primary)
898 		index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
899 
900 	/*
901 	 * If this table is partitioned and we're creating a unique index or a
902 	 * primary key, make sure that the partition key is a subset of the
903 	 * index's columns.  Otherwise it would be possible to violate uniqueness
904 	 * by putting values that ought to be unique in different partitions.
905 	 *
906 	 * We could lift this limitation if we had global indexes, but those have
907 	 * their own problems, so this is a useful feature combination.
908 	 */
909 	if (partitioned && (stmt->unique || stmt->primary))
910 	{
911 		PartitionKey key = RelationGetPartitionKey(rel);
912 		const char *constraint_type;
913 		int			i;
914 
915 		if (stmt->primary)
916 			constraint_type = "PRIMARY KEY";
917 		else if (stmt->unique)
918 			constraint_type = "UNIQUE";
919 		else if (stmt->excludeOpNames != NIL)
920 			constraint_type = "EXCLUDE";
921 		else
922 		{
923 			elog(ERROR, "unknown constraint type");
924 			constraint_type = NULL; /* keep compiler quiet */
925 		}
926 
927 		/*
928 		 * Verify that all the columns in the partition key appear in the
929 		 * unique key definition, with the same notion of equality.
930 		 */
931 		for (i = 0; i < key->partnatts; i++)
932 		{
933 			bool		found = false;
934 			int			eq_strategy;
935 			Oid			ptkey_eqop;
936 			int			j;
937 
938 			/*
939 			 * Identify the equality operator associated with this partkey
940 			 * column.  For list and range partitioning, partkeys use btree
941 			 * operator classes; hash partitioning uses hash operator classes.
942 			 * (Keep this in sync with ComputePartitionAttrs!)
943 			 */
944 			if (key->strategy == PARTITION_STRATEGY_HASH)
945 				eq_strategy = HTEqualStrategyNumber;
946 			else
947 				eq_strategy = BTEqualStrategyNumber;
948 
949 			ptkey_eqop = get_opfamily_member(key->partopfamily[i],
950 											 key->partopcintype[i],
951 											 key->partopcintype[i],
952 											 eq_strategy);
953 			if (!OidIsValid(ptkey_eqop))
954 				elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
955 					 eq_strategy, key->partopcintype[i], key->partopcintype[i],
956 					 key->partopfamily[i]);
957 
958 			/*
959 			 * We'll need to be able to identify the equality operators
960 			 * associated with index columns, too.  We know what to do with
961 			 * btree opclasses; if there are ever any other index types that
962 			 * support unique indexes, this logic will need extension.
963 			 */
964 			if (accessMethodId == BTREE_AM_OID)
965 				eq_strategy = BTEqualStrategyNumber;
966 			else
967 				ereport(ERROR,
968 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
969 						 errmsg("cannot match partition key to an index using access method \"%s\"",
970 								accessMethodName)));
971 
972 			/*
973 			 * It may be possible to support UNIQUE constraints when partition
974 			 * keys are expressions, but is it worth it?  Give up for now.
975 			 */
976 			if (key->partattrs[i] == 0)
977 				ereport(ERROR,
978 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
979 						 errmsg("unsupported %s constraint with partition key definition",
980 								constraint_type),
981 						 errdetail("%s constraints cannot be used when partition keys include expressions.",
982 								   constraint_type)));
983 
984 			/* Search the index column(s) for a match */
985 			for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
986 			{
987 				if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
988 				{
989 					/* Matched the column, now what about the equality op? */
990 					Oid			idx_opfamily;
991 					Oid			idx_opcintype;
992 
993 					if (get_opclass_opfamily_and_input_type(classObjectId[j],
994 															&idx_opfamily,
995 															&idx_opcintype))
996 					{
997 						Oid			idx_eqop;
998 
999 						idx_eqop = get_opfamily_member(idx_opfamily,
1000 													   idx_opcintype,
1001 													   idx_opcintype,
1002 													   eq_strategy);
1003 						if (ptkey_eqop == idx_eqop)
1004 						{
1005 							found = true;
1006 							break;
1007 						}
1008 					}
1009 				}
1010 			}
1011 
1012 			if (!found)
1013 			{
1014 				Form_pg_attribute att;
1015 
1016 				att = TupleDescAttr(RelationGetDescr(rel),
1017 									key->partattrs[i] - 1);
1018 				ereport(ERROR,
1019 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1020 						 errmsg("unique constraint on partitioned table must include all partitioning columns"),
1021 						 errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
1022 								   constraint_type, RelationGetRelationName(rel),
1023 								   NameStr(att->attname))));
1024 			}
1025 		}
1026 	}
1027 
1028 
1029 	/*
1030 	 * We disallow indexes on system columns.  They would not necessarily get
1031 	 * updated correctly, and they don't seem useful anyway.
1032 	 */
1033 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
1034 	{
1035 		AttrNumber	attno = indexInfo->ii_IndexAttrNumbers[i];
1036 
1037 		if (attno < 0)
1038 			ereport(ERROR,
1039 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1040 					 errmsg("index creation on system columns is not supported")));
1041 	}
1042 
1043 	/*
1044 	 * Also check for system columns used in expressions or predicates.
1045 	 */
1046 	if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
1047 	{
1048 		Bitmapset  *indexattrs = NULL;
1049 
1050 		pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
1051 		pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
1052 
1053 		for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
1054 		{
1055 			if (bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
1056 							  indexattrs))
1057 				ereport(ERROR,
1058 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1059 						 errmsg("index creation on system columns is not supported")));
1060 		}
1061 	}
1062 
1063 	/* Is index safe for others to ignore?  See set_indexsafe_procflags() */
1064 	safe_index = indexInfo->ii_Expressions == NIL &&
1065 		indexInfo->ii_Predicate == NIL;
1066 
1067 	/*
1068 	 * Report index creation if appropriate (delay this till after most of the
1069 	 * error checks)
1070 	 */
1071 	if (stmt->isconstraint && !quiet)
1072 	{
1073 		const char *constraint_type;
1074 
1075 		if (stmt->primary)
1076 			constraint_type = "PRIMARY KEY";
1077 		else if (stmt->unique)
1078 			constraint_type = "UNIQUE";
1079 		else if (stmt->excludeOpNames != NIL)
1080 			constraint_type = "EXCLUDE";
1081 		else
1082 		{
1083 			elog(ERROR, "unknown constraint type");
1084 			constraint_type = NULL; /* keep compiler quiet */
1085 		}
1086 
1087 		ereport(DEBUG1,
1088 				(errmsg_internal("%s %s will create implicit index \"%s\" for table \"%s\"",
1089 								 is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
1090 								 constraint_type,
1091 								 indexRelationName, RelationGetRelationName(rel))));
1092 	}
1093 
1094 	/*
1095 	 * A valid stmt->oldNode implies that we already have a built form of the
1096 	 * index.  The caller should also decline any index build.
1097 	 */
1098 	Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent));
1099 
1100 	/*
1101 	 * Make the catalog entries for the index, including constraints. This
1102 	 * step also actually builds the index, except if caller requested not to
1103 	 * or in concurrent mode, in which case it'll be done later, or doing a
1104 	 * partitioned index (because those don't have storage).
1105 	 */
1106 	flags = constr_flags = 0;
1107 	if (stmt->isconstraint)
1108 		flags |= INDEX_CREATE_ADD_CONSTRAINT;
1109 	if (skip_build || concurrent || partitioned)
1110 		flags |= INDEX_CREATE_SKIP_BUILD;
1111 	if (stmt->if_not_exists)
1112 		flags |= INDEX_CREATE_IF_NOT_EXISTS;
1113 	if (concurrent)
1114 		flags |= INDEX_CREATE_CONCURRENT;
1115 	if (partitioned)
1116 		flags |= INDEX_CREATE_PARTITIONED;
1117 	if (stmt->primary)
1118 		flags |= INDEX_CREATE_IS_PRIMARY;
1119 
1120 	/*
1121 	 * If the table is partitioned, and recursion was declined but partitions
1122 	 * exist, mark the index as invalid.
1123 	 */
1124 	if (partitioned && stmt->relation && !stmt->relation->inh)
1125 	{
1126 		PartitionDesc pd = RelationGetPartitionDesc(rel, true);
1127 
1128 		if (pd->nparts != 0)
1129 			flags |= INDEX_CREATE_INVALID;
1130 	}
1131 
1132 	if (stmt->deferrable)
1133 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
1134 	if (stmt->initdeferred)
1135 		constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED;
1136 
1137 	indexRelationId =
1138 		index_create(rel, indexRelationName, indexRelationId, parentIndexId,
1139 					 parentConstraintId,
1140 					 stmt->oldNode, indexInfo, indexColNames,
1141 					 accessMethodId, tablespaceId,
1142 					 collationObjectId, classObjectId,
1143 					 coloptions, reloptions,
1144 					 flags, constr_flags,
1145 					 allowSystemTableMods, !check_rights,
1146 					 &createdConstraintId);
1147 
1148 	ObjectAddressSet(address, RelationRelationId, indexRelationId);
1149 
1150 	/*
1151 	 * Revert to original default_tablespace.  Must do this before any return
1152 	 * from this function, but after index_create, so this is a good time.
1153 	 */
1154 	if (save_nestlevel >= 0)
1155 		AtEOXact_GUC(true, save_nestlevel);
1156 
1157 	if (!OidIsValid(indexRelationId))
1158 	{
1159 		table_close(rel, NoLock);
1160 
1161 		/* If this is the top-level index, we're done */
1162 		if (!OidIsValid(parentIndexId))
1163 			pgstat_progress_end_command();
1164 
1165 		return address;
1166 	}
1167 
1168 	/* Add any requested comment */
1169 	if (stmt->idxcomment != NULL)
1170 		CreateComments(indexRelationId, RelationRelationId, 0,
1171 					   stmt->idxcomment);
1172 
1173 	if (partitioned)
1174 	{
1175 		PartitionDesc partdesc;
1176 
1177 		/*
1178 		 * Unless caller specified to skip this step (via ONLY), process each
1179 		 * partition to make sure they all contain a corresponding index.
1180 		 *
1181 		 * If we're called internally (no stmt->relation), recurse always.
1182 		 */
1183 		partdesc = RelationGetPartitionDesc(rel, true);
1184 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
1185 		{
1186 			int			nparts = partdesc->nparts;
1187 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
1188 			bool		invalidate_parent = false;
1189 			TupleDesc	parentDesc;
1190 			Oid		   *opfamOids;
1191 
1192 			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
1193 										 nparts);
1194 
1195 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
1196 
1197 			parentDesc = RelationGetDescr(rel);
1198 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
1199 			for (i = 0; i < numberOfKeyAttributes; i++)
1200 				opfamOids[i] = get_opclass_family(classObjectId[i]);
1201 
1202 			/*
1203 			 * For each partition, scan all existing indexes; if one matches
1204 			 * our index definition and is not already attached to some other
1205 			 * parent index, attach it to the one we just created.
1206 			 *
1207 			 * If none matches, build a new index by calling ourselves
1208 			 * recursively with the same options (except for the index name).
1209 			 */
1210 			for (i = 0; i < nparts; i++)
1211 			{
1212 				Oid			childRelid = part_oids[i];
1213 				Relation	childrel;
1214 				List	   *childidxs;
1215 				ListCell   *cell;
1216 				AttrMap    *attmap;
1217 				bool		found = false;
1218 
1219 				childrel = table_open(childRelid, lockmode);
1220 
1221 				/*
1222 				 * Don't try to create indexes on foreign tables, though. Skip
1223 				 * those if a regular index, or fail if trying to create a
1224 				 * constraint index.
1225 				 */
1226 				if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1227 				{
1228 					if (stmt->unique || stmt->primary)
1229 						ereport(ERROR,
1230 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1231 								 errmsg("cannot create unique index on partitioned table \"%s\"",
1232 										RelationGetRelationName(rel)),
1233 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
1234 										   RelationGetRelationName(rel))));
1235 
1236 					table_close(childrel, lockmode);
1237 					continue;
1238 				}
1239 
1240 				childidxs = RelationGetIndexList(childrel);
1241 				attmap =
1242 					build_attrmap_by_name(RelationGetDescr(childrel),
1243 										  parentDesc);
1244 
1245 				foreach(cell, childidxs)
1246 				{
1247 					Oid			cldidxid = lfirst_oid(cell);
1248 					Relation	cldidx;
1249 					IndexInfo  *cldIdxInfo;
1250 
1251 					/* this index is already partition of another one */
1252 					if (has_superclass(cldidxid))
1253 						continue;
1254 
1255 					cldidx = index_open(cldidxid, lockmode);
1256 					cldIdxInfo = BuildIndexInfo(cldidx);
1257 					if (CompareIndexInfo(cldIdxInfo, indexInfo,
1258 										 cldidx->rd_indcollation,
1259 										 collationObjectId,
1260 										 cldidx->rd_opfamily,
1261 										 opfamOids,
1262 										 attmap))
1263 					{
1264 						Oid			cldConstrOid = InvalidOid;
1265 
1266 						/*
1267 						 * Found a match.
1268 						 *
1269 						 * If this index is being created in the parent
1270 						 * because of a constraint, then the child needs to
1271 						 * have a constraint also, so look for one.  If there
1272 						 * is no such constraint, this index is no good, so
1273 						 * keep looking.
1274 						 */
1275 						if (createdConstraintId != InvalidOid)
1276 						{
1277 							cldConstrOid =
1278 								get_relation_idx_constraint_oid(childRelid,
1279 																cldidxid);
1280 							if (cldConstrOid == InvalidOid)
1281 							{
1282 								index_close(cldidx, lockmode);
1283 								continue;
1284 							}
1285 						}
1286 
1287 						/* Attach index to parent and we're done. */
1288 						IndexSetParentIndex(cldidx, indexRelationId);
1289 						if (createdConstraintId != InvalidOid)
1290 							ConstraintSetParentConstraint(cldConstrOid,
1291 														  createdConstraintId,
1292 														  childRelid);
1293 
1294 						if (!cldidx->rd_index->indisvalid)
1295 							invalidate_parent = true;
1296 
1297 						found = true;
1298 						/* keep lock till commit */
1299 						index_close(cldidx, NoLock);
1300 						break;
1301 					}
1302 
1303 					index_close(cldidx, lockmode);
1304 				}
1305 
1306 				list_free(childidxs);
1307 				table_close(childrel, NoLock);
1308 
1309 				/*
1310 				 * If no matching index was found, create our own.
1311 				 */
1312 				if (!found)
1313 				{
1314 					IndexStmt  *childStmt = copyObject(stmt);
1315 					bool		found_whole_row;
1316 					ListCell   *lc;
1317 
1318 					/*
1319 					 * We can't use the same index name for the child index,
1320 					 * so clear idxname to let the recursive invocation choose
1321 					 * a new name.  Likewise, the existing target relation
1322 					 * field is wrong, and if indexOid or oldNode are set,
1323 					 * they mustn't be applied to the child either.
1324 					 */
1325 					childStmt->idxname = NULL;
1326 					childStmt->relation = NULL;
1327 					childStmt->indexOid = InvalidOid;
1328 					childStmt->oldNode = InvalidOid;
1329 					childStmt->oldCreateSubid = InvalidSubTransactionId;
1330 					childStmt->oldFirstRelfilenodeSubid = InvalidSubTransactionId;
1331 
1332 					/*
1333 					 * Adjust any Vars (both in expressions and in the index's
1334 					 * WHERE clause) to match the partition's column numbering
1335 					 * in case it's different from the parent's.
1336 					 */
1337 					foreach(lc, childStmt->indexParams)
1338 					{
1339 						IndexElem  *ielem = lfirst(lc);
1340 
1341 						/*
1342 						 * If the index parameter is an expression, we must
1343 						 * translate it to contain child Vars.
1344 						 */
1345 						if (ielem->expr)
1346 						{
1347 							ielem->expr =
1348 								map_variable_attnos((Node *) ielem->expr,
1349 													1, 0, attmap,
1350 													InvalidOid,
1351 													&found_whole_row);
1352 							if (found_whole_row)
1353 								elog(ERROR, "cannot convert whole-row table reference");
1354 						}
1355 					}
1356 					childStmt->whereClause =
1357 						map_variable_attnos(stmt->whereClause, 1, 0,
1358 											attmap,
1359 											InvalidOid, &found_whole_row);
1360 					if (found_whole_row)
1361 						elog(ERROR, "cannot convert whole-row table reference");
1362 
1363 					DefineIndex(childRelid, childStmt,
1364 								InvalidOid, /* no predefined OID */
1365 								indexRelationId,	/* this is our child */
1366 								createdConstraintId,
1367 								is_alter_table, check_rights, check_not_in_use,
1368 								skip_build, quiet);
1369 				}
1370 
1371 				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
1372 											 i + 1);
1373 				free_attrmap(attmap);
1374 			}
1375 
1376 			/*
1377 			 * The pg_index row we inserted for this index was marked
1378 			 * indisvalid=true.  But if we attached an existing index that is
1379 			 * invalid, this is incorrect, so update our row to invalid too.
1380 			 */
1381 			if (invalidate_parent)
1382 			{
1383 				Relation	pg_index = table_open(IndexRelationId, RowExclusiveLock);
1384 				HeapTuple	tup,
1385 							newtup;
1386 
1387 				tup = SearchSysCache1(INDEXRELID,
1388 									  ObjectIdGetDatum(indexRelationId));
1389 				if (!HeapTupleIsValid(tup))
1390 					elog(ERROR, "cache lookup failed for index %u",
1391 						 indexRelationId);
1392 				newtup = heap_copytuple(tup);
1393 				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
1394 				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
1395 				ReleaseSysCache(tup);
1396 				table_close(pg_index, RowExclusiveLock);
1397 				heap_freetuple(newtup);
1398 			}
1399 		}
1400 
1401 		/*
1402 		 * Indexes on partitioned tables are not themselves built, so we're
1403 		 * done here.
1404 		 */
1405 		table_close(rel, NoLock);
1406 		if (!OidIsValid(parentIndexId))
1407 			pgstat_progress_end_command();
1408 		return address;
1409 	}
1410 
1411 	if (!concurrent)
1412 	{
1413 		/* Close the heap and we're done, in the non-concurrent case */
1414 		table_close(rel, NoLock);
1415 
1416 		/* If this is the top-level index, we're done. */
1417 		if (!OidIsValid(parentIndexId))
1418 			pgstat_progress_end_command();
1419 
1420 		return address;
1421 	}
1422 
1423 	/* save lockrelid and locktag for below, then close rel */
1424 	heaprelid = rel->rd_lockInfo.lockRelId;
1425 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1426 	table_close(rel, NoLock);
1427 
1428 	/*
1429 	 * For a concurrent build, it's important to make the catalog entries
1430 	 * visible to other transactions before we start to build the index. That
1431 	 * will prevent them from making incompatible HOT updates.  The new index
1432 	 * will be marked not indisready and not indisvalid, so that no one else
1433 	 * tries to either insert into it or use it for queries.
1434 	 *
1435 	 * We must commit our current transaction so that the index becomes
1436 	 * visible; then start another.  Note that all the data structures we just
1437 	 * built are lost in the commit.  The only data we keep past here are the
1438 	 * relation IDs.
1439 	 *
1440 	 * Before committing, get a session-level lock on the table, to ensure
1441 	 * that neither it nor the index can be dropped before we finish. This
1442 	 * cannot block, even if someone else is waiting for access, because we
1443 	 * already have the same lock within our transaction.
1444 	 *
1445 	 * Note: we don't currently bother with a session lock on the index,
1446 	 * because there are no operations that could change its state while we
1447 	 * hold lock on the parent table.  This might need to change later.
1448 	 */
1449 	LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1450 
1451 	PopActiveSnapshot();
1452 	CommitTransactionCommand();
1453 	StartTransactionCommand();
1454 
1455 	/* Tell concurrent index builds to ignore us, if index qualifies */
1456 	if (safe_index)
1457 		set_indexsafe_procflags();
1458 
1459 	/*
1460 	 * The index is now visible, so we can report the OID.  While on it,
1461 	 * include the report for the beginning of phase 2.
1462 	 */
1463 	{
1464 		const int	progress_cols[] = {
1465 			PROGRESS_CREATEIDX_INDEX_OID,
1466 			PROGRESS_CREATEIDX_PHASE
1467 		};
1468 		const int64 progress_vals[] = {
1469 			indexRelationId,
1470 			PROGRESS_CREATEIDX_PHASE_WAIT_1
1471 		};
1472 
1473 		pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
1474 	}
1475 
1476 	/*
1477 	 * Phase 2 of concurrent index build (see comments for validate_index()
1478 	 * for an overview of how this works)
1479 	 *
1480 	 * Now we must wait until no running transaction could have the table open
1481 	 * with the old list of indexes.  Use ShareLock to consider running
1482 	 * transactions that hold locks that permit writing to the table.  Note we
1483 	 * do not need to worry about xacts that open the table for writing after
1484 	 * this point; they will see the new index when they open it.
1485 	 *
1486 	 * Note: the reason we use actual lock acquisition here, rather than just
1487 	 * checking the ProcArray and sleeping, is that deadlock is possible if
1488 	 * one of the transactions in question is blocked trying to acquire an
1489 	 * exclusive lock on our table.  The lock code will detect deadlock and
1490 	 * error out properly.
1491 	 */
1492 	WaitForLockers(heaplocktag, ShareLock, true);
1493 
1494 	/*
1495 	 * At this moment we are sure that there are no transactions with the
1496 	 * table open for write that don't have this new index in their list of
1497 	 * indexes.  We have waited out all the existing transactions and any new
1498 	 * transaction will have the new index in its list, but the index is still
1499 	 * marked as "not-ready-for-inserts".  The index is consulted while
1500 	 * deciding HOT-safety though.  This arrangement ensures that no new HOT
1501 	 * chains can be created where the new tuple and the old tuple in the
1502 	 * chain have different index keys.
1503 	 *
1504 	 * We now take a new snapshot, and build the index using all tuples that
1505 	 * are visible in this snapshot.  We can be sure that any HOT updates to
1506 	 * these tuples will be compatible with the index, since any updates made
1507 	 * by transactions that didn't know about the index are now committed or
1508 	 * rolled back.  Thus, each visible tuple is either the end of its
1509 	 * HOT-chain or the extension of the chain is HOT-safe for this index.
1510 	 */
1511 
1512 	/* Set ActiveSnapshot since functions in the indexes may need it */
1513 	PushActiveSnapshot(GetTransactionSnapshot());
1514 
1515 	/* Perform concurrent build of index */
1516 	index_concurrently_build(relationId, indexRelationId);
1517 
1518 	/* we can do away with our snapshot */
1519 	PopActiveSnapshot();
1520 
1521 	/*
1522 	 * Commit this transaction to make the indisready update visible.
1523 	 */
1524 	CommitTransactionCommand();
1525 	StartTransactionCommand();
1526 
1527 	/* Tell concurrent index builds to ignore us, if index qualifies */
1528 	if (safe_index)
1529 		set_indexsafe_procflags();
1530 
1531 	/*
1532 	 * Phase 3 of concurrent index build
1533 	 *
1534 	 * We once again wait until no transaction can have the table open with
1535 	 * the index marked as read-only for updates.
1536 	 */
1537 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1538 								 PROGRESS_CREATEIDX_PHASE_WAIT_2);
1539 	WaitForLockers(heaplocktag, ShareLock, true);
1540 
1541 	/*
1542 	 * Now take the "reference snapshot" that will be used by validate_index()
1543 	 * to filter candidate tuples.  Beware!  There might still be snapshots in
1544 	 * use that treat some transaction as in-progress that our reference
1545 	 * snapshot treats as committed.  If such a recently-committed transaction
1546 	 * deleted tuples in the table, we will not include them in the index; yet
1547 	 * those transactions which see the deleting one as still-in-progress will
1548 	 * expect such tuples to be there once we mark the index as valid.
1549 	 *
1550 	 * We solve this by waiting for all endangered transactions to exit before
1551 	 * we mark the index as valid.
1552 	 *
1553 	 * We also set ActiveSnapshot to this snap, since functions in indexes may
1554 	 * need a snapshot.
1555 	 */
1556 	snapshot = RegisterSnapshot(GetTransactionSnapshot());
1557 	PushActiveSnapshot(snapshot);
1558 
1559 	/*
1560 	 * Scan the index and the heap, insert any missing index entries.
1561 	 */
1562 	validate_index(relationId, indexRelationId, snapshot);
1563 
1564 	/*
1565 	 * Drop the reference snapshot.  We must do this before waiting out other
1566 	 * snapshot holders, else we will deadlock against other processes also
1567 	 * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
1568 	 * they must wait for.  But first, save the snapshot's xmin to use as
1569 	 * limitXmin for GetCurrentVirtualXIDs().
1570 	 */
1571 	limitXmin = snapshot->xmin;
1572 
1573 	PopActiveSnapshot();
1574 	UnregisterSnapshot(snapshot);
1575 
1576 	/*
1577 	 * The snapshot subsystem could still contain registered snapshots that
1578 	 * are holding back our process's advertised xmin; in particular, if
1579 	 * default_transaction_isolation = serializable, there is a transaction
1580 	 * snapshot that is still active.  The CatalogSnapshot is likewise a
1581 	 * hazard.  To ensure no deadlocks, we must commit and start yet another
1582 	 * transaction, and do our wait before any snapshot has been taken in it.
1583 	 */
1584 	CommitTransactionCommand();
1585 	StartTransactionCommand();
1586 
1587 	/* Tell concurrent index builds to ignore us, if index qualifies */
1588 	if (safe_index)
1589 		set_indexsafe_procflags();
1590 
1591 	/* We should now definitely not be advertising any xmin. */
1592 	Assert(MyProc->xmin == InvalidTransactionId);
1593 
1594 	/*
1595 	 * The index is now valid in the sense that it contains all currently
1596 	 * interesting tuples.  But since it might not contain tuples deleted just
1597 	 * before the reference snap was taken, we have to wait out any
1598 	 * transactions that might have older snapshots.
1599 	 */
1600 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
1601 								 PROGRESS_CREATEIDX_PHASE_WAIT_3);
1602 	WaitForOlderSnapshots(limitXmin, true);
1603 
1604 	/*
1605 	 * Index can now be marked valid -- update its pg_index entry
1606 	 */
1607 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
1608 
1609 	/*
1610 	 * The pg_index update will cause backends (including this one) to update
1611 	 * relcache entries for the index itself, but we should also send a
1612 	 * relcache inval on the parent table to force replanning of cached plans.
1613 	 * Otherwise existing sessions might fail to use the new index where it
1614 	 * would be useful.  (Note that our earlier commits did not create reasons
1615 	 * to replan; so relcache flush on the index itself was sufficient.)
1616 	 */
1617 	CacheInvalidateRelcacheByRelid(heaprelid.relId);
1618 
1619 	/*
1620 	 * Last thing to do is release the session-level lock on the parent table.
1621 	 */
1622 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1623 
1624 	pgstat_progress_end_command();
1625 
1626 	return address;
1627 }
1628 
1629 
1630 /*
1631  * CheckMutability
1632  *		Test whether given expression is mutable
1633  */
1634 static bool
CheckMutability(Expr * expr)1635 CheckMutability(Expr *expr)
1636 {
1637 	/*
1638 	 * First run the expression through the planner.  This has a couple of
1639 	 * important consequences.  First, function default arguments will get
1640 	 * inserted, which may affect volatility (consider "default now()").
1641 	 * Second, inline-able functions will get inlined, which may allow us to
1642 	 * conclude that the function is really less volatile than it's marked. As
1643 	 * an example, polymorphic functions must be marked with the most volatile
1644 	 * behavior that they have for any input type, but once we inline the
1645 	 * function we may be able to conclude that it's not so volatile for the
1646 	 * particular input type we're dealing with.
1647 	 *
1648 	 * We assume here that expression_planner() won't scribble on its input.
1649 	 */
1650 	expr = expression_planner(expr);
1651 
1652 	/* Now we can search for non-immutable functions */
1653 	return contain_mutable_functions((Node *) expr);
1654 }
1655 
1656 
1657 /*
1658  * CheckPredicate
1659  *		Checks that the given partial-index predicate is valid.
1660  *
1661  * This used to also constrain the form of the predicate to forms that
1662  * indxpath.c could do something with.  However, that seems overly
1663  * restrictive.  One useful application of partial indexes is to apply
1664  * a UNIQUE constraint across a subset of a table, and in that scenario
1665  * any evaluable predicate will work.  So accept any predicate here
1666  * (except ones requiring a plan), and let indxpath.c fend for itself.
1667  */
1668 static void
CheckPredicate(Expr * predicate)1669 CheckPredicate(Expr *predicate)
1670 {
1671 	/*
1672 	 * transformExpr() should have already rejected subqueries, aggregates,
1673 	 * and window functions, based on the EXPR_KIND_ for a predicate.
1674 	 */
1675 
1676 	/*
1677 	 * A predicate using mutable functions is probably wrong, for the same
1678 	 * reasons that we don't allow an index expression to use one.
1679 	 */
1680 	if (CheckMutability(predicate))
1681 		ereport(ERROR,
1682 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1683 				 errmsg("functions in index predicate must be marked IMMUTABLE")));
1684 }
1685 
1686 /*
1687  * Compute per-index-column information, including indexed column numbers
1688  * or index expressions, opclasses and their options. Note, all output vectors
1689  * should be allocated for all columns, including "including" ones.
1690  */
1691 static void
ComputeIndexAttrs(IndexInfo * indexInfo,Oid * typeOidP,Oid * collationOidP,Oid * classOidP,int16 * colOptionP,List * attList,List * exclusionOpNames,Oid relId,const char * accessMethodName,Oid accessMethodId,bool amcanorder,bool isconstraint)1692 ComputeIndexAttrs(IndexInfo *indexInfo,
1693 				  Oid *typeOidP,
1694 				  Oid *collationOidP,
1695 				  Oid *classOidP,
1696 				  int16 *colOptionP,
1697 				  List *attList,	/* list of IndexElem's */
1698 				  List *exclusionOpNames,
1699 				  Oid relId,
1700 				  const char *accessMethodName,
1701 				  Oid accessMethodId,
1702 				  bool amcanorder,
1703 				  bool isconstraint)
1704 {
1705 	ListCell   *nextExclOp;
1706 	ListCell   *lc;
1707 	int			attn;
1708 	int			nkeycols = indexInfo->ii_NumIndexKeyAttrs;
1709 
1710 	/* Allocate space for exclusion operator info, if needed */
1711 	if (exclusionOpNames)
1712 	{
1713 		Assert(list_length(exclusionOpNames) == nkeycols);
1714 		indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
1715 		indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
1716 		indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
1717 		nextExclOp = list_head(exclusionOpNames);
1718 	}
1719 	else
1720 		nextExclOp = NULL;
1721 
1722 	/*
1723 	 * process attributeList
1724 	 */
1725 	attn = 0;
1726 	foreach(lc, attList)
1727 	{
1728 		IndexElem  *attribute = (IndexElem *) lfirst(lc);
1729 		Oid			atttype;
1730 		Oid			attcollation;
1731 
1732 		/*
1733 		 * Process the column-or-expression to be indexed.
1734 		 */
1735 		if (attribute->name != NULL)
1736 		{
1737 			/* Simple index attribute */
1738 			HeapTuple	atttuple;
1739 			Form_pg_attribute attform;
1740 
1741 			Assert(attribute->expr == NULL);
1742 			atttuple = SearchSysCacheAttName(relId, attribute->name);
1743 			if (!HeapTupleIsValid(atttuple))
1744 			{
1745 				/* difference in error message spellings is historical */
1746 				if (isconstraint)
1747 					ereport(ERROR,
1748 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1749 							 errmsg("column \"%s\" named in key does not exist",
1750 									attribute->name)));
1751 				else
1752 					ereport(ERROR,
1753 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1754 							 errmsg("column \"%s\" does not exist",
1755 									attribute->name)));
1756 			}
1757 			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
1758 			indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
1759 			atttype = attform->atttypid;
1760 			attcollation = attform->attcollation;
1761 			ReleaseSysCache(atttuple);
1762 		}
1763 		else
1764 		{
1765 			/* Index expression */
1766 			Node	   *expr = attribute->expr;
1767 
1768 			Assert(expr != NULL);
1769 
1770 			if (attn >= nkeycols)
1771 				ereport(ERROR,
1772 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1773 						 errmsg("expressions are not supported in included columns")));
1774 			atttype = exprType(expr);
1775 			attcollation = exprCollation(expr);
1776 
1777 			/*
1778 			 * Strip any top-level COLLATE clause.  This ensures that we treat
1779 			 * "x COLLATE y" and "(x COLLATE y)" alike.
1780 			 */
1781 			while (IsA(expr, CollateExpr))
1782 				expr = (Node *) ((CollateExpr *) expr)->arg;
1783 
1784 			if (IsA(expr, Var) &&
1785 				((Var *) expr)->varattno != InvalidAttrNumber)
1786 			{
1787 				/*
1788 				 * User wrote "(column)" or "(column COLLATE something)".
1789 				 * Treat it like simple attribute anyway.
1790 				 */
1791 				indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
1792 			}
1793 			else
1794 			{
1795 				indexInfo->ii_IndexAttrNumbers[attn] = 0;	/* marks expression */
1796 				indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
1797 													expr);
1798 
1799 				/*
1800 				 * transformExpr() should have already rejected subqueries,
1801 				 * aggregates, and window functions, based on the EXPR_KIND_
1802 				 * for an index expression.
1803 				 */
1804 
1805 				/*
1806 				 * An expression using mutable functions is probably wrong,
1807 				 * since if you aren't going to get the same result for the
1808 				 * same data every time, it's not clear what the index entries
1809 				 * mean at all.
1810 				 */
1811 				if (CheckMutability((Expr *) expr))
1812 					ereport(ERROR,
1813 							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1814 							 errmsg("functions in index expression must be marked IMMUTABLE")));
1815 			}
1816 		}
1817 
1818 		typeOidP[attn] = atttype;
1819 
1820 		/*
1821 		 * Included columns have no collation, no opclass and no ordering
1822 		 * options.
1823 		 */
1824 		if (attn >= nkeycols)
1825 		{
1826 			if (attribute->collation)
1827 				ereport(ERROR,
1828 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1829 						 errmsg("including column does not support a collation")));
1830 			if (attribute->opclass)
1831 				ereport(ERROR,
1832 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1833 						 errmsg("including column does not support an operator class")));
1834 			if (attribute->ordering != SORTBY_DEFAULT)
1835 				ereport(ERROR,
1836 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1837 						 errmsg("including column does not support ASC/DESC options")));
1838 			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1839 				ereport(ERROR,
1840 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1841 						 errmsg("including column does not support NULLS FIRST/LAST options")));
1842 
1843 			classOidP[attn] = InvalidOid;
1844 			colOptionP[attn] = 0;
1845 			collationOidP[attn] = InvalidOid;
1846 			attn++;
1847 
1848 			continue;
1849 		}
1850 
1851 		/*
1852 		 * Apply collation override if any
1853 		 */
1854 		if (attribute->collation)
1855 			attcollation = get_collation_oid(attribute->collation, false);
1856 
1857 		/*
1858 		 * Check we have a collation iff it's a collatable type.  The only
1859 		 * expected failures here are (1) COLLATE applied to a noncollatable
1860 		 * type, or (2) index expression had an unresolved collation.  But we
1861 		 * might as well code this to be a complete consistency check.
1862 		 */
1863 		if (type_is_collatable(atttype))
1864 		{
1865 			if (!OidIsValid(attcollation))
1866 				ereport(ERROR,
1867 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1868 						 errmsg("could not determine which collation to use for index expression"),
1869 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1870 		}
1871 		else
1872 		{
1873 			if (OidIsValid(attcollation))
1874 				ereport(ERROR,
1875 						(errcode(ERRCODE_DATATYPE_MISMATCH),
1876 						 errmsg("collations are not supported by type %s",
1877 								format_type_be(atttype))));
1878 		}
1879 
1880 		collationOidP[attn] = attcollation;
1881 
1882 		/*
1883 		 * Identify the opclass to use.
1884 		 */
1885 		classOidP[attn] = ResolveOpClass(attribute->opclass,
1886 										 atttype,
1887 										 accessMethodName,
1888 										 accessMethodId);
1889 
1890 		/*
1891 		 * Identify the exclusion operator, if any.
1892 		 */
1893 		if (nextExclOp)
1894 		{
1895 			List	   *opname = (List *) lfirst(nextExclOp);
1896 			Oid			opid;
1897 			Oid			opfamily;
1898 			int			strat;
1899 
1900 			/*
1901 			 * Find the operator --- it must accept the column datatype
1902 			 * without runtime coercion (but binary compatibility is OK)
1903 			 */
1904 			opid = compatible_oper_opid(opname, atttype, atttype, false);
1905 
1906 			/*
1907 			 * Only allow commutative operators to be used in exclusion
1908 			 * constraints. If X conflicts with Y, but Y does not conflict
1909 			 * with X, bad things will happen.
1910 			 */
1911 			if (get_commutator(opid) != opid)
1912 				ereport(ERROR,
1913 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1914 						 errmsg("operator %s is not commutative",
1915 								format_operator(opid)),
1916 						 errdetail("Only commutative operators can be used in exclusion constraints.")));
1917 
1918 			/*
1919 			 * Operator must be a member of the right opfamily, too
1920 			 */
1921 			opfamily = get_opclass_family(classOidP[attn]);
1922 			strat = get_op_opfamily_strategy(opid, opfamily);
1923 			if (strat == 0)
1924 			{
1925 				HeapTuple	opftuple;
1926 				Form_pg_opfamily opfform;
1927 
1928 				/*
1929 				 * attribute->opclass might not explicitly name the opfamily,
1930 				 * so fetch the name of the selected opfamily for use in the
1931 				 * error message.
1932 				 */
1933 				opftuple = SearchSysCache1(OPFAMILYOID,
1934 										   ObjectIdGetDatum(opfamily));
1935 				if (!HeapTupleIsValid(opftuple))
1936 					elog(ERROR, "cache lookup failed for opfamily %u",
1937 						 opfamily);
1938 				opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
1939 
1940 				ereport(ERROR,
1941 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1942 						 errmsg("operator %s is not a member of operator family \"%s\"",
1943 								format_operator(opid),
1944 								NameStr(opfform->opfname)),
1945 						 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
1946 			}
1947 
1948 			indexInfo->ii_ExclusionOps[attn] = opid;
1949 			indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
1950 			indexInfo->ii_ExclusionStrats[attn] = strat;
1951 			nextExclOp = lnext(exclusionOpNames, nextExclOp);
1952 		}
1953 
1954 		/*
1955 		 * Set up the per-column options (indoption field).  For now, this is
1956 		 * zero for any un-ordered index, while ordered indexes have DESC and
1957 		 * NULLS FIRST/LAST options.
1958 		 */
1959 		colOptionP[attn] = 0;
1960 		if (amcanorder)
1961 		{
1962 			/* default ordering is ASC */
1963 			if (attribute->ordering == SORTBY_DESC)
1964 				colOptionP[attn] |= INDOPTION_DESC;
1965 			/* default null ordering is LAST for ASC, FIRST for DESC */
1966 			if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
1967 			{
1968 				if (attribute->ordering == SORTBY_DESC)
1969 					colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1970 			}
1971 			else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
1972 				colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1973 		}
1974 		else
1975 		{
1976 			/* index AM does not support ordering */
1977 			if (attribute->ordering != SORTBY_DEFAULT)
1978 				ereport(ERROR,
1979 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1980 						 errmsg("access method \"%s\" does not support ASC/DESC options",
1981 								accessMethodName)));
1982 			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1983 				ereport(ERROR,
1984 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1985 						 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
1986 								accessMethodName)));
1987 		}
1988 
1989 		/* Set up the per-column opclass options (attoptions field). */
1990 		if (attribute->opclassopts)
1991 		{
1992 			Assert(attn < nkeycols);
1993 
1994 			if (!indexInfo->ii_OpclassOptions)
1995 				indexInfo->ii_OpclassOptions =
1996 					palloc0(sizeof(Datum) * indexInfo->ii_NumIndexAttrs);
1997 
1998 			indexInfo->ii_OpclassOptions[attn] =
1999 				transformRelOptions((Datum) 0, attribute->opclassopts,
2000 									NULL, NULL, false, false);
2001 		}
2002 
2003 		attn++;
2004 	}
2005 }
2006 
2007 /*
2008  * Resolve possibly-defaulted operator class specification
2009  *
2010  * Note: This is used to resolve operator class specifications in index and
2011  * partition key definitions.
2012  */
2013 Oid
ResolveOpClass(List * opclass,Oid attrType,const char * accessMethodName,Oid accessMethodId)2014 ResolveOpClass(List *opclass, Oid attrType,
2015 			   const char *accessMethodName, Oid accessMethodId)
2016 {
2017 	char	   *schemaname;
2018 	char	   *opcname;
2019 	HeapTuple	tuple;
2020 	Form_pg_opclass opform;
2021 	Oid			opClassId,
2022 				opInputType;
2023 
2024 	if (opclass == NIL)
2025 	{
2026 		/* no operator class specified, so find the default */
2027 		opClassId = GetDefaultOpClass(attrType, accessMethodId);
2028 		if (!OidIsValid(opClassId))
2029 			ereport(ERROR,
2030 					(errcode(ERRCODE_UNDEFINED_OBJECT),
2031 					 errmsg("data type %s has no default operator class for access method \"%s\"",
2032 							format_type_be(attrType), accessMethodName),
2033 					 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
2034 		return opClassId;
2035 	}
2036 
2037 	/*
2038 	 * Specific opclass name given, so look up the opclass.
2039 	 */
2040 
2041 	/* deconstruct the name list */
2042 	DeconstructQualifiedName(opclass, &schemaname, &opcname);
2043 
2044 	if (schemaname)
2045 	{
2046 		/* Look in specific schema only */
2047 		Oid			namespaceId;
2048 
2049 		namespaceId = LookupExplicitNamespace(schemaname, false);
2050 		tuple = SearchSysCache3(CLAAMNAMENSP,
2051 								ObjectIdGetDatum(accessMethodId),
2052 								PointerGetDatum(opcname),
2053 								ObjectIdGetDatum(namespaceId));
2054 	}
2055 	else
2056 	{
2057 		/* Unqualified opclass name, so search the search path */
2058 		opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
2059 		if (!OidIsValid(opClassId))
2060 			ereport(ERROR,
2061 					(errcode(ERRCODE_UNDEFINED_OBJECT),
2062 					 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
2063 							opcname, accessMethodName)));
2064 		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
2065 	}
2066 
2067 	if (!HeapTupleIsValid(tuple))
2068 		ereport(ERROR,
2069 				(errcode(ERRCODE_UNDEFINED_OBJECT),
2070 				 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
2071 						NameListToString(opclass), accessMethodName)));
2072 
2073 	/*
2074 	 * Verify that the index operator class accepts this datatype.  Note we
2075 	 * will accept binary compatibility.
2076 	 */
2077 	opform = (Form_pg_opclass) GETSTRUCT(tuple);
2078 	opClassId = opform->oid;
2079 	opInputType = opform->opcintype;
2080 
2081 	if (!IsBinaryCoercible(attrType, opInputType))
2082 		ereport(ERROR,
2083 				(errcode(ERRCODE_DATATYPE_MISMATCH),
2084 				 errmsg("operator class \"%s\" does not accept data type %s",
2085 						NameListToString(opclass), format_type_be(attrType))));
2086 
2087 	ReleaseSysCache(tuple);
2088 
2089 	return opClassId;
2090 }
2091 
2092 /*
2093  * GetDefaultOpClass
2094  *
2095  * Given the OIDs of a datatype and an access method, find the default
2096  * operator class, if any.  Returns InvalidOid if there is none.
2097  */
2098 Oid
GetDefaultOpClass(Oid type_id,Oid am_id)2099 GetDefaultOpClass(Oid type_id, Oid am_id)
2100 {
2101 	Oid			result = InvalidOid;
2102 	int			nexact = 0;
2103 	int			ncompatible = 0;
2104 	int			ncompatiblepreferred = 0;
2105 	Relation	rel;
2106 	ScanKeyData skey[1];
2107 	SysScanDesc scan;
2108 	HeapTuple	tup;
2109 	TYPCATEGORY tcategory;
2110 
2111 	/* If it's a domain, look at the base type instead */
2112 	type_id = getBaseType(type_id);
2113 
2114 	tcategory = TypeCategory(type_id);
2115 
2116 	/*
2117 	 * We scan through all the opclasses available for the access method,
2118 	 * looking for one that is marked default and matches the target type
2119 	 * (either exactly or binary-compatibly, but prefer an exact match).
2120 	 *
2121 	 * We could find more than one binary-compatible match.  If just one is
2122 	 * for a preferred type, use that one; otherwise we fail, forcing the user
2123 	 * to specify which one he wants.  (The preferred-type special case is a
2124 	 * kluge for varchar: it's binary-compatible to both text and bpchar, so
2125 	 * we need a tiebreaker.)  If we find more than one exact match, then
2126 	 * someone put bogus entries in pg_opclass.
2127 	 */
2128 	rel = table_open(OperatorClassRelationId, AccessShareLock);
2129 
2130 	ScanKeyInit(&skey[0],
2131 				Anum_pg_opclass_opcmethod,
2132 				BTEqualStrategyNumber, F_OIDEQ,
2133 				ObjectIdGetDatum(am_id));
2134 
2135 	scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
2136 							  NULL, 1, skey);
2137 
2138 	while (HeapTupleIsValid(tup = systable_getnext(scan)))
2139 	{
2140 		Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
2141 
2142 		/* ignore altogether if not a default opclass */
2143 		if (!opclass->opcdefault)
2144 			continue;
2145 		if (opclass->opcintype == type_id)
2146 		{
2147 			nexact++;
2148 			result = opclass->oid;
2149 		}
2150 		else if (nexact == 0 &&
2151 				 IsBinaryCoercible(type_id, opclass->opcintype))
2152 		{
2153 			if (IsPreferredType(tcategory, opclass->opcintype))
2154 			{
2155 				ncompatiblepreferred++;
2156 				result = opclass->oid;
2157 			}
2158 			else if (ncompatiblepreferred == 0)
2159 			{
2160 				ncompatible++;
2161 				result = opclass->oid;
2162 			}
2163 		}
2164 	}
2165 
2166 	systable_endscan(scan);
2167 
2168 	table_close(rel, AccessShareLock);
2169 
2170 	/* raise error if pg_opclass contains inconsistent data */
2171 	if (nexact > 1)
2172 		ereport(ERROR,
2173 				(errcode(ERRCODE_DUPLICATE_OBJECT),
2174 				 errmsg("there are multiple default operator classes for data type %s",
2175 						format_type_be(type_id))));
2176 
2177 	if (nexact == 1 ||
2178 		ncompatiblepreferred == 1 ||
2179 		(ncompatiblepreferred == 0 && ncompatible == 1))
2180 		return result;
2181 
2182 	return InvalidOid;
2183 }
2184 
2185 /*
2186  *	makeObjectName()
2187  *
2188  *	Create a name for an implicitly created index, sequence, constraint,
2189  *	extended statistics, etc.
2190  *
2191  *	The parameters are typically: the original table name, the original field
2192  *	name, and a "type" string (such as "seq" or "pkey").    The field name
2193  *	and/or type can be NULL if not relevant.
2194  *
2195  *	The result is a palloc'd string.
2196  *
2197  *	The basic result we want is "name1_name2_label", omitting "_name2" or
2198  *	"_label" when those parameters are NULL.  However, we must generate
2199  *	a name with less than NAMEDATALEN characters!  So, we truncate one or
2200  *	both names if necessary to make a short-enough string.  The label part
2201  *	is never truncated (so it had better be reasonably short).
2202  *
2203  *	The caller is responsible for checking uniqueness of the generated
2204  *	name and retrying as needed; retrying will be done by altering the
2205  *	"label" string (which is why we never truncate that part).
2206  */
2207 char *
makeObjectName(const char * name1,const char * name2,const char * label)2208 makeObjectName(const char *name1, const char *name2, const char *label)
2209 {
2210 	char	   *name;
2211 	int			overhead = 0;	/* chars needed for label and underscores */
2212 	int			availchars;		/* chars available for name(s) */
2213 	int			name1chars;		/* chars allocated to name1 */
2214 	int			name2chars;		/* chars allocated to name2 */
2215 	int			ndx;
2216 
2217 	name1chars = strlen(name1);
2218 	if (name2)
2219 	{
2220 		name2chars = strlen(name2);
2221 		overhead++;				/* allow for separating underscore */
2222 	}
2223 	else
2224 		name2chars = 0;
2225 	if (label)
2226 		overhead += strlen(label) + 1;
2227 
2228 	availchars = NAMEDATALEN - 1 - overhead;
2229 	Assert(availchars > 0);		/* else caller chose a bad label */
2230 
2231 	/*
2232 	 * If we must truncate,  preferentially truncate the longer name. This
2233 	 * logic could be expressed without a loop, but it's simple and obvious as
2234 	 * a loop.
2235 	 */
2236 	while (name1chars + name2chars > availchars)
2237 	{
2238 		if (name1chars > name2chars)
2239 			name1chars--;
2240 		else
2241 			name2chars--;
2242 	}
2243 
2244 	name1chars = pg_mbcliplen(name1, name1chars, name1chars);
2245 	if (name2)
2246 		name2chars = pg_mbcliplen(name2, name2chars, name2chars);
2247 
2248 	/* Now construct the string using the chosen lengths */
2249 	name = palloc(name1chars + name2chars + overhead + 1);
2250 	memcpy(name, name1, name1chars);
2251 	ndx = name1chars;
2252 	if (name2)
2253 	{
2254 		name[ndx++] = '_';
2255 		memcpy(name + ndx, name2, name2chars);
2256 		ndx += name2chars;
2257 	}
2258 	if (label)
2259 	{
2260 		name[ndx++] = '_';
2261 		strcpy(name + ndx, label);
2262 	}
2263 	else
2264 		name[ndx] = '\0';
2265 
2266 	return name;
2267 }
2268 
2269 /*
2270  * Select a nonconflicting name for a new relation.  This is ordinarily
2271  * used to choose index names (which is why it's here) but it can also
2272  * be used for sequences, or any autogenerated relation kind.
2273  *
2274  * name1, name2, and label are used the same way as for makeObjectName(),
2275  * except that the label can't be NULL; digits will be appended to the label
2276  * if needed to create a name that is unique within the specified namespace.
2277  *
2278  * If isconstraint is true, we also avoid choosing a name matching any
2279  * existing constraint in the same namespace.  (This is stricter than what
2280  * Postgres itself requires, but the SQL standard says that constraint names
2281  * should be unique within schemas, so we follow that for autogenerated
2282  * constraint names.)
2283  *
2284  * Note: it is theoretically possible to get a collision anyway, if someone
2285  * else chooses the same name concurrently.  This is fairly unlikely to be
2286  * a problem in practice, especially if one is holding an exclusive lock on
2287  * the relation identified by name1.  However, if choosing multiple names
2288  * within a single command, you'd better create the new object and do
2289  * CommandCounterIncrement before choosing the next one!
2290  *
2291  * Returns a palloc'd string.
2292  */
2293 char *
ChooseRelationName(const char * name1,const char * name2,const char * label,Oid namespaceid,bool isconstraint)2294 ChooseRelationName(const char *name1, const char *name2,
2295 				   const char *label, Oid namespaceid,
2296 				   bool isconstraint)
2297 {
2298 	int			pass = 0;
2299 	char	   *relname = NULL;
2300 	char		modlabel[NAMEDATALEN];
2301 
2302 	/* try the unmodified label first */
2303 	strlcpy(modlabel, label, sizeof(modlabel));
2304 
2305 	for (;;)
2306 	{
2307 		relname = makeObjectName(name1, name2, modlabel);
2308 
2309 		if (!OidIsValid(get_relname_relid(relname, namespaceid)))
2310 		{
2311 			if (!isconstraint ||
2312 				!ConstraintNameExists(relname, namespaceid))
2313 				break;
2314 		}
2315 
2316 		/* found a conflict, so try a new name component */
2317 		pfree(relname);
2318 		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
2319 	}
2320 
2321 	return relname;
2322 }
2323 
2324 /*
2325  * Select the name to be used for an index.
2326  *
2327  * The argument list is pretty ad-hoc :-(
2328  */
2329 static char *
ChooseIndexName(const char * tabname,Oid namespaceId,List * colnames,List * exclusionOpNames,bool primary,bool isconstraint)2330 ChooseIndexName(const char *tabname, Oid namespaceId,
2331 				List *colnames, List *exclusionOpNames,
2332 				bool primary, bool isconstraint)
2333 {
2334 	char	   *indexname;
2335 
2336 	if (primary)
2337 	{
2338 		/* the primary key's name does not depend on the specific column(s) */
2339 		indexname = ChooseRelationName(tabname,
2340 									   NULL,
2341 									   "pkey",
2342 									   namespaceId,
2343 									   true);
2344 	}
2345 	else if (exclusionOpNames != NIL)
2346 	{
2347 		indexname = ChooseRelationName(tabname,
2348 									   ChooseIndexNameAddition(colnames),
2349 									   "excl",
2350 									   namespaceId,
2351 									   true);
2352 	}
2353 	else if (isconstraint)
2354 	{
2355 		indexname = ChooseRelationName(tabname,
2356 									   ChooseIndexNameAddition(colnames),
2357 									   "key",
2358 									   namespaceId,
2359 									   true);
2360 	}
2361 	else
2362 	{
2363 		indexname = ChooseRelationName(tabname,
2364 									   ChooseIndexNameAddition(colnames),
2365 									   "idx",
2366 									   namespaceId,
2367 									   false);
2368 	}
2369 
2370 	return indexname;
2371 }
2372 
2373 /*
2374  * Generate "name2" for a new index given the list of column names for it
2375  * (as produced by ChooseIndexColumnNames).  This will be passed to
2376  * ChooseRelationName along with the parent table name and a suitable label.
2377  *
2378  * We know that less than NAMEDATALEN characters will actually be used,
2379  * so we can truncate the result once we've generated that many.
2380  *
2381  * XXX See also ChooseForeignKeyConstraintNameAddition and
2382  * ChooseExtendedStatisticNameAddition.
2383  */
2384 static char *
ChooseIndexNameAddition(List * colnames)2385 ChooseIndexNameAddition(List *colnames)
2386 {
2387 	char		buf[NAMEDATALEN * 2];
2388 	int			buflen = 0;
2389 	ListCell   *lc;
2390 
2391 	buf[0] = '\0';
2392 	foreach(lc, colnames)
2393 	{
2394 		const char *name = (const char *) lfirst(lc);
2395 
2396 		if (buflen > 0)
2397 			buf[buflen++] = '_';	/* insert _ between names */
2398 
2399 		/*
2400 		 * At this point we have buflen <= NAMEDATALEN.  name should be less
2401 		 * than NAMEDATALEN already, but use strlcpy for paranoia.
2402 		 */
2403 		strlcpy(buf + buflen, name, NAMEDATALEN);
2404 		buflen += strlen(buf + buflen);
2405 		if (buflen >= NAMEDATALEN)
2406 			break;
2407 	}
2408 	return pstrdup(buf);
2409 }
2410 
2411 /*
2412  * Select the actual names to be used for the columns of an index, given the
2413  * list of IndexElems for the columns.  This is mostly about ensuring the
2414  * names are unique so we don't get a conflicting-attribute-names error.
2415  *
2416  * Returns a List of plain strings (char *, not String nodes).
2417  */
2418 static List *
ChooseIndexColumnNames(List * indexElems)2419 ChooseIndexColumnNames(List *indexElems)
2420 {
2421 	List	   *result = NIL;
2422 	ListCell   *lc;
2423 
2424 	foreach(lc, indexElems)
2425 	{
2426 		IndexElem  *ielem = (IndexElem *) lfirst(lc);
2427 		const char *origname;
2428 		const char *curname;
2429 		int			i;
2430 		char		buf[NAMEDATALEN];
2431 
2432 		/* Get the preliminary name from the IndexElem */
2433 		if (ielem->indexcolname)
2434 			origname = ielem->indexcolname; /* caller-specified name */
2435 		else if (ielem->name)
2436 			origname = ielem->name; /* simple column reference */
2437 		else
2438 			origname = "expr";	/* default name for expression */
2439 
2440 		/* If it conflicts with any previous column, tweak it */
2441 		curname = origname;
2442 		for (i = 1;; i++)
2443 		{
2444 			ListCell   *lc2;
2445 			char		nbuf[32];
2446 			int			nlen;
2447 
2448 			foreach(lc2, result)
2449 			{
2450 				if (strcmp(curname, (char *) lfirst(lc2)) == 0)
2451 					break;
2452 			}
2453 			if (lc2 == NULL)
2454 				break;			/* found nonconflicting name */
2455 
2456 			sprintf(nbuf, "%d", i);
2457 
2458 			/* Ensure generated names are shorter than NAMEDATALEN */
2459 			nlen = pg_mbcliplen(origname, strlen(origname),
2460 								NAMEDATALEN - 1 - strlen(nbuf));
2461 			memcpy(buf, origname, nlen);
2462 			strcpy(buf + nlen, nbuf);
2463 			curname = buf;
2464 		}
2465 
2466 		/* And attach to the result list */
2467 		result = lappend(result, pstrdup(curname));
2468 	}
2469 	return result;
2470 }
2471 
2472 /*
2473  * ExecReindex
2474  *
2475  * Primary entry point for manual REINDEX commands.  This is mainly a
2476  * preparation wrapper for the real operations that will happen in
2477  * each subroutine of REINDEX.
2478  */
2479 void
ExecReindex(ParseState * pstate,ReindexStmt * stmt,bool isTopLevel)2480 ExecReindex(ParseState *pstate, ReindexStmt *stmt, bool isTopLevel)
2481 {
2482 	ReindexParams params = {0};
2483 	ListCell   *lc;
2484 	bool		concurrently = false;
2485 	bool		verbose = false;
2486 	char	   *tablespacename = NULL;
2487 
2488 	/* Parse option list */
2489 	foreach(lc, stmt->params)
2490 	{
2491 		DefElem    *opt = (DefElem *) lfirst(lc);
2492 
2493 		if (strcmp(opt->defname, "verbose") == 0)
2494 			verbose = defGetBoolean(opt);
2495 		else if (strcmp(opt->defname, "concurrently") == 0)
2496 			concurrently = defGetBoolean(opt);
2497 		else if (strcmp(opt->defname, "tablespace") == 0)
2498 			tablespacename = defGetString(opt);
2499 		else
2500 			ereport(ERROR,
2501 					(errcode(ERRCODE_SYNTAX_ERROR),
2502 					 errmsg("unrecognized REINDEX option \"%s\"",
2503 							opt->defname),
2504 					 parser_errposition(pstate, opt->location)));
2505 	}
2506 
2507 	if (concurrently)
2508 		PreventInTransactionBlock(isTopLevel,
2509 								  "REINDEX CONCURRENTLY");
2510 
2511 	params.options =
2512 		(verbose ? REINDEXOPT_VERBOSE : 0) |
2513 		(concurrently ? REINDEXOPT_CONCURRENTLY : 0);
2514 
2515 	/*
2516 	 * Assign the tablespace OID to move indexes to, with InvalidOid to do
2517 	 * nothing.
2518 	 */
2519 	if (tablespacename != NULL)
2520 	{
2521 		params.tablespaceOid = get_tablespace_oid(tablespacename, false);
2522 
2523 		/* Check permissions except when moving to database's default */
2524 		if (OidIsValid(params.tablespaceOid) &&
2525 			params.tablespaceOid != MyDatabaseTableSpace)
2526 		{
2527 			AclResult	aclresult;
2528 
2529 			aclresult = pg_tablespace_aclcheck(params.tablespaceOid,
2530 											   GetUserId(), ACL_CREATE);
2531 			if (aclresult != ACLCHECK_OK)
2532 				aclcheck_error(aclresult, OBJECT_TABLESPACE,
2533 							   get_tablespace_name(params.tablespaceOid));
2534 		}
2535 	}
2536 	else
2537 		params.tablespaceOid = InvalidOid;
2538 
2539 	switch (stmt->kind)
2540 	{
2541 		case REINDEX_OBJECT_INDEX:
2542 			ReindexIndex(stmt->relation, &params, isTopLevel);
2543 			break;
2544 		case REINDEX_OBJECT_TABLE:
2545 			ReindexTable(stmt->relation, &params, isTopLevel);
2546 			break;
2547 		case REINDEX_OBJECT_SCHEMA:
2548 		case REINDEX_OBJECT_SYSTEM:
2549 		case REINDEX_OBJECT_DATABASE:
2550 
2551 			/*
2552 			 * This cannot run inside a user transaction block; if we were
2553 			 * inside a transaction, then its commit- and
2554 			 * start-transaction-command calls would not have the intended
2555 			 * effect!
2556 			 */
2557 			PreventInTransactionBlock(isTopLevel,
2558 									  (stmt->kind == REINDEX_OBJECT_SCHEMA) ? "REINDEX SCHEMA" :
2559 									  (stmt->kind == REINDEX_OBJECT_SYSTEM) ? "REINDEX SYSTEM" :
2560 									  "REINDEX DATABASE");
2561 			ReindexMultipleTables(stmt->name, stmt->kind, &params);
2562 			break;
2563 		default:
2564 			elog(ERROR, "unrecognized object type: %d",
2565 				 (int) stmt->kind);
2566 			break;
2567 	}
2568 }
2569 
2570 /*
2571  * ReindexIndex
2572  *		Recreate a specific index.
2573  */
2574 static void
ReindexIndex(RangeVar * indexRelation,ReindexParams * params,bool isTopLevel)2575 ReindexIndex(RangeVar *indexRelation, ReindexParams *params, bool isTopLevel)
2576 {
2577 	struct ReindexIndexCallbackState state;
2578 	Oid			indOid;
2579 	char		persistence;
2580 	char		relkind;
2581 
2582 	/*
2583 	 * Find and lock index, and check permissions on table; use callback to
2584 	 * obtain lock on table first, to avoid deadlock hazard.  The lock level
2585 	 * used here must match the index lock obtained in reindex_index().
2586 	 *
2587 	 * If it's a temporary index, we will perform a non-concurrent reindex,
2588 	 * even if CONCURRENTLY was requested.  In that case, reindex_index() will
2589 	 * upgrade the lock, but that's OK, because other sessions can't hold
2590 	 * locks on our temporary table.
2591 	 */
2592 	state.params = *params;
2593 	state.locked_table_oid = InvalidOid;
2594 	indOid = RangeVarGetRelidExtended(indexRelation,
2595 									  (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
2596 									  ShareUpdateExclusiveLock : AccessExclusiveLock,
2597 									  0,
2598 									  RangeVarCallbackForReindexIndex,
2599 									  &state);
2600 
2601 	/*
2602 	 * Obtain the current persistence and kind of the existing index.  We
2603 	 * already hold a lock on the index.
2604 	 */
2605 	persistence = get_rel_persistence(indOid);
2606 	relkind = get_rel_relkind(indOid);
2607 
2608 	if (relkind == RELKIND_PARTITIONED_INDEX)
2609 		ReindexPartitions(indOid, params, isTopLevel);
2610 	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2611 			 persistence != RELPERSISTENCE_TEMP)
2612 		ReindexRelationConcurrently(indOid, params);
2613 	else
2614 	{
2615 		ReindexParams newparams = *params;
2616 
2617 		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
2618 		reindex_index(indOid, false, persistence, &newparams);
2619 	}
2620 }
2621 
2622 /*
2623  * Check permissions on table before acquiring relation lock; also lock
2624  * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
2625  * deadlocks.
2626  */
2627 static void
RangeVarCallbackForReindexIndex(const RangeVar * relation,Oid relId,Oid oldRelId,void * arg)2628 RangeVarCallbackForReindexIndex(const RangeVar *relation,
2629 								Oid relId, Oid oldRelId, void *arg)
2630 {
2631 	char		relkind;
2632 	struct ReindexIndexCallbackState *state = arg;
2633 	LOCKMODE	table_lockmode;
2634 
2635 	/*
2636 	 * Lock level here should match table lock in reindex_index() for
2637 	 * non-concurrent case and table locks used by index_concurrently_*() for
2638 	 * concurrent case.
2639 	 */
2640 	table_lockmode = (state->params.options & REINDEXOPT_CONCURRENTLY) != 0 ?
2641 		ShareUpdateExclusiveLock : ShareLock;
2642 
2643 	/*
2644 	 * If we previously locked some other index's heap, and the name we're
2645 	 * looking up no longer refers to that relation, release the now-useless
2646 	 * lock.
2647 	 */
2648 	if (relId != oldRelId && OidIsValid(oldRelId))
2649 	{
2650 		UnlockRelationOid(state->locked_table_oid, table_lockmode);
2651 		state->locked_table_oid = InvalidOid;
2652 	}
2653 
2654 	/* If the relation does not exist, there's nothing more to do. */
2655 	if (!OidIsValid(relId))
2656 		return;
2657 
2658 	/*
2659 	 * If the relation does exist, check whether it's an index.  But note that
2660 	 * the relation might have been dropped between the time we did the name
2661 	 * lookup and now.  In that case, there's nothing to do.
2662 	 */
2663 	relkind = get_rel_relkind(relId);
2664 	if (!relkind)
2665 		return;
2666 	if (relkind != RELKIND_INDEX &&
2667 		relkind != RELKIND_PARTITIONED_INDEX)
2668 		ereport(ERROR,
2669 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
2670 				 errmsg("\"%s\" is not an index", relation->relname)));
2671 
2672 	/* Check permissions */
2673 	if (!pg_class_ownercheck(relId, GetUserId()))
2674 		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
2675 
2676 	/* Lock heap before index to avoid deadlock. */
2677 	if (relId != oldRelId)
2678 	{
2679 		Oid			table_oid = IndexGetRelation(relId, true);
2680 
2681 		/*
2682 		 * If the OID isn't valid, it means the index was concurrently
2683 		 * dropped, which is not a problem for us; just return normally.
2684 		 */
2685 		if (OidIsValid(table_oid))
2686 		{
2687 			LockRelationOid(table_oid, table_lockmode);
2688 			state->locked_table_oid = table_oid;
2689 		}
2690 	}
2691 }
2692 
2693 /*
2694  * ReindexTable
2695  *		Recreate all indexes of a table (and of its toast table, if any)
2696  */
2697 static Oid
ReindexTable(RangeVar * relation,ReindexParams * params,bool isTopLevel)2698 ReindexTable(RangeVar *relation, ReindexParams *params, bool isTopLevel)
2699 {
2700 	Oid			heapOid;
2701 	bool		result;
2702 
2703 	/*
2704 	 * The lock level used here should match reindex_relation().
2705 	 *
2706 	 * If it's a temporary table, we will perform a non-concurrent reindex,
2707 	 * even if CONCURRENTLY was requested.  In that case, reindex_relation()
2708 	 * will upgrade the lock, but that's OK, because other sessions can't hold
2709 	 * locks on our temporary table.
2710 	 */
2711 	heapOid = RangeVarGetRelidExtended(relation,
2712 									   (params->options & REINDEXOPT_CONCURRENTLY) != 0 ?
2713 									   ShareUpdateExclusiveLock : ShareLock,
2714 									   0,
2715 									   RangeVarCallbackOwnsTable, NULL);
2716 
2717 	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
2718 		ReindexPartitions(heapOid, params, isTopLevel);
2719 	else if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2720 			 get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
2721 	{
2722 		result = ReindexRelationConcurrently(heapOid, params);
2723 
2724 		if (!result)
2725 			ereport(NOTICE,
2726 					(errmsg("table \"%s\" has no indexes that can be reindexed concurrently",
2727 							relation->relname)));
2728 	}
2729 	else
2730 	{
2731 		ReindexParams newparams = *params;
2732 
2733 		newparams.options |= REINDEXOPT_REPORT_PROGRESS;
2734 		result = reindex_relation(heapOid,
2735 								  REINDEX_REL_PROCESS_TOAST |
2736 								  REINDEX_REL_CHECK_CONSTRAINTS,
2737 								  &newparams);
2738 		if (!result)
2739 			ereport(NOTICE,
2740 					(errmsg("table \"%s\" has no indexes to reindex",
2741 							relation->relname)));
2742 	}
2743 
2744 	return heapOid;
2745 }
2746 
2747 /*
2748  * ReindexMultipleTables
2749  *		Recreate indexes of tables selected by objectName/objectKind.
2750  *
2751  * To reduce the probability of deadlocks, each table is reindexed in a
2752  * separate transaction, so we can release the lock on it right away.
2753  * That means this must not be called within a user transaction block!
2754  */
2755 static void
ReindexMultipleTables(const char * objectName,ReindexObjectType objectKind,ReindexParams * params)2756 ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
2757 					  ReindexParams *params)
2758 {
2759 	Oid			objectOid;
2760 	Relation	relationRelation;
2761 	TableScanDesc scan;
2762 	ScanKeyData scan_keys[1];
2763 	HeapTuple	tuple;
2764 	MemoryContext private_context;
2765 	MemoryContext old;
2766 	List	   *relids = NIL;
2767 	int			num_keys;
2768 	bool		concurrent_warning = false;
2769 	bool		tablespace_warning = false;
2770 
2771 	AssertArg(objectName);
2772 	Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
2773 		   objectKind == REINDEX_OBJECT_SYSTEM ||
2774 		   objectKind == REINDEX_OBJECT_DATABASE);
2775 
2776 	if (objectKind == REINDEX_OBJECT_SYSTEM &&
2777 		(params->options & REINDEXOPT_CONCURRENTLY) != 0)
2778 		ereport(ERROR,
2779 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2780 				 errmsg("cannot reindex system catalogs concurrently")));
2781 
2782 	/*
2783 	 * Get OID of object to reindex, being the database currently being used
2784 	 * by session for a database or for system catalogs, or the schema defined
2785 	 * by caller. At the same time do permission checks that need different
2786 	 * processing depending on the object type.
2787 	 */
2788 	if (objectKind == REINDEX_OBJECT_SCHEMA)
2789 	{
2790 		objectOid = get_namespace_oid(objectName, false);
2791 
2792 		if (!pg_namespace_ownercheck(objectOid, GetUserId()))
2793 			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
2794 						   objectName);
2795 	}
2796 	else
2797 	{
2798 		objectOid = MyDatabaseId;
2799 
2800 		if (strcmp(objectName, get_database_name(objectOid)) != 0)
2801 			ereport(ERROR,
2802 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2803 					 errmsg("can only reindex the currently open database")));
2804 		if (!pg_database_ownercheck(objectOid, GetUserId()))
2805 			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
2806 						   objectName);
2807 	}
2808 
2809 	/*
2810 	 * Create a memory context that will survive forced transaction commits we
2811 	 * do below.  Since it is a child of PortalContext, it will go away
2812 	 * eventually even if we suffer an error; there's no need for special
2813 	 * abort cleanup logic.
2814 	 */
2815 	private_context = AllocSetContextCreate(PortalContext,
2816 											"ReindexMultipleTables",
2817 											ALLOCSET_SMALL_SIZES);
2818 
2819 	/*
2820 	 * Define the search keys to find the objects to reindex. For a schema, we
2821 	 * select target relations using relnamespace, something not necessary for
2822 	 * a database-wide operation.
2823 	 */
2824 	if (objectKind == REINDEX_OBJECT_SCHEMA)
2825 	{
2826 		num_keys = 1;
2827 		ScanKeyInit(&scan_keys[0],
2828 					Anum_pg_class_relnamespace,
2829 					BTEqualStrategyNumber, F_OIDEQ,
2830 					ObjectIdGetDatum(objectOid));
2831 	}
2832 	else
2833 		num_keys = 0;
2834 
2835 	/*
2836 	 * Scan pg_class to build a list of the relations we need to reindex.
2837 	 *
2838 	 * We only consider plain relations and materialized views here (toast
2839 	 * rels will be processed indirectly by reindex_relation).
2840 	 */
2841 	relationRelation = table_open(RelationRelationId, AccessShareLock);
2842 	scan = table_beginscan_catalog(relationRelation, num_keys, scan_keys);
2843 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2844 	{
2845 		Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
2846 		Oid			relid = classtuple->oid;
2847 
2848 		/*
2849 		 * Only regular tables and matviews can have indexes, so ignore any
2850 		 * other kind of relation.
2851 		 *
2852 		 * Partitioned tables/indexes are skipped but matching leaf partitions
2853 		 * are processed.
2854 		 */
2855 		if (classtuple->relkind != RELKIND_RELATION &&
2856 			classtuple->relkind != RELKIND_MATVIEW)
2857 			continue;
2858 
2859 		/* Skip temp tables of other backends; we can't reindex them at all */
2860 		if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
2861 			!isTempNamespace(classtuple->relnamespace))
2862 			continue;
2863 
2864 		/* Check user/system classification, and optionally skip */
2865 		if (objectKind == REINDEX_OBJECT_SYSTEM &&
2866 			!IsSystemClass(relid, classtuple))
2867 			continue;
2868 
2869 		/*
2870 		 * The table can be reindexed if the user is superuser, the table
2871 		 * owner, or the database/schema owner (but in the latter case, only
2872 		 * if it's not a shared relation).  pg_class_ownercheck includes the
2873 		 * superuser case, and depending on objectKind we already know that
2874 		 * the user has permission to run REINDEX on this database or schema
2875 		 * per the permission checks at the beginning of this routine.
2876 		 */
2877 		if (classtuple->relisshared &&
2878 			!pg_class_ownercheck(relid, GetUserId()))
2879 			continue;
2880 
2881 		/*
2882 		 * Skip system tables, since index_create() would reject indexing them
2883 		 * concurrently (and it would likely fail if we tried).
2884 		 */
2885 		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
2886 			IsCatalogRelationOid(relid))
2887 		{
2888 			if (!concurrent_warning)
2889 				ereport(WARNING,
2890 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2891 						 errmsg("cannot reindex system catalogs concurrently, skipping all")));
2892 			concurrent_warning = true;
2893 			continue;
2894 		}
2895 
2896 		/*
2897 		 * If a new tablespace is set, check if this relation has to be
2898 		 * skipped.
2899 		 */
2900 		if (OidIsValid(params->tablespaceOid))
2901 		{
2902 			bool		skip_rel = false;
2903 
2904 			/*
2905 			 * Mapped relations cannot be moved to different tablespaces (in
2906 			 * particular this eliminates all shared catalogs.).
2907 			 */
2908 			if (RELKIND_HAS_STORAGE(classtuple->relkind) &&
2909 				!OidIsValid(classtuple->relfilenode))
2910 				skip_rel = true;
2911 
2912 			/*
2913 			 * A system relation is always skipped, even with
2914 			 * allow_system_table_mods enabled.
2915 			 */
2916 			if (IsSystemClass(relid, classtuple))
2917 				skip_rel = true;
2918 
2919 			if (skip_rel)
2920 			{
2921 				if (!tablespace_warning)
2922 					ereport(WARNING,
2923 							(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
2924 							 errmsg("cannot move system relations, skipping all")));
2925 				tablespace_warning = true;
2926 				continue;
2927 			}
2928 		}
2929 
2930 		/* Save the list of relation OIDs in private context */
2931 		old = MemoryContextSwitchTo(private_context);
2932 
2933 		/*
2934 		 * We always want to reindex pg_class first if it's selected to be
2935 		 * reindexed.  This ensures that if there is any corruption in
2936 		 * pg_class' indexes, they will be fixed before we process any other
2937 		 * tables.  This is critical because reindexing itself will try to
2938 		 * update pg_class.
2939 		 */
2940 		if (relid == RelationRelationId)
2941 			relids = lcons_oid(relid, relids);
2942 		else
2943 			relids = lappend_oid(relids, relid);
2944 
2945 		MemoryContextSwitchTo(old);
2946 	}
2947 	table_endscan(scan);
2948 	table_close(relationRelation, AccessShareLock);
2949 
2950 	/*
2951 	 * Process each relation listed in a separate transaction.  Note that this
2952 	 * commits and then starts a new transaction immediately.
2953 	 */
2954 	ReindexMultipleInternal(relids, params);
2955 
2956 	MemoryContextDelete(private_context);
2957 }
2958 
2959 /*
2960  * Error callback specific to ReindexPartitions().
2961  */
2962 static void
reindex_error_callback(void * arg)2963 reindex_error_callback(void *arg)
2964 {
2965 	ReindexErrorInfo *errinfo = (ReindexErrorInfo *) arg;
2966 
2967 	Assert(errinfo->relkind == RELKIND_PARTITIONED_INDEX ||
2968 		   errinfo->relkind == RELKIND_PARTITIONED_TABLE);
2969 
2970 	if (errinfo->relkind == RELKIND_PARTITIONED_TABLE)
2971 		errcontext("while reindexing partitioned table \"%s.%s\"",
2972 				   errinfo->relnamespace, errinfo->relname);
2973 	else if (errinfo->relkind == RELKIND_PARTITIONED_INDEX)
2974 		errcontext("while reindexing partitioned index \"%s.%s\"",
2975 				   errinfo->relnamespace, errinfo->relname);
2976 }
2977 
2978 /*
2979  * ReindexPartitions
2980  *
2981  * Reindex a set of partitions, per the partitioned index or table given
2982  * by the caller.
2983  */
2984 static void
ReindexPartitions(Oid relid,ReindexParams * params,bool isTopLevel)2985 ReindexPartitions(Oid relid, ReindexParams *params, bool isTopLevel)
2986 {
2987 	List	   *partitions = NIL;
2988 	char		relkind = get_rel_relkind(relid);
2989 	char	   *relname = get_rel_name(relid);
2990 	char	   *relnamespace = get_namespace_name(get_rel_namespace(relid));
2991 	MemoryContext reindex_context;
2992 	List	   *inhoids;
2993 	ListCell   *lc;
2994 	ErrorContextCallback errcallback;
2995 	ReindexErrorInfo errinfo;
2996 
2997 	Assert(relkind == RELKIND_PARTITIONED_INDEX ||
2998 		   relkind == RELKIND_PARTITIONED_TABLE);
2999 
3000 	/*
3001 	 * Check if this runs in a transaction block, with an error callback to
3002 	 * provide more context under which a problem happens.
3003 	 */
3004 	errinfo.relname = pstrdup(relname);
3005 	errinfo.relnamespace = pstrdup(relnamespace);
3006 	errinfo.relkind = relkind;
3007 	errcallback.callback = reindex_error_callback;
3008 	errcallback.arg = (void *) &errinfo;
3009 	errcallback.previous = error_context_stack;
3010 	error_context_stack = &errcallback;
3011 
3012 	PreventInTransactionBlock(isTopLevel,
3013 							  relkind == RELKIND_PARTITIONED_TABLE ?
3014 							  "REINDEX TABLE" : "REINDEX INDEX");
3015 
3016 	/* Pop the error context stack */
3017 	error_context_stack = errcallback.previous;
3018 
3019 	/*
3020 	 * Create special memory context for cross-transaction storage.
3021 	 *
3022 	 * Since it is a child of PortalContext, it will go away eventually even
3023 	 * if we suffer an error so there is no need for special abort cleanup
3024 	 * logic.
3025 	 */
3026 	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
3027 											ALLOCSET_DEFAULT_SIZES);
3028 
3029 	/* ShareLock is enough to prevent schema modifications */
3030 	inhoids = find_all_inheritors(relid, ShareLock, NULL);
3031 
3032 	/*
3033 	 * The list of relations to reindex are the physical partitions of the
3034 	 * tree so discard any partitioned table or index.
3035 	 */
3036 	foreach(lc, inhoids)
3037 	{
3038 		Oid			partoid = lfirst_oid(lc);
3039 		char		partkind = get_rel_relkind(partoid);
3040 		MemoryContext old_context;
3041 
3042 		/*
3043 		 * This discards partitioned tables, partitioned indexes and foreign
3044 		 * tables.
3045 		 */
3046 		if (!RELKIND_HAS_STORAGE(partkind))
3047 			continue;
3048 
3049 		Assert(partkind == RELKIND_INDEX ||
3050 			   partkind == RELKIND_RELATION);
3051 
3052 		/* Save partition OID */
3053 		old_context = MemoryContextSwitchTo(reindex_context);
3054 		partitions = lappend_oid(partitions, partoid);
3055 		MemoryContextSwitchTo(old_context);
3056 	}
3057 
3058 	/*
3059 	 * Process each partition listed in a separate transaction.  Note that
3060 	 * this commits and then starts a new transaction immediately.
3061 	 */
3062 	ReindexMultipleInternal(partitions, params);
3063 
3064 	/*
3065 	 * Clean up working storage --- note we must do this after
3066 	 * StartTransactionCommand, else we might be trying to delete the active
3067 	 * context!
3068 	 */
3069 	MemoryContextDelete(reindex_context);
3070 }
3071 
3072 /*
3073  * ReindexMultipleInternal
3074  *
3075  * Reindex a list of relations, each one being processed in its own
3076  * transaction.  This commits the existing transaction immediately,
3077  * and starts a new transaction when finished.
3078  */
3079 static void
ReindexMultipleInternal(List * relids,ReindexParams * params)3080 ReindexMultipleInternal(List *relids, ReindexParams *params)
3081 {
3082 	ListCell   *l;
3083 
3084 	PopActiveSnapshot();
3085 	CommitTransactionCommand();
3086 
3087 	foreach(l, relids)
3088 	{
3089 		Oid			relid = lfirst_oid(l);
3090 		char		relkind;
3091 		char		relpersistence;
3092 
3093 		StartTransactionCommand();
3094 
3095 		/* functions in indexes may want a snapshot set */
3096 		PushActiveSnapshot(GetTransactionSnapshot());
3097 
3098 		/* check if the relation still exists */
3099 		if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(relid)))
3100 		{
3101 			PopActiveSnapshot();
3102 			CommitTransactionCommand();
3103 			continue;
3104 		}
3105 
3106 		/*
3107 		 * Check permissions except when moving to database's default if a new
3108 		 * tablespace is chosen.  Note that this check also happens in
3109 		 * ExecReindex(), but we do an extra check here as this runs across
3110 		 * multiple transactions.
3111 		 */
3112 		if (OidIsValid(params->tablespaceOid) &&
3113 			params->tablespaceOid != MyDatabaseTableSpace)
3114 		{
3115 			AclResult	aclresult;
3116 
3117 			aclresult = pg_tablespace_aclcheck(params->tablespaceOid,
3118 											   GetUserId(), ACL_CREATE);
3119 			if (aclresult != ACLCHECK_OK)
3120 				aclcheck_error(aclresult, OBJECT_TABLESPACE,
3121 							   get_tablespace_name(params->tablespaceOid));
3122 		}
3123 
3124 		relkind = get_rel_relkind(relid);
3125 		relpersistence = get_rel_persistence(relid);
3126 
3127 		/*
3128 		 * Partitioned tables and indexes can never be processed directly, and
3129 		 * a list of their leaves should be built first.
3130 		 */
3131 		Assert(relkind != RELKIND_PARTITIONED_INDEX &&
3132 			   relkind != RELKIND_PARTITIONED_TABLE);
3133 
3134 		if ((params->options & REINDEXOPT_CONCURRENTLY) != 0 &&
3135 			relpersistence != RELPERSISTENCE_TEMP)
3136 		{
3137 			ReindexParams newparams = *params;
3138 
3139 			newparams.options |= REINDEXOPT_MISSING_OK;
3140 			(void) ReindexRelationConcurrently(relid, &newparams);
3141 			/* ReindexRelationConcurrently() does the verbose output */
3142 		}
3143 		else if (relkind == RELKIND_INDEX)
3144 		{
3145 			ReindexParams newparams = *params;
3146 
3147 			newparams.options |=
3148 				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
3149 			reindex_index(relid, false, relpersistence, &newparams);
3150 			PopActiveSnapshot();
3151 			/* reindex_index() does the verbose output */
3152 		}
3153 		else
3154 		{
3155 			bool		result;
3156 			ReindexParams newparams = *params;
3157 
3158 			newparams.options |=
3159 				REINDEXOPT_REPORT_PROGRESS | REINDEXOPT_MISSING_OK;
3160 			result = reindex_relation(relid,
3161 									  REINDEX_REL_PROCESS_TOAST |
3162 									  REINDEX_REL_CHECK_CONSTRAINTS,
3163 									  &newparams);
3164 
3165 			if (result && (params->options & REINDEXOPT_VERBOSE) != 0)
3166 				ereport(INFO,
3167 						(errmsg("table \"%s.%s\" was reindexed",
3168 								get_namespace_name(get_rel_namespace(relid)),
3169 								get_rel_name(relid))));
3170 
3171 			PopActiveSnapshot();
3172 		}
3173 
3174 		CommitTransactionCommand();
3175 	}
3176 
3177 	StartTransactionCommand();
3178 }
3179 
3180 
3181 /*
3182  * ReindexRelationConcurrently - process REINDEX CONCURRENTLY for given
3183  * relation OID
3184  *
3185  * 'relationOid' can either belong to an index, a table or a materialized
3186  * view.  For tables and materialized views, all its indexes will be rebuilt,
3187  * excluding invalid indexes and any indexes used in exclusion constraints,
3188  * but including its associated toast table indexes.  For indexes, the index
3189  * itself will be rebuilt.
3190  *
3191  * The locks taken on parent tables and involved indexes are kept until the
3192  * transaction is committed, at which point a session lock is taken on each
3193  * relation.  Both of these protect against concurrent schema changes.
3194  *
3195  * Returns true if any indexes have been rebuilt (including toast table's
3196  * indexes, when relevant), otherwise returns false.
3197  *
3198  * NOTE: This cannot be used on temporary relations.  A concurrent build would
3199  * cause issues with ON COMMIT actions triggered by the transactions of the
3200  * concurrent build.  Temporary relations are not subject to concurrent
3201  * concerns, so there's no need for the more complicated concurrent build,
3202  * anyway, and a non-concurrent reindex is more efficient.
3203  */
3204 static bool
ReindexRelationConcurrently(Oid relationOid,ReindexParams * params)3205 ReindexRelationConcurrently(Oid relationOid, ReindexParams *params)
3206 {
3207 	typedef struct ReindexIndexInfo
3208 	{
3209 		Oid			indexId;
3210 		Oid			tableId;
3211 		Oid			amId;
3212 		bool		safe;		/* for set_indexsafe_procflags */
3213 	} ReindexIndexInfo;
3214 	List	   *heapRelationIds = NIL;
3215 	List	   *indexIds = NIL;
3216 	List	   *newIndexIds = NIL;
3217 	List	   *relationLocks = NIL;
3218 	List	   *lockTags = NIL;
3219 	ListCell   *lc,
3220 			   *lc2;
3221 	MemoryContext private_context;
3222 	MemoryContext oldcontext;
3223 	char		relkind;
3224 	char	   *relationName = NULL;
3225 	char	   *relationNamespace = NULL;
3226 	PGRUsage	ru0;
3227 	const int	progress_index[] = {
3228 		PROGRESS_CREATEIDX_COMMAND,
3229 		PROGRESS_CREATEIDX_PHASE,
3230 		PROGRESS_CREATEIDX_INDEX_OID,
3231 		PROGRESS_CREATEIDX_ACCESS_METHOD_OID
3232 	};
3233 	int64		progress_vals[4];
3234 
3235 	/*
3236 	 * Create a memory context that will survive forced transaction commits we
3237 	 * do below.  Since it is a child of PortalContext, it will go away
3238 	 * eventually even if we suffer an error; there's no need for special
3239 	 * abort cleanup logic.
3240 	 */
3241 	private_context = AllocSetContextCreate(PortalContext,
3242 											"ReindexConcurrent",
3243 											ALLOCSET_SMALL_SIZES);
3244 
3245 	if ((params->options & REINDEXOPT_VERBOSE) != 0)
3246 	{
3247 		/* Save data needed by REINDEX VERBOSE in private context */
3248 		oldcontext = MemoryContextSwitchTo(private_context);
3249 
3250 		relationName = get_rel_name(relationOid);
3251 		relationNamespace = get_namespace_name(get_rel_namespace(relationOid));
3252 
3253 		pg_rusage_init(&ru0);
3254 
3255 		MemoryContextSwitchTo(oldcontext);
3256 	}
3257 
3258 	relkind = get_rel_relkind(relationOid);
3259 
3260 	/*
3261 	 * Extract the list of indexes that are going to be rebuilt based on the
3262 	 * relation Oid given by caller.
3263 	 */
3264 	switch (relkind)
3265 	{
3266 		case RELKIND_RELATION:
3267 		case RELKIND_MATVIEW:
3268 		case RELKIND_TOASTVALUE:
3269 			{
3270 				/*
3271 				 * In the case of a relation, find all its indexes including
3272 				 * toast indexes.
3273 				 */
3274 				Relation	heapRelation;
3275 
3276 				/* Save the list of relation OIDs in private context */
3277 				oldcontext = MemoryContextSwitchTo(private_context);
3278 
3279 				/* Track this relation for session locks */
3280 				heapRelationIds = lappend_oid(heapRelationIds, relationOid);
3281 
3282 				MemoryContextSwitchTo(oldcontext);
3283 
3284 				if (IsCatalogRelationOid(relationOid))
3285 					ereport(ERROR,
3286 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3287 							 errmsg("cannot reindex system catalogs concurrently")));
3288 
3289 				/* Open relation to get its indexes */
3290 				if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3291 				{
3292 					heapRelation = try_table_open(relationOid,
3293 												  ShareUpdateExclusiveLock);
3294 					/* leave if relation does not exist */
3295 					if (!heapRelation)
3296 						break;
3297 				}
3298 				else
3299 					heapRelation = table_open(relationOid,
3300 											  ShareUpdateExclusiveLock);
3301 
3302 				if (OidIsValid(params->tablespaceOid) &&
3303 					IsSystemRelation(heapRelation))
3304 					ereport(ERROR,
3305 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3306 							 errmsg("cannot move system relation \"%s\"",
3307 									RelationGetRelationName(heapRelation))));
3308 
3309 				/* Add all the valid indexes of relation to list */
3310 				foreach(lc, RelationGetIndexList(heapRelation))
3311 				{
3312 					Oid			cellOid = lfirst_oid(lc);
3313 					Relation	indexRelation = index_open(cellOid,
3314 														   ShareUpdateExclusiveLock);
3315 
3316 					if (!indexRelation->rd_index->indisvalid)
3317 						ereport(WARNING,
3318 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3319 								 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
3320 										get_namespace_name(get_rel_namespace(cellOid)),
3321 										get_rel_name(cellOid))));
3322 					else if (indexRelation->rd_index->indisexclusion)
3323 						ereport(WARNING,
3324 								(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3325 								 errmsg("cannot reindex exclusion constraint index \"%s.%s\" concurrently, skipping",
3326 										get_namespace_name(get_rel_namespace(cellOid)),
3327 										get_rel_name(cellOid))));
3328 					else
3329 					{
3330 						ReindexIndexInfo *idx;
3331 
3332 						/* Save the list of relation OIDs in private context */
3333 						oldcontext = MemoryContextSwitchTo(private_context);
3334 
3335 						idx = palloc(sizeof(ReindexIndexInfo));
3336 						idx->indexId = cellOid;
3337 						/* other fields set later */
3338 
3339 						indexIds = lappend(indexIds, idx);
3340 
3341 						MemoryContextSwitchTo(oldcontext);
3342 					}
3343 
3344 					index_close(indexRelation, NoLock);
3345 				}
3346 
3347 				/* Also add the toast indexes */
3348 				if (OidIsValid(heapRelation->rd_rel->reltoastrelid))
3349 				{
3350 					Oid			toastOid = heapRelation->rd_rel->reltoastrelid;
3351 					Relation	toastRelation = table_open(toastOid,
3352 														   ShareUpdateExclusiveLock);
3353 
3354 					/* Save the list of relation OIDs in private context */
3355 					oldcontext = MemoryContextSwitchTo(private_context);
3356 
3357 					/* Track this relation for session locks */
3358 					heapRelationIds = lappend_oid(heapRelationIds, toastOid);
3359 
3360 					MemoryContextSwitchTo(oldcontext);
3361 
3362 					foreach(lc2, RelationGetIndexList(toastRelation))
3363 					{
3364 						Oid			cellOid = lfirst_oid(lc2);
3365 						Relation	indexRelation = index_open(cellOid,
3366 															   ShareUpdateExclusiveLock);
3367 
3368 						if (!indexRelation->rd_index->indisvalid)
3369 							ereport(WARNING,
3370 									(errcode(ERRCODE_INDEX_CORRUPTED),
3371 									 errmsg("cannot reindex invalid index \"%s.%s\" concurrently, skipping",
3372 											get_namespace_name(get_rel_namespace(cellOid)),
3373 											get_rel_name(cellOid))));
3374 						else
3375 						{
3376 							ReindexIndexInfo *idx;
3377 
3378 							/*
3379 							 * Save the list of relation OIDs in private
3380 							 * context
3381 							 */
3382 							oldcontext = MemoryContextSwitchTo(private_context);
3383 
3384 							idx = palloc(sizeof(ReindexIndexInfo));
3385 							idx->indexId = cellOid;
3386 							indexIds = lappend(indexIds, idx);
3387 							/* other fields set later */
3388 
3389 							MemoryContextSwitchTo(oldcontext);
3390 						}
3391 
3392 						index_close(indexRelation, NoLock);
3393 					}
3394 
3395 					table_close(toastRelation, NoLock);
3396 				}
3397 
3398 				table_close(heapRelation, NoLock);
3399 				break;
3400 			}
3401 		case RELKIND_INDEX:
3402 			{
3403 				Oid			heapId = IndexGetRelation(relationOid,
3404 													  (params->options & REINDEXOPT_MISSING_OK) != 0);
3405 				Relation	heapRelation;
3406 				ReindexIndexInfo *idx;
3407 
3408 				/* if relation is missing, leave */
3409 				if (!OidIsValid(heapId))
3410 					break;
3411 
3412 				if (IsCatalogRelationOid(heapId))
3413 					ereport(ERROR,
3414 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3415 							 errmsg("cannot reindex system catalogs concurrently")));
3416 
3417 				/*
3418 				 * Don't allow reindex for an invalid index on TOAST table, as
3419 				 * if rebuilt it would not be possible to drop it.  Match
3420 				 * error message in reindex_index().
3421 				 */
3422 				if (IsToastNamespace(get_rel_namespace(relationOid)) &&
3423 					!get_index_isvalid(relationOid))
3424 					ereport(ERROR,
3425 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3426 							 errmsg("cannot reindex invalid index on TOAST table")));
3427 
3428 				/*
3429 				 * Check if parent relation can be locked and if it exists,
3430 				 * this needs to be done at this stage as the list of indexes
3431 				 * to rebuild is not complete yet, and REINDEXOPT_MISSING_OK
3432 				 * should not be used once all the session locks are taken.
3433 				 */
3434 				if ((params->options & REINDEXOPT_MISSING_OK) != 0)
3435 				{
3436 					heapRelation = try_table_open(heapId,
3437 												  ShareUpdateExclusiveLock);
3438 					/* leave if relation does not exist */
3439 					if (!heapRelation)
3440 						break;
3441 				}
3442 				else
3443 					heapRelation = table_open(heapId,
3444 											  ShareUpdateExclusiveLock);
3445 
3446 				if (OidIsValid(params->tablespaceOid) &&
3447 					IsSystemRelation(heapRelation))
3448 					ereport(ERROR,
3449 							(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3450 							 errmsg("cannot move system relation \"%s\"",
3451 									get_rel_name(relationOid))));
3452 
3453 				table_close(heapRelation, NoLock);
3454 
3455 				/* Save the list of relation OIDs in private context */
3456 				oldcontext = MemoryContextSwitchTo(private_context);
3457 
3458 				/* Track the heap relation of this index for session locks */
3459 				heapRelationIds = list_make1_oid(heapId);
3460 
3461 				/*
3462 				 * Save the list of relation OIDs in private context.  Note
3463 				 * that invalid indexes are allowed here.
3464 				 */
3465 				idx = palloc(sizeof(ReindexIndexInfo));
3466 				idx->indexId = relationOid;
3467 				indexIds = lappend(indexIds, idx);
3468 				/* other fields set later */
3469 
3470 				MemoryContextSwitchTo(oldcontext);
3471 				break;
3472 			}
3473 
3474 		case RELKIND_PARTITIONED_TABLE:
3475 		case RELKIND_PARTITIONED_INDEX:
3476 		default:
3477 			/* Return error if type of relation is not supported */
3478 			ereport(ERROR,
3479 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
3480 					 errmsg("cannot reindex this type of relation concurrently")));
3481 			break;
3482 	}
3483 
3484 	/*
3485 	 * Definitely no indexes, so leave.  Any checks based on
3486 	 * REINDEXOPT_MISSING_OK should be done only while the list of indexes to
3487 	 * work on is built as the session locks taken before this transaction
3488 	 * commits will make sure that they cannot be dropped by a concurrent
3489 	 * session until this operation completes.
3490 	 */
3491 	if (indexIds == NIL)
3492 	{
3493 		PopActiveSnapshot();
3494 		return false;
3495 	}
3496 
3497 	/* It's not a shared catalog, so refuse to move it to shared tablespace */
3498 	if (params->tablespaceOid == GLOBALTABLESPACE_OID)
3499 		ereport(ERROR,
3500 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
3501 				 errmsg("cannot move non-shared relation to tablespace \"%s\"",
3502 						get_tablespace_name(params->tablespaceOid))));
3503 
3504 	Assert(heapRelationIds != NIL);
3505 
3506 	/*-----
3507 	 * Now we have all the indexes we want to process in indexIds.
3508 	 *
3509 	 * The phases now are:
3510 	 *
3511 	 * 1. create new indexes in the catalog
3512 	 * 2. build new indexes
3513 	 * 3. let new indexes catch up with tuples inserted in the meantime
3514 	 * 4. swap index names
3515 	 * 5. mark old indexes as dead
3516 	 * 6. drop old indexes
3517 	 *
3518 	 * We process each phase for all indexes before moving to the next phase,
3519 	 * for efficiency.
3520 	 */
3521 
3522 	/*
3523 	 * Phase 1 of REINDEX CONCURRENTLY
3524 	 *
3525 	 * Create a new index with the same properties as the old one, but it is
3526 	 * only registered in catalogs and will be built later.  Then get session
3527 	 * locks on all involved tables.  See analogous code in DefineIndex() for
3528 	 * more detailed comments.
3529 	 */
3530 
3531 	foreach(lc, indexIds)
3532 	{
3533 		char	   *concurrentName;
3534 		ReindexIndexInfo *idx = lfirst(lc);
3535 		ReindexIndexInfo *newidx;
3536 		Oid			newIndexId;
3537 		Relation	indexRel;
3538 		Relation	heapRel;
3539 		Relation	newIndexRel;
3540 		LockRelId  *lockrelid;
3541 		Oid			tablespaceid;
3542 
3543 		indexRel = index_open(idx->indexId, ShareUpdateExclusiveLock);
3544 		heapRel = table_open(indexRel->rd_index->indrelid,
3545 							 ShareUpdateExclusiveLock);
3546 
3547 		/* determine safety of this index for set_indexsafe_procflags */
3548 		idx->safe = (indexRel->rd_indexprs == NIL &&
3549 					 indexRel->rd_indpred == NIL);
3550 		idx->tableId = RelationGetRelid(heapRel);
3551 		idx->amId = indexRel->rd_rel->relam;
3552 
3553 		/* This function shouldn't be called for temporary relations. */
3554 		if (indexRel->rd_rel->relpersistence == RELPERSISTENCE_TEMP)
3555 			elog(ERROR, "cannot reindex a temporary table concurrently");
3556 
3557 		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3558 									  idx->tableId);
3559 
3560 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3561 		progress_vals[1] = 0;	/* initializing */
3562 		progress_vals[2] = idx->indexId;
3563 		progress_vals[3] = idx->amId;
3564 		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3565 
3566 		/* Choose a temporary relation name for the new index */
3567 		concurrentName = ChooseRelationName(get_rel_name(idx->indexId),
3568 											NULL,
3569 											"ccnew",
3570 											get_rel_namespace(indexRel->rd_index->indrelid),
3571 											false);
3572 
3573 		/* Choose the new tablespace, indexes of toast tables are not moved */
3574 		if (OidIsValid(params->tablespaceOid) &&
3575 			heapRel->rd_rel->relkind != RELKIND_TOASTVALUE)
3576 			tablespaceid = params->tablespaceOid;
3577 		else
3578 			tablespaceid = indexRel->rd_rel->reltablespace;
3579 
3580 		/* Create new index definition based on given index */
3581 		newIndexId = index_concurrently_create_copy(heapRel,
3582 													idx->indexId,
3583 													tablespaceid,
3584 													concurrentName);
3585 
3586 		/*
3587 		 * Now open the relation of the new index, a session-level lock is
3588 		 * also needed on it.
3589 		 */
3590 		newIndexRel = index_open(newIndexId, ShareUpdateExclusiveLock);
3591 
3592 		/*
3593 		 * Save the list of OIDs and locks in private context
3594 		 */
3595 		oldcontext = MemoryContextSwitchTo(private_context);
3596 
3597 		newidx = palloc(sizeof(ReindexIndexInfo));
3598 		newidx->indexId = newIndexId;
3599 		newidx->safe = idx->safe;
3600 		newidx->tableId = idx->tableId;
3601 		newidx->amId = idx->amId;
3602 
3603 		newIndexIds = lappend(newIndexIds, newidx);
3604 
3605 		/*
3606 		 * Save lockrelid to protect each relation from drop then close
3607 		 * relations. The lockrelid on parent relation is not taken here to
3608 		 * avoid multiple locks taken on the same relation, instead we rely on
3609 		 * parentRelationIds built earlier.
3610 		 */
3611 		lockrelid = palloc(sizeof(*lockrelid));
3612 		*lockrelid = indexRel->rd_lockInfo.lockRelId;
3613 		relationLocks = lappend(relationLocks, lockrelid);
3614 		lockrelid = palloc(sizeof(*lockrelid));
3615 		*lockrelid = newIndexRel->rd_lockInfo.lockRelId;
3616 		relationLocks = lappend(relationLocks, lockrelid);
3617 
3618 		MemoryContextSwitchTo(oldcontext);
3619 
3620 		index_close(indexRel, NoLock);
3621 		index_close(newIndexRel, NoLock);
3622 		table_close(heapRel, NoLock);
3623 	}
3624 
3625 	/*
3626 	 * Save the heap lock for following visibility checks with other backends
3627 	 * might conflict with this session.
3628 	 */
3629 	foreach(lc, heapRelationIds)
3630 	{
3631 		Relation	heapRelation = table_open(lfirst_oid(lc), ShareUpdateExclusiveLock);
3632 		LockRelId  *lockrelid;
3633 		LOCKTAG    *heaplocktag;
3634 
3635 		/* Save the list of locks in private context */
3636 		oldcontext = MemoryContextSwitchTo(private_context);
3637 
3638 		/* Add lockrelid of heap relation to the list of locked relations */
3639 		lockrelid = palloc(sizeof(*lockrelid));
3640 		*lockrelid = heapRelation->rd_lockInfo.lockRelId;
3641 		relationLocks = lappend(relationLocks, lockrelid);
3642 
3643 		heaplocktag = (LOCKTAG *) palloc(sizeof(LOCKTAG));
3644 
3645 		/* Save the LOCKTAG for this parent relation for the wait phase */
3646 		SET_LOCKTAG_RELATION(*heaplocktag, lockrelid->dbId, lockrelid->relId);
3647 		lockTags = lappend(lockTags, heaplocktag);
3648 
3649 		MemoryContextSwitchTo(oldcontext);
3650 
3651 		/* Close heap relation */
3652 		table_close(heapRelation, NoLock);
3653 	}
3654 
3655 	/* Get a session-level lock on each table. */
3656 	foreach(lc, relationLocks)
3657 	{
3658 		LockRelId  *lockrelid = (LockRelId *) lfirst(lc);
3659 
3660 		LockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3661 	}
3662 
3663 	PopActiveSnapshot();
3664 	CommitTransactionCommand();
3665 	StartTransactionCommand();
3666 
3667 	/*
3668 	 * Because we don't take a snapshot in this transaction, there's no need
3669 	 * to set the PROC_IN_SAFE_IC flag here.
3670 	 */
3671 
3672 	/*
3673 	 * Phase 2 of REINDEX CONCURRENTLY
3674 	 *
3675 	 * Build the new indexes in a separate transaction for each index to avoid
3676 	 * having open transactions for an unnecessary long time.  But before
3677 	 * doing that, wait until no running transactions could have the table of
3678 	 * the index open with the old list of indexes.  See "phase 2" in
3679 	 * DefineIndex() for more details.
3680 	 */
3681 
3682 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3683 								 PROGRESS_CREATEIDX_PHASE_WAIT_1);
3684 	WaitForLockersMultiple(lockTags, ShareLock, true);
3685 	CommitTransactionCommand();
3686 
3687 	foreach(lc, newIndexIds)
3688 	{
3689 		ReindexIndexInfo *newidx = lfirst(lc);
3690 
3691 		/* Start new transaction for this index's concurrent build */
3692 		StartTransactionCommand();
3693 
3694 		/*
3695 		 * Check for user-requested abort.  This is inside a transaction so as
3696 		 * xact.c does not issue a useless WARNING, and ensures that
3697 		 * session-level locks are cleaned up on abort.
3698 		 */
3699 		CHECK_FOR_INTERRUPTS();
3700 
3701 		/* Tell concurrent indexing to ignore us, if index qualifies */
3702 		if (newidx->safe)
3703 			set_indexsafe_procflags();
3704 
3705 		/* Set ActiveSnapshot since functions in the indexes may need it */
3706 		PushActiveSnapshot(GetTransactionSnapshot());
3707 
3708 		/*
3709 		 * Update progress for the index to build, with the correct parent
3710 		 * table involved.
3711 		 */
3712 		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX, newidx->tableId);
3713 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3714 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_BUILD;
3715 		progress_vals[2] = newidx->indexId;
3716 		progress_vals[3] = newidx->amId;
3717 		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3718 
3719 		/* Perform concurrent build of new index */
3720 		index_concurrently_build(newidx->tableId, newidx->indexId);
3721 
3722 		PopActiveSnapshot();
3723 		CommitTransactionCommand();
3724 	}
3725 
3726 	StartTransactionCommand();
3727 
3728 	/*
3729 	 * Because we don't take a snapshot or Xid in this transaction, there's no
3730 	 * need to set the PROC_IN_SAFE_IC flag here.
3731 	 */
3732 
3733 	/*
3734 	 * Phase 3 of REINDEX CONCURRENTLY
3735 	 *
	 * During this phase the new indexes catch up with any new tuples that
	 * were created during the previous phase.  See "phase 3" in DefineIndex()
	 * for more details.
3739 	 */
3740 
3741 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3742 								 PROGRESS_CREATEIDX_PHASE_WAIT_2);
3743 	WaitForLockersMultiple(lockTags, ShareLock, true);
3744 	CommitTransactionCommand();
3745 
3746 	foreach(lc, newIndexIds)
3747 	{
3748 		ReindexIndexInfo *newidx = lfirst(lc);
3749 		TransactionId limitXmin;
3750 		Snapshot	snapshot;
3751 
3752 		StartTransactionCommand();
3753 
3754 		/*
3755 		 * Check for user-requested abort.  This is inside a transaction so as
3756 		 * xact.c does not issue a useless WARNING, and ensures that
3757 		 * session-level locks are cleaned up on abort.
3758 		 */
3759 		CHECK_FOR_INTERRUPTS();
3760 
3761 		/* Tell concurrent indexing to ignore us, if index qualifies */
3762 		if (newidx->safe)
3763 			set_indexsafe_procflags();
3764 
3765 		/*
3766 		 * Take the "reference snapshot" that will be used by validate_index()
3767 		 * to filter candidate tuples.
3768 		 */
3769 		snapshot = RegisterSnapshot(GetTransactionSnapshot());
3770 		PushActiveSnapshot(snapshot);
3771 
3772 		/*
3773 		 * Update progress for the index to build, with the correct parent
3774 		 * table involved.
3775 		 */
3776 		pgstat_progress_start_command(PROGRESS_COMMAND_CREATE_INDEX,
3777 									  newidx->tableId);
3778 		progress_vals[0] = PROGRESS_CREATEIDX_COMMAND_REINDEX_CONCURRENTLY;
3779 		progress_vals[1] = PROGRESS_CREATEIDX_PHASE_VALIDATE_IDXSCAN;
3780 		progress_vals[2] = newidx->indexId;
3781 		progress_vals[3] = newidx->amId;
3782 		pgstat_progress_update_multi_param(4, progress_index, progress_vals);
3783 
3784 		validate_index(newidx->tableId, newidx->indexId, snapshot);
3785 
3786 		/*
3787 		 * We can now do away with our active snapshot, we still need to save
3788 		 * the xmin limit to wait for older snapshots.
3789 		 */
3790 		limitXmin = snapshot->xmin;
3791 
3792 		PopActiveSnapshot();
3793 		UnregisterSnapshot(snapshot);
3794 
3795 		/*
3796 		 * To ensure no deadlocks, we must commit and start yet another
3797 		 * transaction, and do our wait before any snapshot has been taken in
3798 		 * it.
3799 		 */
3800 		CommitTransactionCommand();
3801 		StartTransactionCommand();
3802 
3803 		/*
3804 		 * The index is now valid in the sense that it contains all currently
3805 		 * interesting tuples.  But since it might not contain tuples deleted
3806 		 * just before the reference snap was taken, we have to wait out any
3807 		 * transactions that might have older snapshots.
3808 		 *
3809 		 * Because we don't take a snapshot or Xid in this transaction,
3810 		 * there's no need to set the PROC_IN_SAFE_IC flag here.
3811 		 */
3812 		pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3813 									 PROGRESS_CREATEIDX_PHASE_WAIT_3);
3814 		WaitForOlderSnapshots(limitXmin, true);
3815 
3816 		CommitTransactionCommand();
3817 	}
3818 
3819 	/*
3820 	 * Phase 4 of REINDEX CONCURRENTLY
3821 	 *
3822 	 * Now that the new indexes have been validated, swap each new index with
3823 	 * its corresponding old index.
3824 	 *
3825 	 * We mark the new indexes as valid and the old indexes as not valid at
3826 	 * the same time to make sure we only get constraint violations from the
3827 	 * indexes with the correct names.
3828 	 */
3829 
3830 	StartTransactionCommand();
3831 
3832 	/*
3833 	 * Because this transaction only does catalog manipulations and doesn't do
3834 	 * any index operations, we can set the PROC_IN_SAFE_IC flag here
3835 	 * unconditionally.
3836 	 */
3837 	set_indexsafe_procflags();
3838 
3839 	forboth(lc, indexIds, lc2, newIndexIds)
3840 	{
3841 		ReindexIndexInfo *oldidx = lfirst(lc);
3842 		ReindexIndexInfo *newidx = lfirst(lc2);
3843 		char	   *oldName;
3844 
3845 		/*
3846 		 * Check for user-requested abort.  This is inside a transaction so as
3847 		 * xact.c does not issue a useless WARNING, and ensures that
3848 		 * session-level locks are cleaned up on abort.
3849 		 */
3850 		CHECK_FOR_INTERRUPTS();
3851 
3852 		/* Choose a relation name for old index */
3853 		oldName = ChooseRelationName(get_rel_name(oldidx->indexId),
3854 									 NULL,
3855 									 "ccold",
3856 									 get_rel_namespace(oldidx->tableId),
3857 									 false);
3858 
3859 		/*
3860 		 * Swap old index with the new one.  This also marks the new one as
3861 		 * valid and the old one as not valid.
3862 		 */
3863 		index_concurrently_swap(newidx->indexId, oldidx->indexId, oldName);
3864 
3865 		/*
3866 		 * Invalidate the relcache for the table, so that after this commit
3867 		 * all sessions will refresh any cached plans that might reference the
3868 		 * index.
3869 		 */
3870 		CacheInvalidateRelcacheByRelid(oldidx->tableId);
3871 
3872 		/*
3873 		 * CCI here so that subsequent iterations see the oldName in the
3874 		 * catalog and can choose a nonconflicting name for their oldName.
3875 		 * Otherwise, this could lead to conflicts if a table has two indexes
3876 		 * whose names are equal for the first NAMEDATALEN-minus-a-few
3877 		 * characters.
3878 		 */
3879 		CommandCounterIncrement();
3880 	}
3881 
3882 	/* Commit this transaction and make index swaps visible */
3883 	CommitTransactionCommand();
3884 	StartTransactionCommand();
3885 
3886 	/*
3887 	 * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
3888 	 * real need for that, because we only acquire an Xid after the wait is
3889 	 * done, and that lasts for a very short period.
3890 	 */
3891 
3892 	/*
3893 	 * Phase 5 of REINDEX CONCURRENTLY
3894 	 *
3895 	 * Mark the old indexes as dead.  First we must wait until no running
3896 	 * transaction could be using the index for a query.  See also
3897 	 * index_drop() for more details.
3898 	 */
3899 
3900 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3901 								 PROGRESS_CREATEIDX_PHASE_WAIT_4);
3902 	WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3903 
3904 	foreach(lc, indexIds)
3905 	{
3906 		ReindexIndexInfo *oldidx = lfirst(lc);
3907 
3908 		/*
3909 		 * Check for user-requested abort.  This is inside a transaction so as
3910 		 * xact.c does not issue a useless WARNING, and ensures that
3911 		 * session-level locks are cleaned up on abort.
3912 		 */
3913 		CHECK_FOR_INTERRUPTS();
3914 
3915 		index_concurrently_set_dead(oldidx->tableId, oldidx->indexId);
3916 	}
3917 
3918 	/* Commit this transaction to make the updates visible. */
3919 	CommitTransactionCommand();
3920 	StartTransactionCommand();
3921 
3922 	/*
3923 	 * While we could set PROC_IN_SAFE_IC if all indexes qualified, there's no
3924 	 * real need for that, because we only acquire an Xid after the wait is
3925 	 * done, and that lasts for a very short period.
3926 	 */
3927 
3928 	/*
3929 	 * Phase 6 of REINDEX CONCURRENTLY
3930 	 *
3931 	 * Drop the old indexes.
3932 	 */
3933 
3934 	pgstat_progress_update_param(PROGRESS_CREATEIDX_PHASE,
3935 								 PROGRESS_CREATEIDX_PHASE_WAIT_5);
3936 	WaitForLockersMultiple(lockTags, AccessExclusiveLock, true);
3937 
3938 	PushActiveSnapshot(GetTransactionSnapshot());
3939 
3940 	{
3941 		ObjectAddresses *objects = new_object_addresses();
3942 
3943 		foreach(lc, indexIds)
3944 		{
3945 			ReindexIndexInfo *idx = lfirst(lc);
3946 			ObjectAddress object;
3947 
3948 			object.classId = RelationRelationId;
3949 			object.objectId = idx->indexId;
3950 			object.objectSubId = 0;
3951 
3952 			add_exact_object_address(&object, objects);
3953 		}
3954 
3955 		/*
3956 		 * Use PERFORM_DELETION_CONCURRENT_LOCK so that index_drop() uses the
3957 		 * right lock level.
3958 		 */
3959 		performMultipleDeletions(objects, DROP_RESTRICT,
3960 								 PERFORM_DELETION_CONCURRENT_LOCK | PERFORM_DELETION_INTERNAL);
3961 	}
3962 
3963 	PopActiveSnapshot();
3964 	CommitTransactionCommand();
3965 
3966 	/*
3967 	 * Finally, release the session-level lock on the table.
3968 	 */
3969 	foreach(lc, relationLocks)
3970 	{
3971 		LockRelId  *lockrelid = (LockRelId *) lfirst(lc);
3972 
3973 		UnlockRelationIdForSession(lockrelid, ShareUpdateExclusiveLock);
3974 	}
3975 
3976 	/* Start a new transaction to finish process properly */
3977 	StartTransactionCommand();
3978 
3979 	/* Log what we did */
3980 	if ((params->options & REINDEXOPT_VERBOSE) != 0)
3981 	{
3982 		if (relkind == RELKIND_INDEX)
3983 			ereport(INFO,
3984 					(errmsg("index \"%s.%s\" was reindexed",
3985 							relationNamespace, relationName),
3986 					 errdetail("%s.",
3987 							   pg_rusage_show(&ru0))));
3988 		else
3989 		{
3990 			foreach(lc, newIndexIds)
3991 			{
3992 				ReindexIndexInfo *idx = lfirst(lc);
3993 				Oid			indOid = idx->indexId;
3994 
3995 				ereport(INFO,
3996 						(errmsg("index \"%s.%s\" was reindexed",
3997 								get_namespace_name(get_rel_namespace(indOid)),
3998 								get_rel_name(indOid))));
3999 				/* Don't show rusage here, since it's not per index. */
4000 			}
4001 
4002 			ereport(INFO,
4003 					(errmsg("table \"%s.%s\" was reindexed",
4004 							relationNamespace, relationName),
4005 					 errdetail("%s.",
4006 							   pg_rusage_show(&ru0))));
4007 		}
4008 	}
4009 
4010 	MemoryContextDelete(private_context);
4011 
4012 	pgstat_progress_end_command();
4013 
4014 	return true;
4015 }
4016 
4017 /*
4018  * Insert or delete an appropriate pg_inherits tuple to make the given index
4019  * be a partition of the indicated parent index.
4020  *
4021  * This also corrects the pg_depend information for the affected index.
4022  */
4023 void
IndexSetParentIndex(Relation partitionIdx,Oid parentOid)4024 IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
4025 {
4026 	Relation	pg_inherits;
4027 	ScanKeyData key[2];
4028 	SysScanDesc scan;
4029 	Oid			partRelid = RelationGetRelid(partitionIdx);
4030 	HeapTuple	tuple;
4031 	bool		fix_dependencies;
4032 
4033 	/* Make sure this is an index */
4034 	Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX ||
4035 		   partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
4036 
4037 	/*
4038 	 * Scan pg_inherits for rows linking our index to some parent.
4039 	 */
4040 	pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
4041 	ScanKeyInit(&key[0],
4042 				Anum_pg_inherits_inhrelid,
4043 				BTEqualStrategyNumber, F_OIDEQ,
4044 				ObjectIdGetDatum(partRelid));
4045 	ScanKeyInit(&key[1],
4046 				Anum_pg_inherits_inhseqno,
4047 				BTEqualStrategyNumber, F_INT4EQ,
4048 				Int32GetDatum(1));
4049 	scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
4050 							  NULL, 2, key);
4051 	tuple = systable_getnext(scan);
4052 
4053 	if (!HeapTupleIsValid(tuple))
4054 	{
4055 		if (parentOid == InvalidOid)
4056 		{
4057 			/*
4058 			 * No pg_inherits row, and no parent wanted: nothing to do in this
4059 			 * case.
4060 			 */
4061 			fix_dependencies = false;
4062 		}
4063 		else
4064 		{
4065 			StoreSingleInheritance(partRelid, parentOid, 1);
4066 			fix_dependencies = true;
4067 		}
4068 	}
4069 	else
4070 	{
4071 		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);
4072 
4073 		if (parentOid == InvalidOid)
4074 		{
4075 			/*
4076 			 * There exists a pg_inherits row, which we want to clear; do so.
4077 			 */
4078 			CatalogTupleDelete(pg_inherits, &tuple->t_self);
4079 			fix_dependencies = true;
4080 		}
4081 		else
4082 		{
4083 			/*
4084 			 * A pg_inherits row exists.  If it's the same we want, then we're
4085 			 * good; if it differs, that amounts to a corrupt catalog and
4086 			 * should not happen.
4087 			 */
4088 			if (inhForm->inhparent != parentOid)
4089 			{
4090 				/* unexpected: we should not get called in this case */
4091 				elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
4092 					 inhForm->inhrelid, inhForm->inhparent);
4093 			}
4094 
4095 			/* already in the right state */
4096 			fix_dependencies = false;
4097 		}
4098 	}
4099 
4100 	/* done with pg_inherits */
4101 	systable_endscan(scan);
4102 	relation_close(pg_inherits, RowExclusiveLock);
4103 
4104 	/* set relhassubclass if an index partition has been added to the parent */
4105 	if (OidIsValid(parentOid))
4106 		SetRelationHasSubclass(parentOid, true);
4107 
4108 	/* set relispartition correctly on the partition */
4109 	update_relispartition(partRelid, OidIsValid(parentOid));
4110 
4111 	if (fix_dependencies)
4112 	{
4113 		/*
4114 		 * Insert/delete pg_depend rows.  If setting a parent, add PARTITION
4115 		 * dependencies on the parent index and the table; if removing a
4116 		 * parent, delete PARTITION dependencies.
4117 		 */
4118 		if (OidIsValid(parentOid))
4119 		{
4120 			ObjectAddress partIdx;
4121 			ObjectAddress parentIdx;
4122 			ObjectAddress partitionTbl;
4123 
4124 			ObjectAddressSet(partIdx, RelationRelationId, partRelid);
4125 			ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
4126 			ObjectAddressSet(partitionTbl, RelationRelationId,
4127 							 partitionIdx->rd_index->indrelid);
4128 			recordDependencyOn(&partIdx, &parentIdx,
4129 							   DEPENDENCY_PARTITION_PRI);
4130 			recordDependencyOn(&partIdx, &partitionTbl,
4131 							   DEPENDENCY_PARTITION_SEC);
4132 		}
4133 		else
4134 		{
4135 			deleteDependencyRecordsForClass(RelationRelationId, partRelid,
4136 											RelationRelationId,
4137 											DEPENDENCY_PARTITION_PRI);
4138 			deleteDependencyRecordsForClass(RelationRelationId, partRelid,
4139 											RelationRelationId,
4140 											DEPENDENCY_PARTITION_SEC);
4141 		}
4142 
4143 		/* make our updates visible */
4144 		CommandCounterIncrement();
4145 	}
4146 }
4147 
4148 /*
4149  * Subroutine of IndexSetParentIndex to update the relispartition flag of the
4150  * given index to the given value.
4151  */
4152 static void
update_relispartition(Oid relationId,bool newval)4153 update_relispartition(Oid relationId, bool newval)
4154 {
4155 	HeapTuple	tup;
4156 	Relation	classRel;
4157 
4158 	classRel = table_open(RelationRelationId, RowExclusiveLock);
4159 	tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
4160 	if (!HeapTupleIsValid(tup))
4161 		elog(ERROR, "cache lookup failed for relation %u", relationId);
4162 	Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
4163 	((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
4164 	CatalogTupleUpdate(classRel, &tup->t_self, tup);
4165 	heap_freetuple(tup);
4166 	table_close(classRel, RowExclusiveLock);
4167 }
4168 
4169 /*
4170  * Set the PROC_IN_SAFE_IC flag in MyProc->statusFlags.
4171  *
4172  * When doing concurrent index builds, we can set this flag
4173  * to tell other processes concurrently running CREATE
4174  * INDEX CONCURRENTLY or REINDEX CONCURRENTLY to ignore us when
4175  * doing their waits for concurrent snapshots.  On one hand it
4176  * avoids pointlessly waiting for a process that's not interesting
4177  * anyway; but more importantly it avoids deadlocks in some cases.
4178  *
4179  * This can be done safely only for indexes that don't execute any
4180  * expressions that could access other tables, so index must not be
4181  * expressional nor partial.  Caller is responsible for only calling
4182  * this routine when that assumption holds true.
4183  *
4184  * (The flag is reset automatically at transaction end, so it must be
4185  * set for each transaction.)
4186  */
4187 static inline void
set_indexsafe_procflags(void)4188 set_indexsafe_procflags(void)
4189 {
4190 	/*
4191 	 * This should only be called before installing xid or xmin in MyProc;
4192 	 * otherwise, concurrent processes could see an Xmin that moves backwards.
4193 	 */
4194 	Assert(MyProc->xid == InvalidTransactionId &&
4195 		   MyProc->xmin == InvalidTransactionId);
4196 
4197 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
4198 	MyProc->statusFlags |= PROC_IN_SAFE_IC;
4199 	ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags;
4200 	LWLockRelease(ProcArrayLock);
4201 }
4202