1 /*-------------------------------------------------------------------------
2  *
3  * indexcmds.c
4  *	  POSTGRES define and remove index code.
5  *
6  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/commands/indexcmds.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/amapi.h"
19 #include "access/hash.h"
20 #include "access/htup_details.h"
21 #include "access/reloptions.h"
22 #include "access/sysattr.h"
23 #include "access/xact.h"
24 #include "catalog/catalog.h"
25 #include "catalog/index.h"
26 #include "catalog/indexing.h"
27 #include "catalog/partition.h"
28 #include "catalog/pg_am.h"
29 #include "catalog/pg_constraint.h"
30 #include "catalog/pg_inherits.h"
31 #include "catalog/pg_opclass.h"
32 #include "catalog/pg_opfamily.h"
33 #include "catalog/pg_tablespace.h"
34 #include "catalog/pg_type.h"
35 #include "commands/comment.h"
36 #include "commands/dbcommands.h"
37 #include "commands/defrem.h"
38 #include "commands/event_trigger.h"
39 #include "commands/tablecmds.h"
40 #include "commands/tablespace.h"
41 #include "mb/pg_wchar.h"
42 #include "miscadmin.h"
43 #include "nodes/makefuncs.h"
44 #include "nodes/nodeFuncs.h"
45 #include "optimizer/clauses.h"
46 #include "optimizer/planner.h"
47 #include "optimizer/var.h"
48 #include "parser/parse_coerce.h"
49 #include "parser/parse_func.h"
50 #include "parser/parse_oper.h"
51 #include "rewrite/rewriteManip.h"
52 #include "storage/lmgr.h"
53 #include "storage/proc.h"
54 #include "storage/procarray.h"
55 #include "utils/acl.h"
56 #include "utils/builtins.h"
57 #include "utils/fmgroids.h"
58 #include "utils/inval.h"
59 #include "utils/lsyscache.h"
60 #include "utils/memutils.h"
61 #include "utils/partcache.h"
62 #include "utils/regproc.h"
63 #include "utils/snapmgr.h"
64 #include "utils/syscache.h"
65 #include "utils/tqual.h"
66 
67 
68 /* non-export function prototypes */
69 static void CheckPredicate(Expr *predicate);
70 static void ComputeIndexAttrs(IndexInfo *indexInfo,
71 				  Oid *typeOidP,
72 				  Oid *collationOidP,
73 				  Oid *classOidP,
74 				  int16 *colOptionP,
75 				  List *attList,
76 				  List *exclusionOpNames,
77 				  Oid relId,
78 				  const char *accessMethodName, Oid accessMethodId,
79 				  bool amcanorder,
80 				  bool isconstraint);
81 static char *ChooseIndexName(const char *tabname, Oid namespaceId,
82 				List *colnames, List *exclusionOpNames,
83 				bool primary, bool isconstraint);
84 static char *ChooseIndexNameAddition(List *colnames);
85 static List *ChooseIndexColumnNames(List *indexElems);
86 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
87 								Oid relId, Oid oldRelId, void *arg);
88 static void ReindexPartitionedIndex(Relation parentIdx);
89 static void update_relispartition(Oid relationId, bool newval);
90 
91 /*
92  * CheckIndexCompatible
93  *		Determine whether an existing index definition is compatible with a
94  *		prospective index definition, such that the existing index storage
95  *		could become the storage of the new index, avoiding a rebuild.
96  *
97  * 'heapRelation': the relation the index would apply to.
98  * 'accessMethodName': name of the AM to use.
99  * 'attributeList': a list of IndexElem specifying columns and expressions
100  *		to index on.
101  * 'exclusionOpNames': list of names of exclusion-constraint operators,
102  *		or NIL if not an exclusion constraint.
103  *
104  * This is tailored to the needs of ALTER TABLE ALTER TYPE, which recreates
105  * any indexes that depended on a changing column from their pg_get_indexdef
106  * or pg_get_constraintdef definitions.  We omit some of the sanity checks of
107  * DefineIndex.  We assume that the old and new indexes have the same number
108  * of columns and that if one has an expression column or predicate, both do.
109  * Errors arising from the attribute list still apply.
110  *
111  * Most column type changes that can skip a table rewrite do not invalidate
112  * indexes.  We acknowledge this when all operator classes, collations and
113  * exclusion operators match.  Though we could further permit intra-opfamily
114  * changes for btree and hash indexes, that adds subtle complexity with no
115  * concrete benefit for core types. Note, that INCLUDE columns aren't
116  * checked by this function, for them it's enough that table rewrite is
117  * skipped.
118  *
119  * When a comparison or exclusion operator has a polymorphic input type, the
120  * actual input types must also match.  This defends against the possibility
121  * that operators could vary behavior in response to get_fn_expr_argtype().
122  * At present, this hazard is theoretical: check_exclusion_constraint() and
123  * all core index access methods decline to set fn_expr for such calls.
124  *
125  * We do not yet implement a test to verify compatibility of expression
126  * columns or predicates, so assume any such index is incompatible.
127  */
128 bool
CheckIndexCompatible(Oid oldId,const char * accessMethodName,List * attributeList,List * exclusionOpNames)129 CheckIndexCompatible(Oid oldId,
130 					 const char *accessMethodName,
131 					 List *attributeList,
132 					 List *exclusionOpNames)
133 {
134 	bool		isconstraint;
135 	Oid		   *typeObjectId;
136 	Oid		   *collationObjectId;
137 	Oid		   *classObjectId;
138 	Oid			accessMethodId;
139 	Oid			relationId;
140 	HeapTuple	tuple;
141 	Form_pg_index indexForm;
142 	Form_pg_am	accessMethodForm;
143 	IndexAmRoutine *amRoutine;
144 	bool		amcanorder;
145 	int16	   *coloptions;
146 	IndexInfo  *indexInfo;
147 	int			numberOfAttributes;
148 	int			old_natts;
149 	bool		isnull;
150 	bool		ret = true;
151 	oidvector  *old_indclass;
152 	oidvector  *old_indcollation;
153 	Relation	irel;
154 	int			i;
155 	Datum		d;
156 
157 	/* Caller should already have the relation locked in some way. */
158 	relationId = IndexGetRelation(oldId, false);
159 
160 	/*
161 	 * We can pretend isconstraint = false unconditionally.  It only serves to
162 	 * decide the text of an error message that should never happen for us.
163 	 */
164 	isconstraint = false;
165 
166 	numberOfAttributes = list_length(attributeList);
167 	Assert(numberOfAttributes > 0);
168 	Assert(numberOfAttributes <= INDEX_MAX_KEYS);
169 
170 	/* look up the access method */
171 	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
172 	if (!HeapTupleIsValid(tuple))
173 		ereport(ERROR,
174 				(errcode(ERRCODE_UNDEFINED_OBJECT),
175 				 errmsg("access method \"%s\" does not exist",
176 						accessMethodName)));
177 	accessMethodId = HeapTupleGetOid(tuple);
178 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
179 	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
180 	ReleaseSysCache(tuple);
181 
182 	amcanorder = amRoutine->amcanorder;
183 
184 	/*
185 	 * Compute the operator classes, collations, and exclusion operators for
186 	 * the new index, so we can test whether it's compatible with the existing
187 	 * one.  Note that ComputeIndexAttrs might fail here, but that's OK:
188 	 * DefineIndex would have called this function with the same arguments
189 	 * later on, and it would have failed then anyway.  Our attributeList
190 	 * contains only key attributes, thus we're filling ii_NumIndexAttrs and
191 	 * ii_NumIndexKeyAttrs with same value.
192 	 */
193 	indexInfo = makeNode(IndexInfo);
194 	indexInfo->ii_NumIndexAttrs = numberOfAttributes;
195 	indexInfo->ii_NumIndexKeyAttrs = numberOfAttributes;
196 	indexInfo->ii_Expressions = NIL;
197 	indexInfo->ii_ExpressionsState = NIL;
198 	indexInfo->ii_PredicateState = NULL;
199 	indexInfo->ii_ExclusionOps = NULL;
200 	indexInfo->ii_ExclusionProcs = NULL;
201 	indexInfo->ii_ExclusionStrats = NULL;
202 	indexInfo->ii_Am = accessMethodId;
203 	indexInfo->ii_AmCache = NULL;
204 	indexInfo->ii_Context = CurrentMemoryContext;
205 	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
206 	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
207 	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
208 	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
209 	ComputeIndexAttrs(indexInfo,
210 					  typeObjectId, collationObjectId, classObjectId,
211 					  coloptions, attributeList,
212 					  exclusionOpNames, relationId,
213 					  accessMethodName, accessMethodId,
214 					  amcanorder, isconstraint);
215 
216 
217 	/* Get the soon-obsolete pg_index tuple. */
218 	tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(oldId));
219 	if (!HeapTupleIsValid(tuple))
220 		elog(ERROR, "cache lookup failed for index %u", oldId);
221 	indexForm = (Form_pg_index) GETSTRUCT(tuple);
222 
223 	/*
224 	 * We don't assess expressions or predicates; assume incompatibility.
225 	 * Also, if the index is invalid for any reason, treat it as incompatible.
226 	 */
227 	if (!(heap_attisnull(tuple, Anum_pg_index_indpred, NULL) &&
228 		  heap_attisnull(tuple, Anum_pg_index_indexprs, NULL) &&
229 		  IndexIsValid(indexForm)))
230 	{
231 		ReleaseSysCache(tuple);
232 		return false;
233 	}
234 
235 	/* Any change in operator class or collation breaks compatibility. */
236 	old_natts = indexForm->indnkeyatts;
237 	Assert(old_natts == numberOfAttributes);
238 
239 	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indcollation, &isnull);
240 	Assert(!isnull);
241 	old_indcollation = (oidvector *) DatumGetPointer(d);
242 
243 	d = SysCacheGetAttr(INDEXRELID, tuple, Anum_pg_index_indclass, &isnull);
244 	Assert(!isnull);
245 	old_indclass = (oidvector *) DatumGetPointer(d);
246 
247 	ret = (memcmp(old_indclass->values, classObjectId,
248 				  old_natts * sizeof(Oid)) == 0 &&
249 		   memcmp(old_indcollation->values, collationObjectId,
250 				  old_natts * sizeof(Oid)) == 0);
251 
252 	ReleaseSysCache(tuple);
253 
254 	if (!ret)
255 		return false;
256 
257 	/* For polymorphic opcintype, column type changes break compatibility. */
258 	irel = index_open(oldId, AccessShareLock);	/* caller probably has a lock */
259 	for (i = 0; i < old_natts; i++)
260 	{
261 		if (IsPolymorphicType(get_opclass_input_type(classObjectId[i])) &&
262 			TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
263 		{
264 			ret = false;
265 			break;
266 		}
267 	}
268 
269 	/* Any change in exclusion operator selections breaks compatibility. */
270 	if (ret && indexInfo->ii_ExclusionOps != NULL)
271 	{
272 		Oid		   *old_operators,
273 				   *old_procs;
274 		uint16	   *old_strats;
275 
276 		RelationGetExclusionInfo(irel, &old_operators, &old_procs, &old_strats);
277 		ret = memcmp(old_operators, indexInfo->ii_ExclusionOps,
278 					 old_natts * sizeof(Oid)) == 0;
279 
280 		/* Require an exact input type match for polymorphic operators. */
281 		if (ret)
282 		{
283 			for (i = 0; i < old_natts && ret; i++)
284 			{
285 				Oid			left,
286 							right;
287 
288 				op_input_types(indexInfo->ii_ExclusionOps[i], &left, &right);
289 				if ((IsPolymorphicType(left) || IsPolymorphicType(right)) &&
290 					TupleDescAttr(irel->rd_att, i)->atttypid != typeObjectId[i])
291 				{
292 					ret = false;
293 					break;
294 				}
295 			}
296 		}
297 	}
298 
299 	index_close(irel, NoLock);
300 	return ret;
301 }
302 
303 /*
304  * DefineIndex
305  *		Creates a new index.
306  *
307  * 'relationId': the OID of the heap relation on which the index is to be
308  *		created
309  * 'stmt': IndexStmt describing the properties of the new index.
310  * 'indexRelationId': normally InvalidOid, but during bootstrap can be
311  *		nonzero to specify a preselected OID for the index.
312  * 'parentIndexId': the OID of the parent index; InvalidOid if not the child
313  *		of a partitioned index.
314  * 'parentConstraintId': the OID of the parent constraint; InvalidOid if not
315  *		the child of a constraint (only used when recursing)
316  * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
317  * 'check_rights': check for CREATE rights in namespace and tablespace.  (This
318  *		should be true except when ALTER is deleting/recreating an index.)
319  * 'check_not_in_use': check for table not already in use in current session.
320  *		This should be true unless caller is holding the table open, in which
321  *		case the caller had better have checked it earlier.
322  * 'skip_build': make the catalog entries but don't create the index files
323  * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
324  *
325  * Returns the object address of the created index.
326  */
327 ObjectAddress
DefineIndex(Oid relationId,IndexStmt * stmt,Oid indexRelationId,Oid parentIndexId,Oid parentConstraintId,bool is_alter_table,bool check_rights,bool check_not_in_use,bool skip_build,bool quiet)328 DefineIndex(Oid relationId,
329 			IndexStmt *stmt,
330 			Oid indexRelationId,
331 			Oid parentIndexId,
332 			Oid parentConstraintId,
333 			bool is_alter_table,
334 			bool check_rights,
335 			bool check_not_in_use,
336 			bool skip_build,
337 			bool quiet)
338 {
339 	bool		concurrent;
340 	char	   *indexRelationName;
341 	char	   *accessMethodName;
342 	Oid		   *typeObjectId;
343 	Oid		   *collationObjectId;
344 	Oid		   *classObjectId;
345 	Oid			accessMethodId;
346 	Oid			namespaceId;
347 	Oid			tablespaceId;
348 	Oid			createdConstraintId = InvalidOid;
349 	List	   *indexColNames;
350 	List	   *allIndexParams;
351 	Relation	rel;
352 	Relation	indexRelation;
353 	HeapTuple	tuple;
354 	Form_pg_am	accessMethodForm;
355 	IndexAmRoutine *amRoutine;
356 	bool		amcanorder;
357 	amoptions_function amoptions;
358 	bool		partitioned;
359 	Datum		reloptions;
360 	int16	   *coloptions;
361 	IndexInfo  *indexInfo;
362 	bits16		flags;
363 	bits16		constr_flags;
364 	int			numberOfAttributes;
365 	int			numberOfKeyAttributes;
366 	TransactionId limitXmin;
367 	VirtualTransactionId *old_snapshots;
368 	ObjectAddress address;
369 	int			n_old_snapshots;
370 	LockRelId	heaprelid;
371 	LOCKTAG		heaplocktag;
372 	LOCKMODE	lockmode;
373 	Snapshot	snapshot;
374 	int			i;
375 
	/*
	 * Force non-concurrent build on temporary relations, even if CONCURRENTLY
	 * was requested.  Other backends can't access a temporary relation, so
	 * there's no harm in grabbing a stronger lock, and a non-concurrent build
	 * is more efficient.  Do this before any use of the concurrent option is
	 * done.
	 */
383 	if (stmt->concurrent && get_rel_persistence(relationId) != RELPERSISTENCE_TEMP)
384 		concurrent = true;
385 	else
386 		concurrent = false;
387 
388 	/*
389 	 * count key attributes in index
390 	 */
391 	numberOfKeyAttributes = list_length(stmt->indexParams);
392 
393 	/*
394 	 * Calculate the new list of index columns including both key columns and
395 	 * INCLUDE columns.  Later we can determine which of these are key
396 	 * columns, and which are just part of the INCLUDE list by checking the
397 	 * list position.  A list item in a position less than ii_NumIndexKeyAttrs
398 	 * is part of the key columns, and anything equal to and over is part of
399 	 * the INCLUDE columns.
400 	 */
401 	allIndexParams = list_concat(list_copy(stmt->indexParams),
402 								 list_copy(stmt->indexIncludingParams));
403 	numberOfAttributes = list_length(allIndexParams);
404 
405 	if (numberOfKeyAttributes <= 0)
406 		ereport(ERROR,
407 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
408 				 errmsg("must specify at least one column")));
409 	if (numberOfAttributes > INDEX_MAX_KEYS)
410 		ereport(ERROR,
411 				(errcode(ERRCODE_TOO_MANY_COLUMNS),
412 				 errmsg("cannot use more than %d columns in an index",
413 						INDEX_MAX_KEYS)));
414 
415 	/*
416 	 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
417 	 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
418 	 * (but not VACUUM).
419 	 *
420 	 * NB: Caller is responsible for making sure that relationId refers to the
421 	 * relation on which the index should be built; except in bootstrap mode,
422 	 * this will typically require the caller to have already locked the
423 	 * relation.  To avoid lock upgrade hazards, that lock should be at least
424 	 * as strong as the one we take here.
425 	 *
426 	 * NB: If the lock strength here ever changes, code that is run by
427 	 * parallel workers under the control of certain particular ambuild
428 	 * functions will need to be updated, too.
429 	 */
430 	lockmode = concurrent ? ShareUpdateExclusiveLock : ShareLock;
431 	rel = heap_open(relationId, lockmode);
432 
433 	relationId = RelationGetRelid(rel);
434 	namespaceId = RelationGetNamespace(rel);
435 
436 	/* Ensure that it makes sense to index this kind of relation */
437 	switch (rel->rd_rel->relkind)
438 	{
439 		case RELKIND_RELATION:
440 		case RELKIND_MATVIEW:
441 		case RELKIND_PARTITIONED_TABLE:
442 			/* OK */
443 			break;
444 		case RELKIND_FOREIGN_TABLE:
445 
446 			/*
447 			 * Custom error message for FOREIGN TABLE since the term is close
448 			 * to a regular table and can confuse the user.
449 			 */
450 			ereport(ERROR,
451 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
452 					 errmsg("cannot create index on foreign table \"%s\"",
453 							RelationGetRelationName(rel))));
454 			break;
455 		default:
456 			ereport(ERROR,
457 					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
458 					 errmsg("\"%s\" is not a table or materialized view",
459 							RelationGetRelationName(rel))));
460 			break;
461 	}
462 
463 	/*
464 	 * Establish behavior for partitioned tables, and verify sanity of
465 	 * parameters.
466 	 *
467 	 * We do not build an actual index in this case; we only create a few
468 	 * catalog entries.  The actual indexes are built by recursing for each
469 	 * partition.
470 	 */
471 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
472 	if (partitioned)
473 	{
474 		/*
475 		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
476 		 * the error is thrown also for temporary tables.  Seems better to be
477 		 * consistent, even though we could do it on temporary table because
478 		 * we're not actually doing it concurrently.
479 		 */
480 		if (stmt->concurrent)
481 			ereport(ERROR,
482 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
483 					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
484 							RelationGetRelationName(rel))));
485 		if (stmt->excludeOpNames)
486 			ereport(ERROR,
487 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
488 					 errmsg("cannot create exclusion constraints on partitioned table \"%s\"",
489 							RelationGetRelationName(rel))));
490 	}
491 
492 	/*
493 	 * Don't try to CREATE INDEX on temp tables of other backends.
494 	 */
495 	if (RELATION_IS_OTHER_TEMP(rel))
496 		ereport(ERROR,
497 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
498 				 errmsg("cannot create indexes on temporary tables of other sessions")));
499 
500 	/*
501 	 * Unless our caller vouches for having checked this already, insist that
502 	 * the table not be in use by our own session, either.  Otherwise we might
503 	 * fail to make entries in the new index (for instance, if an INSERT or
504 	 * UPDATE is in progress and has already made its list of target indexes).
505 	 */
506 	if (check_not_in_use)
507 		CheckTableNotInUse(rel, "CREATE INDEX");
508 
509 	/*
510 	 * Verify we (still) have CREATE rights in the rel's namespace.
511 	 * (Presumably we did when the rel was created, but maybe not anymore.)
512 	 * Skip check if caller doesn't want it.  Also skip check if
513 	 * bootstrapping, since permissions machinery may not be working yet.
514 	 */
515 	if (check_rights && !IsBootstrapProcessingMode())
516 	{
517 		AclResult	aclresult;
518 
519 		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
520 										  ACL_CREATE);
521 		if (aclresult != ACLCHECK_OK)
522 			aclcheck_error(aclresult, OBJECT_SCHEMA,
523 						   get_namespace_name(namespaceId));
524 	}
525 
526 	/*
527 	 * Select tablespace to use.  If not specified, use default tablespace
528 	 * (which may in turn default to database's default).
529 	 */
530 	if (stmt->tableSpace)
531 	{
532 		tablespaceId = get_tablespace_oid(stmt->tableSpace, false);
533 	}
534 	else
535 	{
536 		tablespaceId = GetDefaultTablespace(rel->rd_rel->relpersistence);
537 		/* note InvalidOid is OK in this case */
538 	}
539 
540 	/* Check tablespace permissions */
541 	if (check_rights &&
542 		OidIsValid(tablespaceId) && tablespaceId != MyDatabaseTableSpace)
543 	{
544 		AclResult	aclresult;
545 
546 		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
547 										   ACL_CREATE);
548 		if (aclresult != ACLCHECK_OK)
549 			aclcheck_error(aclresult, OBJECT_TABLESPACE,
550 						   get_tablespace_name(tablespaceId));
551 	}
552 
553 	/*
554 	 * Force shared indexes into the pg_global tablespace.  This is a bit of a
555 	 * hack but seems simpler than marking them in the BKI commands.  On the
556 	 * other hand, if it's not shared, don't allow it to be placed there.
557 	 */
558 	if (rel->rd_rel->relisshared)
559 		tablespaceId = GLOBALTABLESPACE_OID;
560 	else if (tablespaceId == GLOBALTABLESPACE_OID)
561 		ereport(ERROR,
562 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
563 				 errmsg("only shared relations can be placed in pg_global tablespace")));
564 
565 	/*
566 	 * Choose the index column names.
567 	 */
568 	indexColNames = ChooseIndexColumnNames(allIndexParams);
569 
570 	/*
571 	 * Select name for index if caller didn't specify
572 	 */
573 	indexRelationName = stmt->idxname;
574 	if (indexRelationName == NULL)
575 		indexRelationName = ChooseIndexName(RelationGetRelationName(rel),
576 											namespaceId,
577 											indexColNames,
578 											stmt->excludeOpNames,
579 											stmt->primary,
580 											stmt->isconstraint);
581 
582 	/*
583 	 * look up the access method, verify it can handle the requested features
584 	 */
585 	accessMethodName = stmt->accessMethod;
586 	tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
587 	if (!HeapTupleIsValid(tuple))
588 	{
589 		/*
590 		 * Hack to provide more-or-less-transparent updating of old RTREE
591 		 * indexes to GiST: if RTREE is requested and not found, use GIST.
592 		 */
593 		if (strcmp(accessMethodName, "rtree") == 0)
594 		{
595 			ereport(NOTICE,
596 					(errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
597 			accessMethodName = "gist";
598 			tuple = SearchSysCache1(AMNAME, PointerGetDatum(accessMethodName));
599 		}
600 
601 		if (!HeapTupleIsValid(tuple))
602 			ereport(ERROR,
603 					(errcode(ERRCODE_UNDEFINED_OBJECT),
604 					 errmsg("access method \"%s\" does not exist",
605 							accessMethodName)));
606 	}
607 	accessMethodId = HeapTupleGetOid(tuple);
608 	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);
609 	amRoutine = GetIndexAmRoutine(accessMethodForm->amhandler);
610 
611 	if (stmt->unique && !amRoutine->amcanunique)
612 		ereport(ERROR,
613 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
614 				 errmsg("access method \"%s\" does not support unique indexes",
615 						accessMethodName)));
616 	if (stmt->indexIncludingParams != NIL && !amRoutine->amcaninclude)
617 		ereport(ERROR,
618 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
619 				 errmsg("access method \"%s\" does not support included columns",
620 						accessMethodName)));
621 	if (numberOfKeyAttributes > 1 && !amRoutine->amcanmulticol)
622 		ereport(ERROR,
623 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
624 				 errmsg("access method \"%s\" does not support multicolumn indexes",
625 						accessMethodName)));
626 	if (stmt->excludeOpNames && amRoutine->amgettuple == NULL)
627 		ereport(ERROR,
628 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
629 				 errmsg("access method \"%s\" does not support exclusion constraints",
630 						accessMethodName)));
631 
632 	amcanorder = amRoutine->amcanorder;
633 	amoptions = amRoutine->amoptions;
634 
635 	pfree(amRoutine);
636 	ReleaseSysCache(tuple);
637 
638 	/*
639 	 * Validate predicate, if given
640 	 */
641 	if (stmt->whereClause)
642 		CheckPredicate((Expr *) stmt->whereClause);
643 
644 	/*
645 	 * Parse AM-specific options, convert to text array form, validate.
646 	 */
647 	reloptions = transformRelOptions((Datum) 0, stmt->options,
648 									 NULL, NULL, false, false);
649 
650 	(void) index_reloptions(amoptions, reloptions, true);
651 
652 	/*
653 	 * Prepare arguments for index_create, primarily an IndexInfo structure.
654 	 * Note that ii_Predicate must be in implicit-AND format.
655 	 */
656 	indexInfo = makeNode(IndexInfo);
657 	indexInfo->ii_NumIndexAttrs = numberOfAttributes;
658 	indexInfo->ii_NumIndexKeyAttrs = numberOfKeyAttributes;
659 	indexInfo->ii_Expressions = NIL;	/* for now */
660 	indexInfo->ii_ExpressionsState = NIL;
661 	indexInfo->ii_Predicate = make_ands_implicit((Expr *) stmt->whereClause);
662 	indexInfo->ii_PredicateState = NULL;
663 	indexInfo->ii_ExclusionOps = NULL;
664 	indexInfo->ii_ExclusionProcs = NULL;
665 	indexInfo->ii_ExclusionStrats = NULL;
666 	indexInfo->ii_Unique = stmt->unique;
667 	/* In a concurrent build, mark it not-ready-for-inserts */
668 	indexInfo->ii_ReadyForInserts = !concurrent;
669 	indexInfo->ii_Concurrent = concurrent;
670 	indexInfo->ii_BrokenHotChain = false;
671 	indexInfo->ii_ParallelWorkers = 0;
672 	indexInfo->ii_Am = accessMethodId;
673 	indexInfo->ii_AmCache = NULL;
674 	indexInfo->ii_Context = CurrentMemoryContext;
675 
676 	typeObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
677 	collationObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
678 	classObjectId = (Oid *) palloc(numberOfAttributes * sizeof(Oid));
679 	coloptions = (int16 *) palloc(numberOfAttributes * sizeof(int16));
680 	ComputeIndexAttrs(indexInfo,
681 					  typeObjectId, collationObjectId, classObjectId,
682 					  coloptions, allIndexParams,
683 					  stmt->excludeOpNames, relationId,
684 					  accessMethodName, accessMethodId,
685 					  amcanorder, stmt->isconstraint);
686 
687 	/*
688 	 * Extra checks when creating a PRIMARY KEY index.
689 	 */
690 	if (stmt->primary)
691 		index_check_primary_key(rel, indexInfo, is_alter_table, stmt);
692 
693 	/*
694 	 * If this table is partitioned and we're creating a unique index or a
695 	 * primary key, make sure that the partition key is a subset of the
696 	 * index's columns.  Otherwise it would be possible to violate uniqueness
697 	 * by putting values that ought to be unique in different partitions.
698 	 *
699 	 * We could lift this limitation if we had global indexes, but those have
700 	 * their own problems, so this is a useful feature combination.
701 	 */
702 	if (partitioned && (stmt->unique || stmt->primary))
703 	{
704 		PartitionKey key = RelationGetPartitionKey(rel);
705 		const char *constraint_type;
706 		int			i;
707 
708 		if (stmt->primary)
709 			constraint_type = "PRIMARY KEY";
710 		else if (stmt->unique)
711 			constraint_type = "UNIQUE";
712 		else if (stmt->excludeOpNames != NIL)
713 			constraint_type = "EXCLUDE";
714 		else
715 		{
716 			elog(ERROR, "unknown constraint type");
717 			constraint_type = NULL; /* keep compiler quiet */
718 		}
719 
720 		/*
721 		 * Verify that all the columns in the partition key appear in the
722 		 * unique key definition, with the same notion of equality.
723 		 */
724 		for (i = 0; i < key->partnatts; i++)
725 		{
726 			bool		found = false;
727 			int			eq_strategy;
728 			Oid			ptkey_eqop;
729 			int			j;
730 
731 			/*
732 			 * Identify the equality operator associated with this partkey
733 			 * column.  For list and range partitioning, partkeys use btree
734 			 * operator classes; hash partitioning uses hash operator classes.
735 			 * (Keep this in sync with ComputePartitionAttrs!)
736 			 */
737 			if (key->strategy == PARTITION_STRATEGY_HASH)
738 				eq_strategy = HTEqualStrategyNumber;
739 			else
740 				eq_strategy = BTEqualStrategyNumber;
741 
742 			ptkey_eqop = get_opfamily_member(key->partopfamily[i],
743 											 key->partopcintype[i],
744 											 key->partopcintype[i],
745 											 eq_strategy);
746 			if (!OidIsValid(ptkey_eqop))
747 				elog(ERROR, "missing operator %d(%u,%u) in partition opfamily %u",
748 					 eq_strategy, key->partopcintype[i], key->partopcintype[i],
749 					 key->partopfamily[i]);
750 
751 			/*
752 			 * We'll need to be able to identify the equality operators
753 			 * associated with index columns, too.  We know what to do with
754 			 * btree opclasses; if there are ever any other index types that
755 			 * support unique indexes, this logic will need extension.
756 			 */
757 			if (accessMethodId == BTREE_AM_OID)
758 				eq_strategy = BTEqualStrategyNumber;
759 			else
760 				ereport(ERROR,
761 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
762 						 errmsg("cannot match partition key to an index using access method \"%s\"",
763 								accessMethodName)));
764 
765 			/*
766 			 * It may be possible to support UNIQUE constraints when partition
767 			 * keys are expressions, but is it worth it?  Give up for now.
768 			 */
769 			if (key->partattrs[i] == 0)
770 				ereport(ERROR,
771 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
772 						 errmsg("unsupported %s constraint with partition key definition",
773 								constraint_type),
774 						 errdetail("%s constraints cannot be used when partition keys include expressions.",
775 								   constraint_type)));
776 
777 			/* Search the index column(s) for a match */
778 			for (j = 0; j < indexInfo->ii_NumIndexKeyAttrs; j++)
779 			{
780 				if (key->partattrs[i] == indexInfo->ii_IndexAttrNumbers[j])
781 				{
782 					/* Matched the column, now what about the equality op? */
783 					Oid			idx_opfamily;
784 					Oid			idx_opcintype;
785 
786 					idx_opfamily = get_opclass_family(classObjectId[j]);
787 					idx_opcintype = get_opclass_input_type(classObjectId[j]);
788 					if (OidIsValid(idx_opfamily) && OidIsValid(idx_opcintype))
789 					{
790 						Oid			idx_eqop;
791 
792 						idx_eqop = get_opfamily_member(idx_opfamily,
793 													   idx_opcintype,
794 													   idx_opcintype,
795 													   eq_strategy);
796 						if (ptkey_eqop == idx_eqop)
797 						{
798 							found = true;
799 							break;
800 						}
801 					}
802 				}
803 			}
804 
805 			if (!found)
806 			{
807 				Form_pg_attribute att;
808 
809 				att = TupleDescAttr(RelationGetDescr(rel),
810 									key->partattrs[i] - 1);
811 				ereport(ERROR,
812 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
813 						 errmsg("unique constraint on partitioned table must include all partitioning columns"),
814 						 errdetail("%s constraint on table \"%s\" lacks column \"%s\" which is part of the partition key.",
815 								   constraint_type, RelationGetRelationName(rel),
816 								   NameStr(att->attname))));
817 			}
818 		}
819 	}
820 
821 
822 	/*
823 	 * We disallow indexes on system columns other than OID.  They would not
824 	 * necessarily get updated correctly, and they don't seem useful anyway.
825 	 */
826 	for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
827 	{
828 		AttrNumber	attno = indexInfo->ii_IndexAttrNumbers[i];
829 
830 		if (attno < 0 && attno != ObjectIdAttributeNumber)
831 			ereport(ERROR,
832 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
833 					 errmsg("index creation on system columns is not supported")));
834 	}
835 
836 	/*
837 	 * Also check for system columns used in expressions or predicates.
838 	 */
839 	if (indexInfo->ii_Expressions || indexInfo->ii_Predicate)
840 	{
841 		Bitmapset  *indexattrs = NULL;
842 
843 		pull_varattnos((Node *) indexInfo->ii_Expressions, 1, &indexattrs);
844 		pull_varattnos((Node *) indexInfo->ii_Predicate, 1, &indexattrs);
845 
846 		for (i = FirstLowInvalidHeapAttributeNumber + 1; i < 0; i++)
847 		{
848 			if (i != ObjectIdAttributeNumber &&
849 				bms_is_member(i - FirstLowInvalidHeapAttributeNumber,
850 							  indexattrs))
851 				ereport(ERROR,
852 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
853 						 errmsg("index creation on system columns is not supported")));
854 		}
855 	}
856 
857 	/*
858 	 * Report index creation if appropriate (delay this till after most of the
859 	 * error checks)
860 	 */
861 	if (stmt->isconstraint && !quiet)
862 	{
863 		const char *constraint_type;
864 
865 		if (stmt->primary)
866 			constraint_type = "PRIMARY KEY";
867 		else if (stmt->unique)
868 			constraint_type = "UNIQUE";
869 		else if (stmt->excludeOpNames != NIL)
870 			constraint_type = "EXCLUDE";
871 		else
872 		{
873 			elog(ERROR, "unknown constraint type");
874 			constraint_type = NULL; /* keep compiler quiet */
875 		}
876 
877 		ereport(DEBUG1,
878 				(errmsg("%s %s will create implicit index \"%s\" for table \"%s\"",
879 						is_alter_table ? "ALTER TABLE / ADD" : "CREATE TABLE /",
880 						constraint_type,
881 						indexRelationName, RelationGetRelationName(rel))));
882 	}
883 
884 	/*
885 	 * A valid stmt->oldNode implies that we already have a built form of the
886 	 * index.  The caller should also decline any index build.
887 	 */
888 	Assert(!OidIsValid(stmt->oldNode) || (skip_build && !concurrent));
889 
890 	/*
891 	 * Make the catalog entries for the index, including constraints. This
892 	 * step also actually builds the index, except if caller requested not to
893 	 * or in concurrent mode, in which case it'll be done later, or doing a
894 	 * partitioned index (because those don't have storage).
895 	 */
896 	flags = constr_flags = 0;
897 	if (stmt->isconstraint)
898 		flags |= INDEX_CREATE_ADD_CONSTRAINT;
899 	if (skip_build || concurrent || partitioned)
900 		flags |= INDEX_CREATE_SKIP_BUILD;
901 	if (stmt->if_not_exists)
902 		flags |= INDEX_CREATE_IF_NOT_EXISTS;
903 	if (concurrent)
904 		flags |= INDEX_CREATE_CONCURRENT;
905 	if (partitioned)
906 		flags |= INDEX_CREATE_PARTITIONED;
907 	if (stmt->primary)
908 		flags |= INDEX_CREATE_IS_PRIMARY;
909 
910 	/*
911 	 * If the table is partitioned, and recursion was declined but partitions
912 	 * exist, mark the index as invalid.
913 	 */
914 	if (partitioned && stmt->relation && !stmt->relation->inh)
915 	{
916 		PartitionDesc	pd = RelationGetPartitionDesc(rel);
917 
918 		if (pd->nparts != 0)
919 			flags |= INDEX_CREATE_INVALID;
920 	}
921 
922 	if (stmt->deferrable)
923 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
924 	if (stmt->initdeferred)
925 		constr_flags |= INDEX_CONSTR_CREATE_INIT_DEFERRED;
926 
927 	indexRelationId =
928 		index_create(rel, indexRelationName, indexRelationId, parentIndexId,
929 					 parentConstraintId,
930 					 stmt->oldNode, indexInfo, indexColNames,
931 					 accessMethodId, tablespaceId,
932 					 collationObjectId, classObjectId,
933 					 coloptions, reloptions,
934 					 flags, constr_flags,
935 					 allowSystemTableMods, !check_rights,
936 					 &createdConstraintId);
937 
938 	ObjectAddressSet(address, RelationRelationId, indexRelationId);
939 
940 	if (!OidIsValid(indexRelationId))
941 	{
942 		heap_close(rel, NoLock);
943 		return address;
944 	}
945 
946 	/* Add any requested comment */
947 	if (stmt->idxcomment != NULL)
948 		CreateComments(indexRelationId, RelationRelationId, 0,
949 					   stmt->idxcomment);
950 
951 	if (partitioned)
952 	{
953 		PartitionDesc partdesc;
954 
955 		/*
956 		 * Unless caller specified to skip this step (via ONLY), process each
957 		 * partition to make sure they all contain a corresponding index.
958 		 *
959 		 * If we're called internally (no stmt->relation), recurse always.
960 		 */
961 		partdesc = RelationGetPartitionDesc(rel);
962 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
963 		{
964 			int			nparts = partdesc->nparts;
965 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
966 			bool		invalidate_parent = false;
967 			TupleDesc	parentDesc;
968 			Oid		   *opfamOids;
969 
970 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
971 
972 			parentDesc = RelationGetDescr(rel);
973 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
974 			for (i = 0; i < numberOfKeyAttributes; i++)
975 				opfamOids[i] = get_opclass_family(classObjectId[i]);
976 
977 			/*
978 			 * For each partition, scan all existing indexes; if one matches
979 			 * our index definition and is not already attached to some other
980 			 * parent index, attach it to the one we just created.
981 			 *
982 			 * If none matches, build a new index by calling ourselves
983 			 * recursively with the same options (except for the index name).
984 			 */
985 			for (i = 0; i < nparts; i++)
986 			{
987 				Oid			childRelid = part_oids[i];
988 				Relation	childrel;
989 				List	   *childidxs;
990 				ListCell   *cell;
991 				AttrNumber *attmap;
992 				bool		found = false;
993 				int			maplen;
994 
995 				childrel = heap_open(childRelid, lockmode);
996 
997 				/*
998 				 * Don't try to create indexes on foreign tables, though.
999 				 * Skip those if a regular index, or fail if trying to create
1000 				 * a constraint index.
1001 				 */
1002 				if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
1003 				{
1004 					if (stmt->unique || stmt->primary)
1005 						ereport(ERROR,
1006 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1007 								 errmsg("cannot create unique index on partitioned table \"%s\"",
1008 										RelationGetRelationName(rel)),
1009 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
1010 										RelationGetRelationName(rel))));
1011 
1012 					heap_close(childrel, lockmode);
1013 					continue;
1014 				}
1015 
1016 				childidxs = RelationGetIndexList(childrel);
1017 				attmap =
1018 					convert_tuples_by_name_map(RelationGetDescr(childrel),
1019 											   parentDesc,
1020 											   gettext_noop("could not convert row type"));
1021 				maplen = parentDesc->natts;
1022 
1023 				foreach(cell, childidxs)
1024 				{
1025 					Oid			cldidxid = lfirst_oid(cell);
1026 					Relation	cldidx;
1027 					IndexInfo  *cldIdxInfo;
1028 
1029 					/* this index is already partition of another one */
1030 					if (has_superclass(cldidxid))
1031 						continue;
1032 
1033 					cldidx = index_open(cldidxid, lockmode);
1034 					cldIdxInfo = BuildIndexInfo(cldidx);
1035 					if (CompareIndexInfo(cldIdxInfo, indexInfo,
1036 										 cldidx->rd_indcollation,
1037 										 collationObjectId,
1038 										 cldidx->rd_opfamily,
1039 										 opfamOids,
1040 										 attmap, maplen))
1041 					{
1042 						Oid			cldConstrOid = InvalidOid;
1043 
1044 						/*
1045 						 * Found a match.
1046 						 *
1047 						 * If this index is being created in the parent
1048 						 * because of a constraint, then the child needs to
1049 						 * have a constraint also, so look for one.  If there
1050 						 * is no such constraint, this index is no good, so
1051 						 * keep looking.
1052 						 */
1053 						if (createdConstraintId != InvalidOid)
1054 						{
1055 							cldConstrOid =
1056 								get_relation_idx_constraint_oid(childRelid,
1057 																cldidxid);
1058 							if (cldConstrOid == InvalidOid)
1059 							{
1060 								index_close(cldidx, lockmode);
1061 								continue;
1062 							}
1063 						}
1064 
1065 						/* Attach index to parent and we're done. */
1066 						IndexSetParentIndex(cldidx, indexRelationId);
1067 						if (createdConstraintId != InvalidOid)
1068 							ConstraintSetParentConstraint(cldConstrOid,
1069 														  createdConstraintId);
1070 
1071 						if (!IndexIsValid(cldidx->rd_index))
1072 							invalidate_parent = true;
1073 
1074 						found = true;
1075 						/* keep lock till commit */
1076 						index_close(cldidx, NoLock);
1077 						break;
1078 					}
1079 
1080 					index_close(cldidx, lockmode);
1081 				}
1082 
1083 				list_free(childidxs);
1084 				heap_close(childrel, NoLock);
1085 
1086 				/*
1087 				 * If no matching index was found, create our own.
1088 				 */
1089 				if (!found)
1090 				{
1091 					IndexStmt  *childStmt = copyObject(stmt);
1092 					bool		found_whole_row;
1093 					ListCell   *lc;
1094 
1095 					/*
1096 					 * We can't use the same index name for the child index,
1097 					 * so clear idxname to let the recursive invocation choose
1098 					 * a new name.  Likewise, the existing target relation
1099 					 * field is wrong, and if indexOid or oldNode are set,
1100 					 * they mustn't be applied to the child either.
1101 					 */
1102 					childStmt->idxname = NULL;
1103 					childStmt->relation = NULL;
1104 					childStmt->relationId = childRelid;
1105 					childStmt->indexOid = InvalidOid;
1106 					childStmt->oldNode = InvalidOid;
1107 
1108 					/*
1109 					 * Adjust any Vars (both in expressions and in the index's
1110 					 * WHERE clause) to match the partition's column numbering
1111 					 * in case it's different from the parent's.
1112 					 */
1113 					foreach(lc, childStmt->indexParams)
1114 					{
1115 						IndexElem  *ielem = lfirst(lc);
1116 
1117 						/*
1118 						 * If the index parameter is an expression, we must
1119 						 * translate it to contain child Vars.
1120 						 */
1121 						if (ielem->expr)
1122 						{
1123 							ielem->expr =
1124 								map_variable_attnos((Node *) ielem->expr,
1125 													1, 0, attmap, maplen,
1126 													InvalidOid,
1127 													&found_whole_row);
1128 							if (found_whole_row)
1129 								elog(ERROR, "cannot convert whole-row table reference");
1130 						}
1131 					}
1132 					childStmt->whereClause =
1133 						map_variable_attnos(stmt->whereClause, 1, 0,
1134 											attmap, maplen,
1135 											InvalidOid, &found_whole_row);
1136 					if (found_whole_row)
1137 						elog(ERROR, "cannot convert whole-row table reference");
1138 
1139 					DefineIndex(childRelid, childStmt,
1140 								InvalidOid, /* no predefined OID */
1141 								indexRelationId,	/* this is our child */
1142 								createdConstraintId,
1143 								is_alter_table, check_rights, check_not_in_use,
1144 								skip_build, quiet);
1145 				}
1146 
1147 				pfree(attmap);
1148 			}
1149 
1150 			/*
1151 			 * The pg_index row we inserted for this index was marked
1152 			 * indisvalid=true.  But if we attached an existing index that is
1153 			 * invalid, this is incorrect, so update our row to invalid too.
1154 			 */
1155 			if (invalidate_parent)
1156 			{
1157 				Relation	pg_index = heap_open(IndexRelationId, RowExclusiveLock);
1158 				HeapTuple	tup,
1159 							newtup;
1160 
1161 				tup = SearchSysCache1(INDEXRELID,
1162 									  ObjectIdGetDatum(indexRelationId));
1163 				if (!HeapTupleIsValid(tup))
1164 					elog(ERROR, "cache lookup failed for index %u",
1165 						 indexRelationId);
1166 				newtup = heap_copytuple(tup);
1167 				((Form_pg_index) GETSTRUCT(newtup))->indisvalid = false;
1168 				CatalogTupleUpdate(pg_index, &tup->t_self, newtup);
1169 				ReleaseSysCache(tup);
1170 				heap_close(pg_index, RowExclusiveLock);
1171 				heap_freetuple(newtup);
1172 			}
1173 		}
1174 
1175 		/*
1176 		 * Indexes on partitioned tables are not themselves built, so we're
1177 		 * done here.
1178 		 */
1179 		heap_close(rel, NoLock);
1180 		return address;
1181 	}
1182 
1183 	if (!concurrent)
1184 	{
1185 		/* Close the heap and we're done, in the non-concurrent case */
1186 		heap_close(rel, NoLock);
1187 		return address;
1188 	}
1189 
1190 	/* save lockrelid and locktag for below, then close rel */
1191 	heaprelid = rel->rd_lockInfo.lockRelId;
1192 	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
1193 	heap_close(rel, NoLock);
1194 
1195 	/*
1196 	 * For a concurrent build, it's important to make the catalog entries
1197 	 * visible to other transactions before we start to build the index. That
1198 	 * will prevent them from making incompatible HOT updates.  The new index
1199 	 * will be marked not indisready and not indisvalid, so that no one else
1200 	 * tries to either insert into it or use it for queries.
1201 	 *
1202 	 * We must commit our current transaction so that the index becomes
1203 	 * visible; then start another.  Note that all the data structures we just
1204 	 * built are lost in the commit.  The only data we keep past here are the
1205 	 * relation IDs.
1206 	 *
1207 	 * Before committing, get a session-level lock on the table, to ensure
1208 	 * that neither it nor the index can be dropped before we finish. This
1209 	 * cannot block, even if someone else is waiting for access, because we
1210 	 * already have the same lock within our transaction.
1211 	 *
1212 	 * Note: we don't currently bother with a session lock on the index,
1213 	 * because there are no operations that could change its state while we
1214 	 * hold lock on the parent table.  This might need to change later.
1215 	 */
1216 	LockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1217 
1218 	PopActiveSnapshot();
1219 	CommitTransactionCommand();
1220 	StartTransactionCommand();
1221 
1222 	/*
1223 	 * Phase 2 of concurrent index build (see comments for validate_index()
1224 	 * for an overview of how this works)
1225 	 *
1226 	 * Now we must wait until no running transaction could have the table open
1227 	 * with the old list of indexes.  Use ShareLock to consider running
1228 	 * transactions that hold locks that permit writing to the table.  Note we
1229 	 * do not need to worry about xacts that open the table for writing after
1230 	 * this point; they will see the new index when they open it.
1231 	 *
1232 	 * Note: the reason we use actual lock acquisition here, rather than just
1233 	 * checking the ProcArray and sleeping, is that deadlock is possible if
1234 	 * one of the transactions in question is blocked trying to acquire an
1235 	 * exclusive lock on our table.  The lock code will detect deadlock and
1236 	 * error out properly.
1237 	 */
1238 	WaitForLockers(heaplocktag, ShareLock);
1239 
1240 	/*
1241 	 * At this moment we are sure that there are no transactions with the
1242 	 * table open for write that don't have this new index in their list of
1243 	 * indexes.  We have waited out all the existing transactions and any new
1244 	 * transaction will have the new index in its list, but the index is still
1245 	 * marked as "not-ready-for-inserts".  The index is consulted while
1246 	 * deciding HOT-safety though.  This arrangement ensures that no new HOT
1247 	 * chains can be created where the new tuple and the old tuple in the
1248 	 * chain have different index keys.
1249 	 *
1250 	 * We now take a new snapshot, and build the index using all tuples that
1251 	 * are visible in this snapshot.  We can be sure that any HOT updates to
1252 	 * these tuples will be compatible with the index, since any updates made
1253 	 * by transactions that didn't know about the index are now committed or
1254 	 * rolled back.  Thus, each visible tuple is either the end of its
1255 	 * HOT-chain or the extension of the chain is HOT-safe for this index.
1256 	 */
1257 
1258 	/* Open and lock the parent heap relation */
1259 	rel = heap_openrv(stmt->relation, ShareUpdateExclusiveLock);
1260 
1261 	/* And the target index relation */
1262 	indexRelation = index_open(indexRelationId, RowExclusiveLock);
1263 
1264 	/* Set ActiveSnapshot since functions in the indexes may need it */
1265 	PushActiveSnapshot(GetTransactionSnapshot());
1266 
1267 	/* We have to re-build the IndexInfo struct, since it was lost in commit */
1268 	indexInfo = BuildIndexInfo(indexRelation);
1269 	Assert(!indexInfo->ii_ReadyForInserts);
1270 	indexInfo->ii_Concurrent = true;
1271 	indexInfo->ii_BrokenHotChain = false;
1272 
1273 	/* Now build the index */
1274 	index_build(rel, indexRelation, indexInfo, stmt->primary, false, true);
1275 
1276 	/* Close both the relations, but keep the locks */
1277 	heap_close(rel, NoLock);
1278 	index_close(indexRelation, NoLock);
1279 
1280 	/*
1281 	 * Update the pg_index row to mark the index as ready for inserts. Once we
1282 	 * commit this transaction, any new transactions that open the table must
1283 	 * insert new entries into the index for insertions and non-HOT updates.
1284 	 */
1285 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
1286 
1287 	/* we can do away with our snapshot */
1288 	PopActiveSnapshot();
1289 
1290 	/*
1291 	 * Commit this transaction to make the indisready update visible.
1292 	 */
1293 	CommitTransactionCommand();
1294 	StartTransactionCommand();
1295 
1296 	/*
1297 	 * Phase 3 of concurrent index build
1298 	 *
1299 	 * We once again wait until no transaction can have the table open with
1300 	 * the index marked as read-only for updates.
1301 	 */
1302 	WaitForLockers(heaplocktag, ShareLock);
1303 
1304 	/*
1305 	 * Now take the "reference snapshot" that will be used by validate_index()
1306 	 * to filter candidate tuples.  Beware!  There might still be snapshots in
1307 	 * use that treat some transaction as in-progress that our reference
1308 	 * snapshot treats as committed.  If such a recently-committed transaction
1309 	 * deleted tuples in the table, we will not include them in the index; yet
1310 	 * those transactions which see the deleting one as still-in-progress will
1311 	 * expect such tuples to be there once we mark the index as valid.
1312 	 *
1313 	 * We solve this by waiting for all endangered transactions to exit before
1314 	 * we mark the index as valid.
1315 	 *
1316 	 * We also set ActiveSnapshot to this snap, since functions in indexes may
1317 	 * need a snapshot.
1318 	 */
1319 	snapshot = RegisterSnapshot(GetTransactionSnapshot());
1320 	PushActiveSnapshot(snapshot);
1321 
1322 	/*
1323 	 * Scan the index and the heap, insert any missing index entries.
1324 	 */
1325 	validate_index(relationId, indexRelationId, snapshot);
1326 
1327 	/*
1328 	 * Drop the reference snapshot.  We must do this before waiting out other
1329 	 * snapshot holders, else we will deadlock against other processes also
1330 	 * doing CREATE INDEX CONCURRENTLY, which would see our snapshot as one
1331 	 * they must wait for.  But first, save the snapshot's xmin to use as
1332 	 * limitXmin for GetCurrentVirtualXIDs().
1333 	 */
1334 	limitXmin = snapshot->xmin;
1335 
1336 	PopActiveSnapshot();
1337 	UnregisterSnapshot(snapshot);
1338 
1339 	/*
1340 	 * The snapshot subsystem could still contain registered snapshots that
1341 	 * are holding back our process's advertised xmin; in particular, if
1342 	 * default_transaction_isolation = serializable, there is a transaction
1343 	 * snapshot that is still active.  The CatalogSnapshot is likewise a
1344 	 * hazard.  To ensure no deadlocks, we must commit and start yet another
1345 	 * transaction, and do our wait before any snapshot has been taken in it.
1346 	 */
1347 	CommitTransactionCommand();
1348 	StartTransactionCommand();
1349 
1350 	/* We should now definitely not be advertising any xmin. */
1351 	Assert(MyPgXact->xmin == InvalidTransactionId);
1352 
1353 	/*
1354 	 * The index is now valid in the sense that it contains all currently
1355 	 * interesting tuples.  But since it might not contain tuples deleted just
1356 	 * before the reference snap was taken, we have to wait out any
1357 	 * transactions that might have older snapshots.  Obtain a list of VXIDs
1358 	 * of such transactions, and wait for them individually.
1359 	 *
1360 	 * We can exclude any running transactions that have xmin > the xmin of
1361 	 * our reference snapshot; their oldest snapshot must be newer than ours.
1362 	 * We can also exclude any transactions that have xmin = zero, since they
1363 	 * evidently have no live snapshot at all (and any one they might be in
1364 	 * process of taking is certainly newer than ours).  Transactions in other
1365 	 * DBs can be ignored too, since they'll never even be able to see this
1366 	 * index.
1367 	 *
1368 	 * We can also exclude autovacuum processes and processes running manual
1369 	 * lazy VACUUMs, because they won't be fazed by missing index entries
1370 	 * either.  (Manual ANALYZEs, however, can't be excluded because they
1371 	 * might be within transactions that are going to do arbitrary operations
1372 	 * later.)
1373 	 *
1374 	 * Also, GetCurrentVirtualXIDs never reports our own vxid, so we need not
1375 	 * check for that.
1376 	 *
1377 	 * If a process goes idle-in-transaction with xmin zero, we do not need to
1378 	 * wait for it anymore, per the above argument.  We do not have the
1379 	 * infrastructure right now to stop waiting if that happens, but we can at
1380 	 * least avoid the folly of waiting when it is idle at the time we would
1381 	 * begin to wait.  We do this by repeatedly rechecking the output of
1382 	 * GetCurrentVirtualXIDs.  If, during any iteration, a particular vxid
1383 	 * doesn't show up in the output, we know we can forget about it.
1384 	 */
1385 	old_snapshots = GetCurrentVirtualXIDs(limitXmin, true, false,
1386 										  PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
1387 										  &n_old_snapshots);
1388 
1389 	for (i = 0; i < n_old_snapshots; i++)
1390 	{
1391 		if (!VirtualTransactionIdIsValid(old_snapshots[i]))
1392 			continue;			/* found uninteresting in previous cycle */
1393 
1394 		if (i > 0)
1395 		{
1396 			/* see if anything's changed ... */
1397 			VirtualTransactionId *newer_snapshots;
1398 			int			n_newer_snapshots;
1399 			int			j;
1400 			int			k;
1401 
1402 			newer_snapshots = GetCurrentVirtualXIDs(limitXmin,
1403 													true, false,
1404 													PROC_IS_AUTOVACUUM | PROC_IN_VACUUM,
1405 													&n_newer_snapshots);
1406 			for (j = i; j < n_old_snapshots; j++)
1407 			{
1408 				if (!VirtualTransactionIdIsValid(old_snapshots[j]))
1409 					continue;	/* found uninteresting in previous cycle */
1410 				for (k = 0; k < n_newer_snapshots; k++)
1411 				{
1412 					if (VirtualTransactionIdEquals(old_snapshots[j],
1413 												   newer_snapshots[k]))
1414 						break;
1415 				}
1416 				if (k >= n_newer_snapshots) /* not there anymore */
1417 					SetInvalidVirtualTransactionId(old_snapshots[j]);
1418 			}
1419 			pfree(newer_snapshots);
1420 		}
1421 
1422 		if (VirtualTransactionIdIsValid(old_snapshots[i]))
1423 			VirtualXactLock(old_snapshots[i], true);
1424 	}
1425 
1426 	/*
1427 	 * Index can now be marked valid -- update its pg_index entry
1428 	 */
1429 	index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
1430 
1431 	/*
1432 	 * The pg_index update will cause backends (including this one) to update
1433 	 * relcache entries for the index itself, but we should also send a
1434 	 * relcache inval on the parent table to force replanning of cached plans.
1435 	 * Otherwise existing sessions might fail to use the new index where it
1436 	 * would be useful.  (Note that our earlier commits did not create reasons
1437 	 * to replan; so relcache flush on the index itself was sufficient.)
1438 	 */
1439 	CacheInvalidateRelcacheByRelid(heaprelid.relId);
1440 
1441 	/*
1442 	 * Last thing to do is release the session-level lock on the parent table.
1443 	 */
1444 	UnlockRelationIdForSession(&heaprelid, ShareUpdateExclusiveLock);
1445 
1446 	return address;
1447 }
1448 
1449 
1450 /*
1451  * CheckMutability
1452  *		Test whether given expression is mutable
1453  */
1454 static bool
CheckMutability(Expr * expr)1455 CheckMutability(Expr *expr)
1456 {
1457 	/*
1458 	 * First run the expression through the planner.  This has a couple of
1459 	 * important consequences.  First, function default arguments will get
1460 	 * inserted, which may affect volatility (consider "default now()").
1461 	 * Second, inline-able functions will get inlined, which may allow us to
1462 	 * conclude that the function is really less volatile than it's marked. As
1463 	 * an example, polymorphic functions must be marked with the most volatile
1464 	 * behavior that they have for any input type, but once we inline the
1465 	 * function we may be able to conclude that it's not so volatile for the
1466 	 * particular input type we're dealing with.
1467 	 *
1468 	 * We assume here that expression_planner() won't scribble on its input.
1469 	 */
1470 	expr = expression_planner(expr);
1471 
1472 	/* Now we can search for non-immutable functions */
1473 	return contain_mutable_functions((Node *) expr);
1474 }
1475 
1476 
1477 /*
1478  * CheckPredicate
1479  *		Checks that the given partial-index predicate is valid.
1480  *
1481  * This used to also constrain the form of the predicate to forms that
1482  * indxpath.c could do something with.  However, that seems overly
1483  * restrictive.  One useful application of partial indexes is to apply
1484  * a UNIQUE constraint across a subset of a table, and in that scenario
1485  * any evaluable predicate will work.  So accept any predicate here
1486  * (except ones requiring a plan), and let indxpath.c fend for itself.
1487  */
1488 static void
CheckPredicate(Expr * predicate)1489 CheckPredicate(Expr *predicate)
1490 {
1491 	/*
1492 	 * transformExpr() should have already rejected subqueries, aggregates,
1493 	 * and window functions, based on the EXPR_KIND_ for a predicate.
1494 	 */
1495 
1496 	/*
1497 	 * A predicate using mutable functions is probably wrong, for the same
1498 	 * reasons that we don't allow an index expression to use one.
1499 	 */
1500 	if (CheckMutability(predicate))
1501 		ereport(ERROR,
1502 				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1503 				 errmsg("functions in index predicate must be marked IMMUTABLE")));
1504 }
1505 
1506 /*
1507  * Compute per-index-column information, including indexed column numbers
1508  * or index expressions, opclasses, and indoptions. Note, all output vectors
1509  * should be allocated for all columns, including "including" ones.
1510  */
1511 static void
ComputeIndexAttrs(IndexInfo * indexInfo,Oid * typeOidP,Oid * collationOidP,Oid * classOidP,int16 * colOptionP,List * attList,List * exclusionOpNames,Oid relId,const char * accessMethodName,Oid accessMethodId,bool amcanorder,bool isconstraint)1512 ComputeIndexAttrs(IndexInfo *indexInfo,
1513 				  Oid *typeOidP,
1514 				  Oid *collationOidP,
1515 				  Oid *classOidP,
1516 				  int16 *colOptionP,
1517 				  List *attList,	/* list of IndexElem's */
1518 				  List *exclusionOpNames,
1519 				  Oid relId,
1520 				  const char *accessMethodName,
1521 				  Oid accessMethodId,
1522 				  bool amcanorder,
1523 				  bool isconstraint)
1524 {
1525 	ListCell   *nextExclOp;
1526 	ListCell   *lc;
1527 	int			attn;
1528 	int			nkeycols = indexInfo->ii_NumIndexKeyAttrs;
1529 
1530 	/* Allocate space for exclusion operator info, if needed */
1531 	if (exclusionOpNames)
1532 	{
1533 		Assert(list_length(exclusionOpNames) == nkeycols);
1534 		indexInfo->ii_ExclusionOps = (Oid *) palloc(sizeof(Oid) * nkeycols);
1535 		indexInfo->ii_ExclusionProcs = (Oid *) palloc(sizeof(Oid) * nkeycols);
1536 		indexInfo->ii_ExclusionStrats = (uint16 *) palloc(sizeof(uint16) * nkeycols);
1537 		nextExclOp = list_head(exclusionOpNames);
1538 	}
1539 	else
1540 		nextExclOp = NULL;
1541 
1542 	/*
1543 	 * process attributeList
1544 	 */
1545 	attn = 0;
1546 	foreach(lc, attList)
1547 	{
1548 		IndexElem  *attribute = (IndexElem *) lfirst(lc);
1549 		Oid			atttype;
1550 		Oid			attcollation;
1551 
1552 		/*
1553 		 * Process the column-or-expression to be indexed.
1554 		 */
1555 		if (attribute->name != NULL)
1556 		{
1557 			/* Simple index attribute */
1558 			HeapTuple	atttuple;
1559 			Form_pg_attribute attform;
1560 
1561 			Assert(attribute->expr == NULL);
1562 			atttuple = SearchSysCacheAttName(relId, attribute->name);
1563 			if (!HeapTupleIsValid(atttuple))
1564 			{
1565 				/* difference in error message spellings is historical */
1566 				if (isconstraint)
1567 					ereport(ERROR,
1568 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1569 							 errmsg("column \"%s\" named in key does not exist",
1570 									attribute->name)));
1571 				else
1572 					ereport(ERROR,
1573 							(errcode(ERRCODE_UNDEFINED_COLUMN),
1574 							 errmsg("column \"%s\" does not exist",
1575 									attribute->name)));
1576 			}
1577 			attform = (Form_pg_attribute) GETSTRUCT(atttuple);
1578 			indexInfo->ii_IndexAttrNumbers[attn] = attform->attnum;
1579 			atttype = attform->atttypid;
1580 			attcollation = attform->attcollation;
1581 			ReleaseSysCache(atttuple);
1582 		}
1583 		else
1584 		{
1585 			/* Index expression */
1586 			Node	   *expr = attribute->expr;
1587 
1588 			Assert(expr != NULL);
1589 
1590 			if (attn >= nkeycols)
1591 				ereport(ERROR,
1592 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1593 						 errmsg("expressions are not supported in included columns")));
1594 			atttype = exprType(expr);
1595 			attcollation = exprCollation(expr);
1596 
1597 			/*
1598 			 * Strip any top-level COLLATE clause.  This ensures that we treat
1599 			 * "x COLLATE y" and "(x COLLATE y)" alike.
1600 			 */
1601 			while (IsA(expr, CollateExpr))
1602 				expr = (Node *) ((CollateExpr *) expr)->arg;
1603 
1604 			if (IsA(expr, Var) &&
1605 				((Var *) expr)->varattno != InvalidAttrNumber)
1606 			{
1607 				/*
1608 				 * User wrote "(column)" or "(column COLLATE something)".
1609 				 * Treat it like simple attribute anyway.
1610 				 */
1611 				indexInfo->ii_IndexAttrNumbers[attn] = ((Var *) expr)->varattno;
1612 			}
1613 			else
1614 			{
1615 				indexInfo->ii_IndexAttrNumbers[attn] = 0;	/* marks expression */
1616 				indexInfo->ii_Expressions = lappend(indexInfo->ii_Expressions,
1617 													expr);
1618 
1619 				/*
1620 				 * transformExpr() should have already rejected subqueries,
1621 				 * aggregates, and window functions, based on the EXPR_KIND_
1622 				 * for an index expression.
1623 				 */
1624 
1625 				/*
1626 				 * An expression using mutable functions is probably wrong,
1627 				 * since if you aren't going to get the same result for the
1628 				 * same data every time, it's not clear what the index entries
1629 				 * mean at all.
1630 				 */
1631 				if (CheckMutability((Expr *) expr))
1632 					ereport(ERROR,
1633 							(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1634 							 errmsg("functions in index expression must be marked IMMUTABLE")));
1635 			}
1636 		}
1637 
1638 		typeOidP[attn] = atttype;
1639 
1640 		/*
1641 		 * Included columns have no collation, no opclass and no ordering
1642 		 * options.
1643 		 */
1644 		if (attn >= nkeycols)
1645 		{
1646 			if (attribute->collation)
1647 				ereport(ERROR,
1648 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1649 						 errmsg("including column does not support a collation")));
1650 			if (attribute->opclass)
1651 				ereport(ERROR,
1652 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1653 						 errmsg("including column does not support an operator class")));
1654 			if (attribute->ordering != SORTBY_DEFAULT)
1655 				ereport(ERROR,
1656 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1657 						 errmsg("including column does not support ASC/DESC options")));
1658 			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1659 				ereport(ERROR,
1660 						(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
1661 						 errmsg("including column does not support NULLS FIRST/LAST options")));
1662 
1663 			classOidP[attn] = InvalidOid;
1664 			colOptionP[attn] = 0;
1665 			collationOidP[attn] = InvalidOid;
1666 			attn++;
1667 
1668 			continue;
1669 		}
1670 
1671 		/*
1672 		 * Apply collation override if any
1673 		 */
1674 		if (attribute->collation)
1675 			attcollation = get_collation_oid(attribute->collation, false);
1676 
1677 		/*
1678 		 * Check we have a collation iff it's a collatable type.  The only
1679 		 * expected failures here are (1) COLLATE applied to a noncollatable
1680 		 * type, or (2) index expression had an unresolved collation.  But we
1681 		 * might as well code this to be a complete consistency check.
1682 		 */
1683 		if (type_is_collatable(atttype))
1684 		{
1685 			if (!OidIsValid(attcollation))
1686 				ereport(ERROR,
1687 						(errcode(ERRCODE_INDETERMINATE_COLLATION),
1688 						 errmsg("could not determine which collation to use for index expression"),
1689 						 errhint("Use the COLLATE clause to set the collation explicitly.")));
1690 		}
1691 		else
1692 		{
1693 			if (OidIsValid(attcollation))
1694 				ereport(ERROR,
1695 						(errcode(ERRCODE_DATATYPE_MISMATCH),
1696 						 errmsg("collations are not supported by type %s",
1697 								format_type_be(atttype))));
1698 		}
1699 
1700 		collationOidP[attn] = attcollation;
1701 
1702 		/*
1703 		 * Identify the opclass to use.
1704 		 */
1705 		classOidP[attn] = ResolveOpClass(attribute->opclass,
1706 										 atttype,
1707 										 accessMethodName,
1708 										 accessMethodId);
1709 
1710 		/*
1711 		 * Identify the exclusion operator, if any.
1712 		 */
1713 		if (nextExclOp)
1714 		{
1715 			List	   *opname = (List *) lfirst(nextExclOp);
1716 			Oid			opid;
1717 			Oid			opfamily;
1718 			int			strat;
1719 
1720 			/*
1721 			 * Find the operator --- it must accept the column datatype
1722 			 * without runtime coercion (but binary compatibility is OK)
1723 			 */
1724 			opid = compatible_oper_opid(opname, atttype, atttype, false);
1725 
1726 			/*
1727 			 * Only allow commutative operators to be used in exclusion
1728 			 * constraints. If X conflicts with Y, but Y does not conflict
1729 			 * with X, bad things will happen.
1730 			 */
1731 			if (get_commutator(opid) != opid)
1732 				ereport(ERROR,
1733 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1734 						 errmsg("operator %s is not commutative",
1735 								format_operator(opid)),
1736 						 errdetail("Only commutative operators can be used in exclusion constraints.")));
1737 
1738 			/*
1739 			 * Operator must be a member of the right opfamily, too
1740 			 */
1741 			opfamily = get_opclass_family(classOidP[attn]);
1742 			strat = get_op_opfamily_strategy(opid, opfamily);
1743 			if (strat == 0)
1744 			{
1745 				HeapTuple	opftuple;
1746 				Form_pg_opfamily opfform;
1747 
1748 				/*
1749 				 * attribute->opclass might not explicitly name the opfamily,
1750 				 * so fetch the name of the selected opfamily for use in the
1751 				 * error message.
1752 				 */
1753 				opftuple = SearchSysCache1(OPFAMILYOID,
1754 										   ObjectIdGetDatum(opfamily));
1755 				if (!HeapTupleIsValid(opftuple))
1756 					elog(ERROR, "cache lookup failed for opfamily %u",
1757 						 opfamily);
1758 				opfform = (Form_pg_opfamily) GETSTRUCT(opftuple);
1759 
1760 				ereport(ERROR,
1761 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
1762 						 errmsg("operator %s is not a member of operator family \"%s\"",
1763 								format_operator(opid),
1764 								NameStr(opfform->opfname)),
1765 						 errdetail("The exclusion operator must be related to the index operator class for the constraint.")));
1766 			}
1767 
1768 			indexInfo->ii_ExclusionOps[attn] = opid;
1769 			indexInfo->ii_ExclusionProcs[attn] = get_opcode(opid);
1770 			indexInfo->ii_ExclusionStrats[attn] = strat;
1771 			nextExclOp = lnext(nextExclOp);
1772 		}
1773 
1774 		/*
1775 		 * Set up the per-column options (indoption field).  For now, this is
1776 		 * zero for any un-ordered index, while ordered indexes have DESC and
1777 		 * NULLS FIRST/LAST options.
1778 		 */
1779 		colOptionP[attn] = 0;
1780 		if (amcanorder)
1781 		{
1782 			/* default ordering is ASC */
1783 			if (attribute->ordering == SORTBY_DESC)
1784 				colOptionP[attn] |= INDOPTION_DESC;
1785 			/* default null ordering is LAST for ASC, FIRST for DESC */
1786 			if (attribute->nulls_ordering == SORTBY_NULLS_DEFAULT)
1787 			{
1788 				if (attribute->ordering == SORTBY_DESC)
1789 					colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1790 			}
1791 			else if (attribute->nulls_ordering == SORTBY_NULLS_FIRST)
1792 				colOptionP[attn] |= INDOPTION_NULLS_FIRST;
1793 		}
1794 		else
1795 		{
1796 			/* index AM does not support ordering */
1797 			if (attribute->ordering != SORTBY_DEFAULT)
1798 				ereport(ERROR,
1799 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1800 						 errmsg("access method \"%s\" does not support ASC/DESC options",
1801 								accessMethodName)));
1802 			if (attribute->nulls_ordering != SORTBY_NULLS_DEFAULT)
1803 				ereport(ERROR,
1804 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1805 						 errmsg("access method \"%s\" does not support NULLS FIRST/LAST options",
1806 								accessMethodName)));
1807 		}
1808 
1809 		attn++;
1810 	}
1811 }
1812 
1813 /*
1814  * Resolve possibly-defaulted operator class specification
1815  *
1816  * Note: This is used to resolve operator class specification in index and
1817  * partition key definitions.
1818  */
1819 Oid
ResolveOpClass(List * opclass,Oid attrType,const char * accessMethodName,Oid accessMethodId)1820 ResolveOpClass(List *opclass, Oid attrType,
1821 			   const char *accessMethodName, Oid accessMethodId)
1822 {
1823 	char	   *schemaname;
1824 	char	   *opcname;
1825 	HeapTuple	tuple;
1826 	Oid			opClassId,
1827 				opInputType;
1828 
1829 	/*
1830 	 * Release 7.0 removed network_ops, timespan_ops, and datetime_ops, so we
1831 	 * ignore those opclass names so the default *_ops is used.  This can be
1832 	 * removed in some later release.  bjm 2000/02/07
1833 	 *
1834 	 * Release 7.1 removes lztext_ops, so suppress that too for a while.  tgl
1835 	 * 2000/07/30
1836 	 *
1837 	 * Release 7.2 renames timestamp_ops to timestamptz_ops, so suppress that
1838 	 * too for awhile.  I'm starting to think we need a better approach. tgl
1839 	 * 2000/10/01
1840 	 *
1841 	 * Release 8.0 removes bigbox_ops (which was dead code for a long while
1842 	 * anyway).  tgl 2003/11/11
1843 	 */
1844 	if (list_length(opclass) == 1)
1845 	{
1846 		char	   *claname = strVal(linitial(opclass));
1847 
1848 		if (strcmp(claname, "network_ops") == 0 ||
1849 			strcmp(claname, "timespan_ops") == 0 ||
1850 			strcmp(claname, "datetime_ops") == 0 ||
1851 			strcmp(claname, "lztext_ops") == 0 ||
1852 			strcmp(claname, "timestamp_ops") == 0 ||
1853 			strcmp(claname, "bigbox_ops") == 0)
1854 			opclass = NIL;
1855 	}
1856 
1857 	if (opclass == NIL)
1858 	{
1859 		/* no operator class specified, so find the default */
1860 		opClassId = GetDefaultOpClass(attrType, accessMethodId);
1861 		if (!OidIsValid(opClassId))
1862 			ereport(ERROR,
1863 					(errcode(ERRCODE_UNDEFINED_OBJECT),
1864 					 errmsg("data type %s has no default operator class for access method \"%s\"",
1865 							format_type_be(attrType), accessMethodName),
1866 					 errhint("You must specify an operator class for the index or define a default operator class for the data type.")));
1867 		return opClassId;
1868 	}
1869 
1870 	/*
1871 	 * Specific opclass name given, so look up the opclass.
1872 	 */
1873 
1874 	/* deconstruct the name list */
1875 	DeconstructQualifiedName(opclass, &schemaname, &opcname);
1876 
1877 	if (schemaname)
1878 	{
1879 		/* Look in specific schema only */
1880 		Oid			namespaceId;
1881 
1882 		namespaceId = LookupExplicitNamespace(schemaname, false);
1883 		tuple = SearchSysCache3(CLAAMNAMENSP,
1884 								ObjectIdGetDatum(accessMethodId),
1885 								PointerGetDatum(opcname),
1886 								ObjectIdGetDatum(namespaceId));
1887 	}
1888 	else
1889 	{
1890 		/* Unqualified opclass name, so search the search path */
1891 		opClassId = OpclassnameGetOpcid(accessMethodId, opcname);
1892 		if (!OidIsValid(opClassId))
1893 			ereport(ERROR,
1894 					(errcode(ERRCODE_UNDEFINED_OBJECT),
1895 					 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1896 							opcname, accessMethodName)));
1897 		tuple = SearchSysCache1(CLAOID, ObjectIdGetDatum(opClassId));
1898 	}
1899 
1900 	if (!HeapTupleIsValid(tuple))
1901 		ereport(ERROR,
1902 				(errcode(ERRCODE_UNDEFINED_OBJECT),
1903 				 errmsg("operator class \"%s\" does not exist for access method \"%s\"",
1904 						NameListToString(opclass), accessMethodName)));
1905 
1906 	/*
1907 	 * Verify that the index operator class accepts this datatype.  Note we
1908 	 * will accept binary compatibility.
1909 	 */
1910 	opClassId = HeapTupleGetOid(tuple);
1911 	opInputType = ((Form_pg_opclass) GETSTRUCT(tuple))->opcintype;
1912 
1913 	if (!IsBinaryCoercible(attrType, opInputType))
1914 		ereport(ERROR,
1915 				(errcode(ERRCODE_DATATYPE_MISMATCH),
1916 				 errmsg("operator class \"%s\" does not accept data type %s",
1917 						NameListToString(opclass), format_type_be(attrType))));
1918 
1919 	ReleaseSysCache(tuple);
1920 
1921 	return opClassId;
1922 }
1923 
1924 /*
1925  * GetDefaultOpClass
1926  *
1927  * Given the OIDs of a datatype and an access method, find the default
1928  * operator class, if any.  Returns InvalidOid if there is none.
1929  */
1930 Oid
GetDefaultOpClass(Oid type_id,Oid am_id)1931 GetDefaultOpClass(Oid type_id, Oid am_id)
1932 {
1933 	Oid			result = InvalidOid;
1934 	int			nexact = 0;
1935 	int			ncompatible = 0;
1936 	int			ncompatiblepreferred = 0;
1937 	Relation	rel;
1938 	ScanKeyData skey[1];
1939 	SysScanDesc scan;
1940 	HeapTuple	tup;
1941 	TYPCATEGORY tcategory;
1942 
1943 	/* If it's a domain, look at the base type instead */
1944 	type_id = getBaseType(type_id);
1945 
1946 	tcategory = TypeCategory(type_id);
1947 
1948 	/*
1949 	 * We scan through all the opclasses available for the access method,
1950 	 * looking for one that is marked default and matches the target type
1951 	 * (either exactly or binary-compatibly, but prefer an exact match).
1952 	 *
1953 	 * We could find more than one binary-compatible match.  If just one is
1954 	 * for a preferred type, use that one; otherwise we fail, forcing the user
1955 	 * to specify which one he wants.  (The preferred-type special case is a
1956 	 * kluge for varchar: it's binary-compatible to both text and bpchar, so
1957 	 * we need a tiebreaker.)  If we find more than one exact match, then
1958 	 * someone put bogus entries in pg_opclass.
1959 	 */
1960 	rel = heap_open(OperatorClassRelationId, AccessShareLock);
1961 
1962 	ScanKeyInit(&skey[0],
1963 				Anum_pg_opclass_opcmethod,
1964 				BTEqualStrategyNumber, F_OIDEQ,
1965 				ObjectIdGetDatum(am_id));
1966 
1967 	scan = systable_beginscan(rel, OpclassAmNameNspIndexId, true,
1968 							  NULL, 1, skey);
1969 
1970 	while (HeapTupleIsValid(tup = systable_getnext(scan)))
1971 	{
1972 		Form_pg_opclass opclass = (Form_pg_opclass) GETSTRUCT(tup);
1973 
1974 		/* ignore altogether if not a default opclass */
1975 		if (!opclass->opcdefault)
1976 			continue;
1977 		if (opclass->opcintype == type_id)
1978 		{
1979 			nexact++;
1980 			result = HeapTupleGetOid(tup);
1981 		}
1982 		else if (nexact == 0 &&
1983 				 IsBinaryCoercible(type_id, opclass->opcintype))
1984 		{
1985 			if (IsPreferredType(tcategory, opclass->opcintype))
1986 			{
1987 				ncompatiblepreferred++;
1988 				result = HeapTupleGetOid(tup);
1989 			}
1990 			else if (ncompatiblepreferred == 0)
1991 			{
1992 				ncompatible++;
1993 				result = HeapTupleGetOid(tup);
1994 			}
1995 		}
1996 	}
1997 
1998 	systable_endscan(scan);
1999 
2000 	heap_close(rel, AccessShareLock);
2001 
2002 	/* raise error if pg_opclass contains inconsistent data */
2003 	if (nexact > 1)
2004 		ereport(ERROR,
2005 				(errcode(ERRCODE_DUPLICATE_OBJECT),
2006 				 errmsg("there are multiple default operator classes for data type %s",
2007 						format_type_be(type_id))));
2008 
2009 	if (nexact == 1 ||
2010 		ncompatiblepreferred == 1 ||
2011 		(ncompatiblepreferred == 0 && ncompatible == 1))
2012 		return result;
2013 
2014 	return InvalidOid;
2015 }
2016 
2017 /*
2018  *	makeObjectName()
2019  *
2020  *	Create a name for an implicitly created index, sequence, constraint,
2021  *	extended statistics, etc.
2022  *
2023  *	The parameters are typically: the original table name, the original field
2024  *	name, and a "type" string (such as "seq" or "pkey").    The field name
2025  *	and/or type can be NULL if not relevant.
2026  *
2027  *	The result is a palloc'd string.
2028  *
2029  *	The basic result we want is "name1_name2_label", omitting "_name2" or
2030  *	"_label" when those parameters are NULL.  However, we must generate
2031  *	a name with less than NAMEDATALEN characters!  So, we truncate one or
2032  *	both names if necessary to make a short-enough string.  The label part
2033  *	is never truncated (so it had better be reasonably short).
2034  *
2035  *	The caller is responsible for checking uniqueness of the generated
2036  *	name and retrying as needed; retrying will be done by altering the
2037  *	"label" string (which is why we never truncate that part).
2038  */
2039 char *
makeObjectName(const char * name1,const char * name2,const char * label)2040 makeObjectName(const char *name1, const char *name2, const char *label)
2041 {
2042 	char	   *name;
2043 	int			overhead = 0;	/* chars needed for label and underscores */
2044 	int			availchars;		/* chars available for name(s) */
2045 	int			name1chars;		/* chars allocated to name1 */
2046 	int			name2chars;		/* chars allocated to name2 */
2047 	int			ndx;
2048 
2049 	name1chars = strlen(name1);
2050 	if (name2)
2051 	{
2052 		name2chars = strlen(name2);
2053 		overhead++;				/* allow for separating underscore */
2054 	}
2055 	else
2056 		name2chars = 0;
2057 	if (label)
2058 		overhead += strlen(label) + 1;
2059 
2060 	availchars = NAMEDATALEN - 1 - overhead;
2061 	Assert(availchars > 0);		/* else caller chose a bad label */
2062 
2063 	/*
2064 	 * If we must truncate,  preferentially truncate the longer name. This
2065 	 * logic could be expressed without a loop, but it's simple and obvious as
2066 	 * a loop.
2067 	 */
2068 	while (name1chars + name2chars > availchars)
2069 	{
2070 		if (name1chars > name2chars)
2071 			name1chars--;
2072 		else
2073 			name2chars--;
2074 	}
2075 
2076 	name1chars = pg_mbcliplen(name1, name1chars, name1chars);
2077 	if (name2)
2078 		name2chars = pg_mbcliplen(name2, name2chars, name2chars);
2079 
2080 	/* Now construct the string using the chosen lengths */
2081 	name = palloc(name1chars + name2chars + overhead + 1);
2082 	memcpy(name, name1, name1chars);
2083 	ndx = name1chars;
2084 	if (name2)
2085 	{
2086 		name[ndx++] = '_';
2087 		memcpy(name + ndx, name2, name2chars);
2088 		ndx += name2chars;
2089 	}
2090 	if (label)
2091 	{
2092 		name[ndx++] = '_';
2093 		strcpy(name + ndx, label);
2094 	}
2095 	else
2096 		name[ndx] = '\0';
2097 
2098 	return name;
2099 }
2100 
2101 /*
2102  * Select a nonconflicting name for a new relation.  This is ordinarily
2103  * used to choose index names (which is why it's here) but it can also
2104  * be used for sequences, or any autogenerated relation kind.
2105  *
2106  * name1, name2, and label are used the same way as for makeObjectName(),
2107  * except that the label can't be NULL; digits will be appended to the label
2108  * if needed to create a name that is unique within the specified namespace.
2109  *
2110  * If isconstraint is true, we also avoid choosing a name matching any
2111  * existing constraint in the same namespace.  (This is stricter than what
2112  * Postgres itself requires, but the SQL standard says that constraint names
2113  * should be unique within schemas, so we follow that for autogenerated
2114  * constraint names.)
2115  *
2116  * Note: it is theoretically possible to get a collision anyway, if someone
2117  * else chooses the same name concurrently.  This is fairly unlikely to be
2118  * a problem in practice, especially if one is holding an exclusive lock on
2119  * the relation identified by name1.  However, if choosing multiple names
2120  * within a single command, you'd better create the new object and do
2121  * CommandCounterIncrement before choosing the next one!
2122  *
2123  * Returns a palloc'd string.
2124  */
2125 char *
ChooseRelationName(const char * name1,const char * name2,const char * label,Oid namespaceid,bool isconstraint)2126 ChooseRelationName(const char *name1, const char *name2,
2127 				   const char *label, Oid namespaceid,
2128 				   bool isconstraint)
2129 {
2130 	int			pass = 0;
2131 	char	   *relname = NULL;
2132 	char		modlabel[NAMEDATALEN];
2133 
2134 	/* try the unmodified label first */
2135 	StrNCpy(modlabel, label, sizeof(modlabel));
2136 
2137 	for (;;)
2138 	{
2139 		relname = makeObjectName(name1, name2, modlabel);
2140 
2141 		if (!OidIsValid(get_relname_relid(relname, namespaceid)))
2142 		{
2143 			if (!isconstraint ||
2144 				!ConstraintNameExists(relname, namespaceid))
2145 				break;
2146 		}
2147 
2148 		/* found a conflict, so try a new name component */
2149 		pfree(relname);
2150 		snprintf(modlabel, sizeof(modlabel), "%s%d", label, ++pass);
2151 	}
2152 
2153 	return relname;
2154 }
2155 
2156 /*
2157  * Select the name to be used for an index.
2158  *
2159  * The argument list is pretty ad-hoc :-(
2160  */
2161 static char *
ChooseIndexName(const char * tabname,Oid namespaceId,List * colnames,List * exclusionOpNames,bool primary,bool isconstraint)2162 ChooseIndexName(const char *tabname, Oid namespaceId,
2163 				List *colnames, List *exclusionOpNames,
2164 				bool primary, bool isconstraint)
2165 {
2166 	char	   *indexname;
2167 
2168 	if (primary)
2169 	{
2170 		/* the primary key's name does not depend on the specific column(s) */
2171 		indexname = ChooseRelationName(tabname,
2172 									   NULL,
2173 									   "pkey",
2174 									   namespaceId,
2175 									   true);
2176 	}
2177 	else if (exclusionOpNames != NIL)
2178 	{
2179 		indexname = ChooseRelationName(tabname,
2180 									   ChooseIndexNameAddition(colnames),
2181 									   "excl",
2182 									   namespaceId,
2183 									   true);
2184 	}
2185 	else if (isconstraint)
2186 	{
2187 		indexname = ChooseRelationName(tabname,
2188 									   ChooseIndexNameAddition(colnames),
2189 									   "key",
2190 									   namespaceId,
2191 									   true);
2192 	}
2193 	else
2194 	{
2195 		indexname = ChooseRelationName(tabname,
2196 									   ChooseIndexNameAddition(colnames),
2197 									   "idx",
2198 									   namespaceId,
2199 									   false);
2200 	}
2201 
2202 	return indexname;
2203 }
2204 
2205 /*
2206  * Generate "name2" for a new index given the list of column names for it
2207  * (as produced by ChooseIndexColumnNames).  This will be passed to
2208  * ChooseRelationName along with the parent table name and a suitable label.
2209  *
2210  * We know that less than NAMEDATALEN characters will actually be used,
2211  * so we can truncate the result once we've generated that many.
2212  *
2213  * XXX See also ChooseExtendedStatisticNameAddition.
2214  */
2215 static char *
ChooseIndexNameAddition(List * colnames)2216 ChooseIndexNameAddition(List *colnames)
2217 {
2218 	char		buf[NAMEDATALEN * 2];
2219 	int			buflen = 0;
2220 	ListCell   *lc;
2221 
2222 	buf[0] = '\0';
2223 	foreach(lc, colnames)
2224 	{
2225 		const char *name = (const char *) lfirst(lc);
2226 
2227 		if (buflen > 0)
2228 			buf[buflen++] = '_';	/* insert _ between names */
2229 
2230 		/*
2231 		 * At this point we have buflen <= NAMEDATALEN.  name should be less
2232 		 * than NAMEDATALEN already, but use strlcpy for paranoia.
2233 		 */
2234 		strlcpy(buf + buflen, name, NAMEDATALEN);
2235 		buflen += strlen(buf + buflen);
2236 		if (buflen >= NAMEDATALEN)
2237 			break;
2238 	}
2239 	return pstrdup(buf);
2240 }
2241 
2242 /*
2243  * Select the actual names to be used for the columns of an index, given the
2244  * list of IndexElems for the columns.  This is mostly about ensuring the
2245  * names are unique so we don't get a conflicting-attribute-names error.
2246  *
2247  * Returns a List of plain strings (char *, not String nodes).
2248  */
2249 static List *
ChooseIndexColumnNames(List * indexElems)2250 ChooseIndexColumnNames(List *indexElems)
2251 {
2252 	List	   *result = NIL;
2253 	ListCell   *lc;
2254 
2255 	foreach(lc, indexElems)
2256 	{
2257 		IndexElem  *ielem = (IndexElem *) lfirst(lc);
2258 		const char *origname;
2259 		const char *curname;
2260 		int			i;
2261 		char		buf[NAMEDATALEN];
2262 
2263 		/* Get the preliminary name from the IndexElem */
2264 		if (ielem->indexcolname)
2265 			origname = ielem->indexcolname; /* caller-specified name */
2266 		else if (ielem->name)
2267 			origname = ielem->name; /* simple column reference */
2268 		else
2269 			origname = "expr";	/* default name for expression */
2270 
2271 		/* If it conflicts with any previous column, tweak it */
2272 		curname = origname;
2273 		for (i = 1;; i++)
2274 		{
2275 			ListCell   *lc2;
2276 			char		nbuf[32];
2277 			int			nlen;
2278 
2279 			foreach(lc2, result)
2280 			{
2281 				if (strcmp(curname, (char *) lfirst(lc2)) == 0)
2282 					break;
2283 			}
2284 			if (lc2 == NULL)
2285 				break;			/* found nonconflicting name */
2286 
2287 			sprintf(nbuf, "%d", i);
2288 
2289 			/* Ensure generated names are shorter than NAMEDATALEN */
2290 			nlen = pg_mbcliplen(origname, strlen(origname),
2291 								NAMEDATALEN - 1 - strlen(nbuf));
2292 			memcpy(buf, origname, nlen);
2293 			strcpy(buf + nlen, nbuf);
2294 			curname = buf;
2295 		}
2296 
2297 		/* And attach to the result list */
2298 		result = lappend(result, pstrdup(curname));
2299 	}
2300 	return result;
2301 }
2302 
2303 /*
2304  * ReindexIndex
2305  *		Recreate a specific index.
2306  */
2307 void
ReindexIndex(RangeVar * indexRelation,int options)2308 ReindexIndex(RangeVar *indexRelation, int options)
2309 {
2310 	Oid			indOid;
2311 	Oid			heapOid = InvalidOid;
2312 	Relation	irel;
2313 	char		persistence;
2314 
2315 	/*
2316 	 * Find and lock index, and check permissions on table; use callback to
2317 	 * obtain lock on table first, to avoid deadlock hazard.  The lock level
2318 	 * used here must match the index lock obtained in reindex_index().
2319 	 */
2320 	indOid = RangeVarGetRelidExtended(indexRelation, AccessExclusiveLock,
2321 									  0,
2322 									  RangeVarCallbackForReindexIndex,
2323 									  (void *) &heapOid);
2324 
2325 	/*
2326 	 * Obtain the current persistence of the existing index.  We already hold
2327 	 * lock on the index.
2328 	 */
2329 	irel = index_open(indOid, NoLock);
2330 
2331 	if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
2332 	{
2333 		ReindexPartitionedIndex(irel);
2334 		return;
2335 	}
2336 
2337 	persistence = irel->rd_rel->relpersistence;
2338 	index_close(irel, NoLock);
2339 
2340 	reindex_index(indOid, false, persistence, options);
2341 }
2342 
2343 /*
2344  * Check permissions on table before acquiring relation lock; also lock
2345  * the heap before the RangeVarGetRelidExtended takes the index lock, to avoid
2346  * deadlocks.
2347  */
2348 static void
RangeVarCallbackForReindexIndex(const RangeVar * relation,Oid relId,Oid oldRelId,void * arg)2349 RangeVarCallbackForReindexIndex(const RangeVar *relation,
2350 								Oid relId, Oid oldRelId, void *arg)
2351 {
2352 	char		relkind;
2353 	Oid		   *heapOid = (Oid *) arg;
2354 
2355 	/*
2356 	 * If we previously locked some other index's heap, and the name we're
2357 	 * looking up no longer refers to that relation, release the now-useless
2358 	 * lock.
2359 	 */
2360 	if (relId != oldRelId && OidIsValid(oldRelId))
2361 	{
2362 		/* lock level here should match reindex_index() heap lock */
2363 		UnlockRelationOid(*heapOid, ShareLock);
2364 		*heapOid = InvalidOid;
2365 	}
2366 
2367 	/* If the relation does not exist, there's nothing more to do. */
2368 	if (!OidIsValid(relId))
2369 		return;
2370 
2371 	/*
2372 	 * If the relation does exist, check whether it's an index.  But note that
2373 	 * the relation might have been dropped between the time we did the name
2374 	 * lookup and now.  In that case, there's nothing to do.
2375 	 */
2376 	relkind = get_rel_relkind(relId);
2377 	if (!relkind)
2378 		return;
2379 	if (relkind != RELKIND_INDEX &&
2380 		relkind != RELKIND_PARTITIONED_INDEX)
2381 		ereport(ERROR,
2382 				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
2383 				 errmsg("\"%s\" is not an index", relation->relname)));
2384 
2385 	/* Check permissions */
2386 	if (!pg_class_ownercheck(relId, GetUserId()))
2387 		aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_INDEX, relation->relname);
2388 
2389 	/* Lock heap before index to avoid deadlock. */
2390 	if (relId != oldRelId)
2391 	{
2392 		/*
2393 		 * Lock level here should match reindex_index() heap lock. If the OID
2394 		 * isn't valid, it means the index as concurrently dropped, which is
2395 		 * not a problem for us; just return normally.
2396 		 */
2397 		*heapOid = IndexGetRelation(relId, true);
2398 		if (OidIsValid(*heapOid))
2399 			LockRelationOid(*heapOid, ShareLock);
2400 	}
2401 }
2402 
2403 /*
2404  * ReindexTable
2405  *		Recreate all indexes of a table (and of its toast table, if any)
2406  */
2407 Oid
ReindexTable(RangeVar * relation,int options)2408 ReindexTable(RangeVar *relation, int options)
2409 {
2410 	Oid			heapOid;
2411 
2412 	/* The lock level used here should match reindex_relation(). */
2413 	heapOid = RangeVarGetRelidExtended(relation, ShareLock, 0,
2414 									   RangeVarCallbackOwnsTable, NULL);
2415 
2416 	if (!reindex_relation(heapOid,
2417 						  REINDEX_REL_PROCESS_TOAST |
2418 						  REINDEX_REL_CHECK_CONSTRAINTS,
2419 						  options))
2420 		ereport(NOTICE,
2421 				(errmsg("table \"%s\" has no indexes",
2422 						relation->relname)));
2423 
2424 	return heapOid;
2425 }
2426 
2427 /*
2428  * ReindexMultipleTables
2429  *		Recreate indexes of tables selected by objectName/objectKind.
2430  *
2431  * To reduce the probability of deadlocks, each table is reindexed in a
2432  * separate transaction, so we can release the lock on it right away.
2433  * That means this must not be called within a user transaction block!
2434  */
2435 void
ReindexMultipleTables(const char * objectName,ReindexObjectType objectKind,int options)2436 ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
2437 					  int options)
2438 {
2439 	Oid			objectOid;
2440 	Relation	relationRelation;
2441 	HeapScanDesc scan;
2442 	ScanKeyData scan_keys[1];
2443 	HeapTuple	tuple;
2444 	MemoryContext private_context;
2445 	MemoryContext old;
2446 	List	   *relids = NIL;
2447 	ListCell   *l;
2448 	int			num_keys;
2449 
2450 	AssertArg(objectName);
2451 	Assert(objectKind == REINDEX_OBJECT_SCHEMA ||
2452 		   objectKind == REINDEX_OBJECT_SYSTEM ||
2453 		   objectKind == REINDEX_OBJECT_DATABASE);
2454 
2455 	/*
2456 	 * Get OID of object to reindex, being the database currently being used
2457 	 * by session for a database or for system catalogs, or the schema defined
2458 	 * by caller. At the same time do permission checks that need different
2459 	 * processing depending on the object type.
2460 	 */
2461 	if (objectKind == REINDEX_OBJECT_SCHEMA)
2462 	{
2463 		objectOid = get_namespace_oid(objectName, false);
2464 
2465 		if (!pg_namespace_ownercheck(objectOid, GetUserId()))
2466 			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_SCHEMA,
2467 						   objectName);
2468 	}
2469 	else
2470 	{
2471 		objectOid = MyDatabaseId;
2472 
2473 		if (strcmp(objectName, get_database_name(objectOid)) != 0)
2474 			ereport(ERROR,
2475 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2476 					 errmsg("can only reindex the currently open database")));
2477 		if (!pg_database_ownercheck(objectOid, GetUserId()))
2478 			aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_DATABASE,
2479 						   objectName);
2480 	}
2481 
2482 	/*
2483 	 * Create a memory context that will survive forced transaction commits we
2484 	 * do below.  Since it is a child of PortalContext, it will go away
2485 	 * eventually even if we suffer an error; there's no need for special
2486 	 * abort cleanup logic.
2487 	 */
2488 	private_context = AllocSetContextCreate(PortalContext,
2489 											"ReindexMultipleTables",
2490 											ALLOCSET_SMALL_SIZES);
2491 
2492 	/*
2493 	 * Define the search keys to find the objects to reindex. For a schema, we
2494 	 * select target relations using relnamespace, something not necessary for
2495 	 * a database-wide operation.
2496 	 */
2497 	if (objectKind == REINDEX_OBJECT_SCHEMA)
2498 	{
2499 		num_keys = 1;
2500 		ScanKeyInit(&scan_keys[0],
2501 					Anum_pg_class_relnamespace,
2502 					BTEqualStrategyNumber, F_OIDEQ,
2503 					ObjectIdGetDatum(objectOid));
2504 	}
2505 	else
2506 		num_keys = 0;
2507 
2508 	/*
2509 	 * Scan pg_class to build a list of the relations we need to reindex.
2510 	 *
2511 	 * We only consider plain relations and materialized views here (toast
2512 	 * rels will be processed indirectly by reindex_relation).
2513 	 */
2514 	relationRelation = heap_open(RelationRelationId, AccessShareLock);
2515 	scan = heap_beginscan_catalog(relationRelation, num_keys, scan_keys);
2516 	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
2517 	{
2518 		Form_pg_class classtuple = (Form_pg_class) GETSTRUCT(tuple);
2519 		Oid			relid = HeapTupleGetOid(tuple);
2520 
2521 		/*
2522 		 * Only regular tables and matviews can have indexes, so ignore any
2523 		 * other kind of relation.
2524 		 *
2525 		 * It is tempting to also consider partitioned tables here, but that
2526 		 * has the problem that if the children are in the same schema, they
2527 		 * would be processed twice.  Maybe we could have a separate list of
2528 		 * partitioned tables, and expand that afterwards into relids,
2529 		 * ignoring any duplicates.
2530 		 */
2531 		if (classtuple->relkind != RELKIND_RELATION &&
2532 			classtuple->relkind != RELKIND_MATVIEW)
2533 			continue;
2534 
2535 		/* Skip temp tables of other backends; we can't reindex them at all */
2536 		if (classtuple->relpersistence == RELPERSISTENCE_TEMP &&
2537 			!isTempNamespace(classtuple->relnamespace))
2538 			continue;
2539 
2540 		/* Check user/system classification, and optionally skip */
2541 		if (objectKind == REINDEX_OBJECT_SYSTEM &&
2542 			!IsSystemClass(relid, classtuple))
2543 			continue;
2544 
2545 		/*
2546 		 * The table can be reindexed if the user is superuser, the table
2547 		 * owner, or the database/schema owner (but in the latter case, only
2548 		 * if it's not a shared relation).  pg_class_ownercheck includes the
2549 		 * superuser case, and depending on objectKind we already know that
2550 		 * the user has permission to run REINDEX on this database or schema
2551 		 * per the permission checks at the beginning of this routine.
2552 		 */
2553 		if (classtuple->relisshared &&
2554 			!pg_class_ownercheck(relid, GetUserId()))
2555 			continue;
2556 
2557 		/* Save the list of relation OIDs in private context */
2558 		old = MemoryContextSwitchTo(private_context);
2559 
2560 		/*
2561 		 * We always want to reindex pg_class first if it's selected to be
2562 		 * reindexed.  This ensures that if there is any corruption in
2563 		 * pg_class' indexes, they will be fixed before we process any other
2564 		 * tables.  This is critical because reindexing itself will try to
2565 		 * update pg_class.
2566 		 */
2567 		if (relid == RelationRelationId)
2568 			relids = lcons_oid(relid, relids);
2569 		else
2570 			relids = lappend_oid(relids, relid);
2571 
2572 		MemoryContextSwitchTo(old);
2573 	}
2574 	heap_endscan(scan);
2575 	heap_close(relationRelation, AccessShareLock);
2576 
2577 	/* Now reindex each rel in a separate transaction */
2578 	PopActiveSnapshot();
2579 	CommitTransactionCommand();
2580 	foreach(l, relids)
2581 	{
2582 		Oid			relid = lfirst_oid(l);
2583 
2584 		StartTransactionCommand();
2585 		/* functions in indexes may want a snapshot set */
2586 		PushActiveSnapshot(GetTransactionSnapshot());
2587 		if (reindex_relation(relid,
2588 							 REINDEX_REL_PROCESS_TOAST |
2589 							 REINDEX_REL_CHECK_CONSTRAINTS,
2590 							 options))
2591 
2592 			if (options & REINDEXOPT_VERBOSE)
2593 				ereport(INFO,
2594 						(errmsg("table \"%s.%s\" was reindexed",
2595 								get_namespace_name(get_rel_namespace(relid)),
2596 								get_rel_name(relid))));
2597 		PopActiveSnapshot();
2598 		CommitTransactionCommand();
2599 	}
2600 	StartTransactionCommand();
2601 
2602 	MemoryContextDelete(private_context);
2603 }
2604 
2605 /*
2606  *	ReindexPartitionedIndex
2607  *		Reindex each child of the given partitioned index.
2608  *
2609  * Not yet implemented.
2610  */
2611 static void
ReindexPartitionedIndex(Relation parentIdx)2612 ReindexPartitionedIndex(Relation parentIdx)
2613 {
2614 	ereport(ERROR,
2615 			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
2616 			 errmsg("REINDEX is not yet implemented for partitioned indexes")));
2617 }
2618 
2619 /*
2620  * Insert or delete an appropriate pg_inherits tuple to make the given index
2621  * be a partition of the indicated parent index.
2622  *
2623  * This also corrects the pg_depend information for the affected index.
2624  */
2625 void
IndexSetParentIndex(Relation partitionIdx,Oid parentOid)2626 IndexSetParentIndex(Relation partitionIdx, Oid parentOid)
2627 {
2628 	Relation	pg_inherits;
2629 	ScanKeyData key[2];
2630 	SysScanDesc scan;
2631 	Oid			partRelid = RelationGetRelid(partitionIdx);
2632 	HeapTuple	tuple;
2633 	bool		fix_dependencies;
2634 
2635 	/* Make sure this is an index */
2636 	Assert(partitionIdx->rd_rel->relkind == RELKIND_INDEX ||
2637 		   partitionIdx->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
2638 
2639 	/*
2640 	 * Scan pg_inherits for rows linking our index to some parent.
2641 	 */
2642 	pg_inherits = relation_open(InheritsRelationId, RowExclusiveLock);
2643 	ScanKeyInit(&key[0],
2644 				Anum_pg_inherits_inhrelid,
2645 				BTEqualStrategyNumber, F_OIDEQ,
2646 				ObjectIdGetDatum(partRelid));
2647 	ScanKeyInit(&key[1],
2648 				Anum_pg_inherits_inhseqno,
2649 				BTEqualStrategyNumber, F_INT4EQ,
2650 				Int32GetDatum(1));
2651 	scan = systable_beginscan(pg_inherits, InheritsRelidSeqnoIndexId, true,
2652 							  NULL, 2, key);
2653 	tuple = systable_getnext(scan);
2654 
2655 	if (!HeapTupleIsValid(tuple))
2656 	{
2657 		if (parentOid == InvalidOid)
2658 		{
2659 			/*
2660 			 * No pg_inherits row, and no parent wanted: nothing to do in this
2661 			 * case.
2662 			 */
2663 			fix_dependencies = false;
2664 		}
2665 		else
2666 		{
2667 			StoreSingleInheritance(partRelid, parentOid, 1);
2668 			fix_dependencies = true;
2669 		}
2670 	}
2671 	else
2672 	{
2673 		Form_pg_inherits inhForm = (Form_pg_inherits) GETSTRUCT(tuple);
2674 
2675 		if (parentOid == InvalidOid)
2676 		{
2677 			/*
2678 			 * There exists a pg_inherits row, which we want to clear; do so.
2679 			 */
2680 			CatalogTupleDelete(pg_inherits, &tuple->t_self);
2681 			fix_dependencies = true;
2682 		}
2683 		else
2684 		{
2685 			/*
2686 			 * A pg_inherits row exists.  If it's the same we want, then we're
2687 			 * good; if it differs, that amounts to a corrupt catalog and
2688 			 * should not happen.
2689 			 */
2690 			if (inhForm->inhparent != parentOid)
2691 			{
2692 				/* unexpected: we should not get called in this case */
2693 				elog(ERROR, "bogus pg_inherit row: inhrelid %u inhparent %u",
2694 					 inhForm->inhrelid, inhForm->inhparent);
2695 			}
2696 
2697 			/* already in the right state */
2698 			fix_dependencies = false;
2699 		}
2700 	}
2701 
2702 	/* done with pg_inherits */
2703 	systable_endscan(scan);
2704 	relation_close(pg_inherits, RowExclusiveLock);
2705 
2706 	/* set relispartition correctly on the partition */
2707 	update_relispartition(partRelid, OidIsValid(parentOid));
2708 
2709 	if (fix_dependencies)
2710 	{
2711 		ObjectAddress partIdx;
2712 
2713 		/*
2714 		 * Insert/delete pg_depend rows.  If setting a parent, add an
2715 		 * INTERNAL_AUTO dependency to the parent index; if making standalone,
2716 		 * remove all existing rows and put back the regular dependency on the
2717 		 * table.
2718 		 */
2719 		ObjectAddressSet(partIdx, RelationRelationId, partRelid);
2720 
2721 		if (OidIsValid(parentOid))
2722 		{
2723 			ObjectAddress parentIdx;
2724 
2725 			ObjectAddressSet(parentIdx, RelationRelationId, parentOid);
2726 			recordDependencyOn(&partIdx, &parentIdx, DEPENDENCY_INTERNAL_AUTO);
2727 		}
2728 		else
2729 		{
2730 			ObjectAddress partitionTbl;
2731 
2732 			ObjectAddressSet(partitionTbl, RelationRelationId,
2733 							 partitionIdx->rd_index->indrelid);
2734 
2735 			deleteDependencyRecordsForClass(RelationRelationId, partRelid,
2736 											RelationRelationId,
2737 											DEPENDENCY_INTERNAL_AUTO);
2738 
2739 			recordDependencyOn(&partIdx, &partitionTbl, DEPENDENCY_AUTO);
2740 		}
2741 
2742 		/* make our updates visible */
2743 		CommandCounterIncrement();
2744 	}
2745 }
2746 
2747 /*
2748  * Subroutine of IndexSetParentIndex to update the relispartition flag of the
2749  * given index to the given value.
2750  */
2751 static void
update_relispartition(Oid relationId,bool newval)2752 update_relispartition(Oid relationId, bool newval)
2753 {
2754 	HeapTuple	tup;
2755 	Relation	classRel;
2756 
2757 	classRel = heap_open(RelationRelationId, RowExclusiveLock);
2758 	tup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relationId));
2759 	if (!HeapTupleIsValid(tup))
2760 		elog(ERROR, "cache lookup failed for relation %u", relationId);
2761 	Assert(((Form_pg_class) GETSTRUCT(tup))->relispartition != newval);
2762 	((Form_pg_class) GETSTRUCT(tup))->relispartition = newval;
2763 	CatalogTupleUpdate(classRel, &tup->t_self, tup);
2764 	heap_freetuple(tup);
2765 	heap_close(classRel, RowExclusiveLock);
2766 }
2767