/*-------------------------------------------------------------------------
 *
 * execIndexing.c
 *	  routines for inserting index tuples and enforcing unique and
 *	  exclusion constraints.
 *
 * ExecInsertIndexTuples() is the main entry point.  It's called after
 * inserting a tuple to the heap, and it inserts corresponding index tuples
 * into all indexes.  At the same time, it enforces any unique and
 * exclusion constraints:
 *
 * Unique Indexes
 * --------------
 *
 * Enforcing a unique constraint is straightforward.  When the index AM
 * inserts the tuple to the index, it also checks that there are no
 * conflicting tuples in the index already.  It does so atomically, so that
 * even if two backends try to insert the same key concurrently, only one
 * of them will succeed.  All the logic to ensure atomicity, and to wait
 * for in-progress transactions to finish, is handled by the index AM.
 *
 * If a unique constraint is deferred, we request the index AM to not
 * throw an error if a conflict is found.  Instead, we make note that there
 * was a conflict and return the list of indexes with conflicts to the
 * caller.  The caller must re-check them later, by calling index_insert()
 * with the UNIQUE_CHECK_EXISTING option.
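 *
 * For instance, a deferrable unique constraint that takes this path could
 * be declared as follows (illustrative SQL only; the table and column
 * names are made up):
 *
 *		CREATE TABLE t (id int UNIQUE DEFERRABLE INITIALLY DEFERRED);
 *		BEGIN;
 *		INSERT INTO t VALUES (1), (1);	-- conflict noted, not yet an error
 *		COMMIT;							-- re-check happens here and fails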
 *
 * Exclusion Constraints
 * ---------------------
 *
 * Exclusion constraints are different from unique indexes in that when the
 * tuple is inserted to the index, the index AM does not check for
 * duplicate keys at the same time.  After the insertion, we perform a
 * separate scan on the index to check for conflicting tuples, and if one
 * is found, we throw an error and the transaction is aborted.  If the
 * conflicting tuple's inserter or deleter is in-progress, we wait for it
 * to finish first.
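 *
 * A typical exclusion constraint enforced this way might look like the
 * following (illustrative SQL only; the table and column names are made
 * up):
 *
 *		CREATE TABLE reservation (
 *			during	tsrange,
 *			EXCLUDE USING gist (during WITH &&)
 *		);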
 *
 * There is a chance of deadlock if two backends insert a tuple at the
 * same time and then perform the scan to check for conflicts.  They will
 * find each other's tuple, and both try to wait for each other.  The
 * deadlock detector will detect that, and abort one of the transactions.
 * That's fairly harmless, as one of them was bound to abort with a
 * "duplicate key error" anyway, although you get a different error
 * message.
 *
 * If an exclusion constraint is deferred, we still perform the conflict
 * checking scan immediately after inserting the index tuple.  But instead
 * of throwing an error if a conflict is found, we return that information
 * to the caller.  The caller must re-check the constraint later by calling
 * check_exclusion_constraint().
 *
 * Speculative insertion
 * ---------------------
 *
 * Speculative insertion is a two-phase mechanism used to implement
 * INSERT ... ON CONFLICT DO UPDATE/NOTHING.  The tuple is first inserted
 * into the heap and the indexes are updated as usual, but if a constraint
 * is violated, we can still back out the insertion without aborting the
 * whole transaction.  In an INSERT ... ON CONFLICT statement, if a
 * conflict is detected, the inserted tuple is backed out and the ON
 * CONFLICT action is executed instead.
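 *
 * For example, a statement of this shape goes through the speculative
 * insertion path (illustrative SQL only; the table is assumed to have a
 * unique index on "id"):
 *
 *		INSERT INTO t (id, val) VALUES (1, 'x')
 *			ON CONFLICT (id) DO UPDATE SET val = EXCLUDED.val;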
 *
 * Insertion to a unique index works as usual: the index AM checks for
 * duplicate keys atomically with the insertion.  But instead of throwing
 * an error on a conflict, the speculatively inserted heap tuple is backed
 * out.
 *
 * Exclusion constraints are slightly more complicated.  As mentioned
 * earlier, there is a risk of deadlock when two backends insert the same
 * key concurrently.  That is not a problem for regular insertions, since
 * one of the transactions has to be aborted anyway, but with a speculative
 * insertion we cannot let a deadlock happen, because we only want to back
 * out the speculatively inserted tuple on conflict, not abort the whole
 * transaction.
 *
 * When a backend detects that the speculative insertion conflicts with
 * another in-progress tuple, it has two options:
 *
 * 1. back out the speculatively inserted tuple, then wait for the other
 *	  transaction, and retry. Or,
 * 2. wait for the other transaction, with the speculatively inserted tuple
 *	  still in place.
 *
 * If two backends insert at the same time, and both try to wait for each
 * other, they will deadlock.  So option 2 is not acceptable.  Option 1
 * avoids the deadlock, but it is prone to a livelock instead.  Both
 * transactions will wake up immediately as the other transaction backs
 * out.  Then they both retry, and conflict with each other again, lather,
 * rinse, repeat.
 *
 * To avoid the livelock, one of the backends must back out first, and then
 * wait, while the other one waits without backing out.  It doesn't matter
 * which one backs out, so we employ an arbitrary rule that the transaction
 * with the higher XID backs out.
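 *
 * A minimal sketch of that rule (illustrative only; the real test is the
 * TransactionIdPrecedes() check in check_exclusion_or_unique_constraint(),
 * which additionally requires the other insertion to be speculative):
 *
 *		if (TransactionIdPrecedes(GetCurrentTransactionId(), otherXid))
 *			wait in place, since our XID is lower;
 *		else
 *			back out our speculative tuple, wait, then retry;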
 *
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/executor/execIndexing.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/genam.h"
#include "access/relscan.h"
#include "access/tableam.h"
#include "access/xact.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "nodes/nodeFuncs.h"
#include "storage/lmgr.h"
#include "utils/snapmgr.h"

/* waitMode argument to check_exclusion_or_unique_constraint() */
typedef enum
{
	CEOUC_WAIT,
	CEOUC_NOWAIT,
	CEOUC_LIVELOCK_PREVENTING_WAIT
} CEOUC_WAIT_MODE;

static bool check_exclusion_or_unique_constraint(Relation heap, Relation index,
												 IndexInfo *indexInfo,
												 ItemPointer tupleid,
												 Datum *values, bool *isnull,
												 EState *estate, bool newIndex,
												 CEOUC_WAIT_MODE waitMode,
												 bool errorOK,
												 ItemPointer conflictTid);

static bool index_recheck_constraint(Relation index, Oid *constr_procs,
									 Datum *existing_values, bool *existing_isnull,
									 Datum *new_values);
static bool index_unchanged_by_update(ResultRelInfo *resultRelInfo,
									  EState *estate, IndexInfo *indexInfo,
									  Relation indexRelation);
static bool index_expression_changed_walker(Node *node,
											Bitmapset *allUpdatedCols);

/* ----------------------------------------------------------------
 *		ExecOpenIndices
 *
 *		Find the indices associated with a result relation, open them,
 *		and save information about them in the result ResultRelInfo.
 *
 *		At entry, caller has already opened and locked
 *		resultRelInfo->ri_RelationDesc.
 * ----------------------------------------------------------------
 */
void
ExecOpenIndices(ResultRelInfo *resultRelInfo, bool speculative)
{
	Relation	resultRelation = resultRelInfo->ri_RelationDesc;
	List	   *indexoidlist;
	ListCell   *l;
	int			len,
				i;
	RelationPtr relationDescs;
	IndexInfo **indexInfoArray;

	resultRelInfo->ri_NumIndices = 0;

	/* fast path if no indexes */
	if (!RelationGetForm(resultRelation)->relhasindex)
		return;

	/*
	 * Get cached list of index OIDs
	 */
	indexoidlist = RelationGetIndexList(resultRelation);
	len = list_length(indexoidlist);
	if (len == 0)
		return;

	/*
	 * allocate space for result arrays
	 */
	relationDescs = (RelationPtr) palloc(len * sizeof(Relation));
	indexInfoArray = (IndexInfo **) palloc(len * sizeof(IndexInfo *));

	resultRelInfo->ri_NumIndices = len;
	resultRelInfo->ri_IndexRelationDescs = relationDescs;
	resultRelInfo->ri_IndexRelationInfo = indexInfoArray;

	/*
	 * For each index, open the index relation and save pg_index info. We
	 * acquire RowExclusiveLock, signifying we will update the index.
	 *
	 * Note: we do this even if the index is not indisready; it's not worth
	 * the trouble to optimize for the case where it isn't.
	 */
	i = 0;
	foreach(l, indexoidlist)
	{
		Oid			indexOid = lfirst_oid(l);
		Relation	indexDesc;
		IndexInfo  *ii;

		indexDesc = index_open(indexOid, RowExclusiveLock);

		/* extract index key information from the index's pg_index info */
		ii = BuildIndexInfo(indexDesc);

		/*
		 * If the indexes are to be used for speculative insertion, add extra
		 * information required by unique index entries.
		 */
		if (speculative && ii->ii_Unique)
			BuildSpeculativeIndexInfo(indexDesc, ii);

		relationDescs[i] = indexDesc;
		indexInfoArray[i] = ii;
		i++;
	}

	list_free(indexoidlist);
}

/* ----------------------------------------------------------------
 *		ExecCloseIndices
 *
 *		Close the index relations stored in resultRelInfo
 * ----------------------------------------------------------------
 */
void
ExecCloseIndices(ResultRelInfo *resultRelInfo)
{
	int			i;
	int			numIndices;
	RelationPtr indexDescs;

	numIndices = resultRelInfo->ri_NumIndices;
	indexDescs = resultRelInfo->ri_IndexRelationDescs;

	for (i = 0; i < numIndices; i++)
	{
		if (indexDescs[i] == NULL)
			continue;			/* shouldn't happen? */

		/* Drop lock acquired by ExecOpenIndices */
		index_close(indexDescs[i], RowExclusiveLock);
	}

	/*
	 * XXX should free indexInfo array here too?  Currently we assume that
	 * such stuff will be cleaned up automatically in FreeExecutorState.
	 */
}

/* ----------------------------------------------------------------
 *		ExecInsertIndexTuples
 *
 *		This routine takes care of inserting index tuples
 *		into all the relations indexing the result relation
 *		when a heap tuple is inserted into the result relation.
 *
 *		When 'update' is true, executor is performing an UPDATE
 *		that could not use an optimization like heapam's HOT (in
 *		more general terms a call to table_tuple_update() took
 *		place and set 'update_indexes' to true).  Receiving this
 *		hint makes us consider if we should pass down the
 *		'indexUnchanged' hint in turn.  That's something that we
 *		figure out for each index_insert() call iff 'update' is
 *		true.  (When 'update' is false we already know not to pass
 *		the hint to any index.)
 *
 *		Unique and exclusion constraints are enforced at the same
 *		time.  This returns a list of index OIDs for any unique or
 *		exclusion constraints that are deferred and that had
 *		potential (unconfirmed) conflicts.  (If noDupErr == true,
 *		the same is done for non-deferred constraints; in that case
 *		*specConflict is also set, to tell the caller whether the
 *		conflict was against a non-deferred constraint.)
 *
 *		If 'arbiterIndexes' is nonempty, noDupErr applies only to
 *		those indexes.  NIL means noDupErr applies to all indexes.
 * ----------------------------------------------------------------
 */
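/*
 * Illustrative call shape (a sketch only; the surrounding variables are
 * assumed, and the argument values shown are just one possible combination:
 * not an UPDATE, raise errors on duplicates, no arbiter indexes):
 *
 *		recheckIndexes = ExecInsertIndexTuples(resultRelInfo, slot, estate,
 *											   false, false, NULL, NIL);
 */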
List *
ExecInsertIndexTuples(ResultRelInfo *resultRelInfo,
					  TupleTableSlot *slot,
					  EState *estate,
					  bool update,
					  bool noDupErr,
					  bool *specConflict,
					  List *arbiterIndexes)
{
	ItemPointer tupleid = &slot->tts_tid;
	List	   *result = NIL;
	int			i;
	int			numIndices;
	RelationPtr relationDescs;
	Relation	heapRelation;
	IndexInfo **indexInfoArray;
	ExprContext *econtext;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];

	Assert(ItemPointerIsValid(tupleid));

	/*
	 * Get information from the result relation info structure.
	 */
	numIndices = resultRelInfo->ri_NumIndices;
	relationDescs = resultRelInfo->ri_IndexRelationDescs;
	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
	heapRelation = resultRelInfo->ri_RelationDesc;

	/* Sanity check: slot must belong to the same rel as the resultRelInfo. */
	Assert(slot->tts_tableOid == RelationGetRelid(heapRelation));

	/*
	 * We will use the EState's per-tuple context for evaluating predicates
	 * and index expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/*
	 * for each index, form and insert the index tuple
	 */
	for (i = 0; i < numIndices; i++)
	{
		Relation	indexRelation = relationDescs[i];
		IndexInfo  *indexInfo;
		bool		applyNoDupErr;
		IndexUniqueCheck checkUnique;
		bool		indexUnchanged;
		bool		satisfiesConstraint;

		if (indexRelation == NULL)
			continue;

		indexInfo = indexInfoArray[i];

		/* If the index is marked as read-only, ignore it */
		if (!indexInfo->ii_ReadyForInserts)
			continue;

		/* Check for partial index */
		if (indexInfo->ii_Predicate != NIL)
		{
			ExprState  *predicate;

			/*
			 * If predicate state not set up yet, create it (in the estate's
			 * per-query context)
			 */
			predicate = indexInfo->ii_PredicateState;
			if (predicate == NULL)
			{
				predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
				indexInfo->ii_PredicateState = predicate;
			}

			/* Skip this index-update if the predicate isn't satisfied */
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * FormIndexDatum fills in its values and isnull parameters with the
		 * appropriate values for the column(s) of the index.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		/* Check whether to apply noDupErr to this index */
		applyNoDupErr = noDupErr &&
			(arbiterIndexes == NIL ||
			 list_member_oid(arbiterIndexes,
							 indexRelation->rd_index->indexrelid));

		/*
		 * The index AM does the actual insertion, plus uniqueness checking.
		 *
		 * For an immediate-mode unique index, we just tell the index AM to
		 * throw error if not unique.
		 *
		 * For a deferrable unique index, we tell the index AM to just detect
		 * possible non-uniqueness, and we add the index OID to the result
		 * list if further checking is needed.
		 *
		 * For a speculative insertion (used by INSERT ... ON CONFLICT), do
		 * the same as for a deferrable unique index.
		 */
		if (!indexRelation->rd_index->indisunique)
			checkUnique = UNIQUE_CHECK_NO;
		else if (applyNoDupErr)
			checkUnique = UNIQUE_CHECK_PARTIAL;
		else if (indexRelation->rd_index->indimmediate)
			checkUnique = UNIQUE_CHECK_YES;
		else
			checkUnique = UNIQUE_CHECK_PARTIAL;

		/*
		 * There's definitely going to be an index_insert() call for this
		 * index.  If we're being called as part of an UPDATE statement,
		 * consider if the 'indexUnchanged' = true hint should be passed.
		 */
		indexUnchanged = update && index_unchanged_by_update(resultRelInfo,
															 estate,
															 indexInfo,
															 indexRelation);

		satisfiesConstraint =
			index_insert(indexRelation, /* index relation */
						 values,	/* array of index Datums */
						 isnull,	/* null flags */
						 tupleid,	/* tid of heap tuple */
						 heapRelation,	/* heap relation */
						 checkUnique,	/* type of uniqueness check to do */
						 indexUnchanged,	/* UPDATE without logical change? */
						 indexInfo);	/* index AM may need this */

		/*
		 * If the index has an associated exclusion constraint, check that.
		 * This is simpler than the process for uniqueness checks since we
		 * always insert first and then check.  If the constraint is deferred,
		 * we check now anyway, but don't throw error on violation or wait for
		 * a conclusive outcome from a concurrent insertion; instead we'll
		 * queue a recheck event.  Similarly, noDupErr callers (speculative
		 * inserters) will recheck later, and wait for a conclusive outcome
		 * then.
		 *
		 * An index for an exclusion constraint can't also be UNIQUE (not an
		 * essential property, we just don't allow it in the grammar), so no
		 * need to preserve the prior state of satisfiesConstraint.
		 */
		if (indexInfo->ii_ExclusionOps != NULL)
		{
			bool		violationOK;
			CEOUC_WAIT_MODE waitMode;

			if (applyNoDupErr)
			{
				violationOK = true;
				waitMode = CEOUC_LIVELOCK_PREVENTING_WAIT;
			}
			else if (!indexRelation->rd_index->indimmediate)
			{
				violationOK = true;
				waitMode = CEOUC_NOWAIT;
			}
			else
			{
				violationOK = false;
				waitMode = CEOUC_WAIT;
			}

			satisfiesConstraint =
				check_exclusion_or_unique_constraint(heapRelation,
													 indexRelation, indexInfo,
													 tupleid, values, isnull,
													 estate, false,
													 waitMode, violationOK, NULL);
		}

		if ((checkUnique == UNIQUE_CHECK_PARTIAL ||
			 indexInfo->ii_ExclusionOps != NULL) &&
			!satisfiesConstraint)
		{
			/*
			 * The tuple potentially violates the uniqueness or exclusion
			 * constraint, so make a note of the index so that we can re-check
			 * it later.  Speculative inserters are told if there was a
			 * speculative conflict, since that always requires a restart.
			 */
			result = lappend_oid(result, RelationGetRelid(indexRelation));
			if (indexRelation->rd_index->indimmediate && specConflict)
				*specConflict = true;
		}
	}

	return result;
}

/* ----------------------------------------------------------------
 *		ExecCheckIndexConstraints
 *
 *		This routine checks if a tuple violates any unique or
 *		exclusion constraints.  Returns true if there is no conflict.
 *		Otherwise returns false, and the TID of the conflicting
 *		tuple is returned in *conflictTid.
 *
 *		If 'arbiterIndexes' is given, only those indexes are checked.
 *		NIL means all indexes.
 *
 *		Note that this doesn't lock the values in any way, so it's
 *		possible that a conflicting tuple is inserted immediately
 *		after this returns.  But this can be used for a pre-check
 *		before insertion.
 * ----------------------------------------------------------------
 */
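/*
 * Illustrative call shape (a sketch only; the surrounding variables are
 * assumed): a speculative inserter can pre-check its arbiter indexes and,
 * on a false result, inspect conflictTid instead of inserting:
 *
 *		if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
 *									   &conflictTid, arbiterIndexes))
 *			... handle the existing conflicting tuple ...
 */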
bool
ExecCheckIndexConstraints(ResultRelInfo *resultRelInfo, TupleTableSlot *slot,
						  EState *estate, ItemPointer conflictTid,
						  List *arbiterIndexes)
{
	int			i;
	int			numIndices;
	RelationPtr relationDescs;
	Relation	heapRelation;
	IndexInfo **indexInfoArray;
	ExprContext *econtext;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	ItemPointerData invalidItemPtr;
	bool		checkedIndex = false;

	ItemPointerSetInvalid(conflictTid);
	ItemPointerSetInvalid(&invalidItemPtr);

	/*
	 * Get information from the result relation info structure.
	 */
	numIndices = resultRelInfo->ri_NumIndices;
	relationDescs = resultRelInfo->ri_IndexRelationDescs;
	indexInfoArray = resultRelInfo->ri_IndexRelationInfo;
	heapRelation = resultRelInfo->ri_RelationDesc;

	/*
	 * We will use the EState's per-tuple context for evaluating predicates
	 * and index expressions (creating it if it's not already there).
	 */
	econtext = GetPerTupleExprContext(estate);

	/* Arrange for econtext's scan tuple to be the tuple under test */
	econtext->ecxt_scantuple = slot;

	/*
	 * For each index, form index tuple and check if it satisfies the
	 * constraint.
	 */
	for (i = 0; i < numIndices; i++)
	{
		Relation	indexRelation = relationDescs[i];
		IndexInfo  *indexInfo;
		bool		satisfiesConstraint;

		if (indexRelation == NULL)
			continue;

		indexInfo = indexInfoArray[i];

		if (!indexInfo->ii_Unique && !indexInfo->ii_ExclusionOps)
			continue;

		/* If the index is marked as read-only, ignore it */
		if (!indexInfo->ii_ReadyForInserts)
			continue;

		/* When specific arbiter indexes requested, only examine them */
		if (arbiterIndexes != NIL &&
			!list_member_oid(arbiterIndexes,
							 indexRelation->rd_index->indexrelid))
			continue;

		if (!indexRelation->rd_index->indimmediate)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("ON CONFLICT does not support deferrable unique constraints/exclusion constraints as arbiters"),
					 errtableconstraint(heapRelation,
										RelationGetRelationName(indexRelation))));

		checkedIndex = true;

		/* Check for partial index */
		if (indexInfo->ii_Predicate != NIL)
		{
			ExprState  *predicate;

			/*
			 * If predicate state not set up yet, create it (in the estate's
			 * per-query context)
			 */
			predicate = indexInfo->ii_PredicateState;
			if (predicate == NULL)
			{
				predicate = ExecPrepareQual(indexInfo->ii_Predicate, estate);
				indexInfo->ii_PredicateState = predicate;
			}

			/* Skip this index-update if the predicate isn't satisfied */
			if (!ExecQual(predicate, econtext))
				continue;
		}

		/*
		 * FormIndexDatum fills in its values and isnull parameters with the
		 * appropriate values for the column(s) of the index.
		 */
		FormIndexDatum(indexInfo,
					   slot,
					   estate,
					   values,
					   isnull);

		satisfiesConstraint =
			check_exclusion_or_unique_constraint(heapRelation, indexRelation,
												 indexInfo, &invalidItemPtr,
												 values, isnull, estate, false,
												 CEOUC_WAIT, true,
												 conflictTid);
		if (!satisfiesConstraint)
			return false;
	}

	if (arbiterIndexes != NIL && !checkedIndex)
		elog(ERROR, "unexpected failure to find arbiter index");

	return true;
}

/*
 * Check for violation of an exclusion or unique constraint
 *
 * heap: the table containing the new tuple
 * index: the index supporting the constraint
 * indexInfo: info about the index, including the exclusion properties
 * tupleid: heap TID of the new tuple we have just inserted (invalid if we
 *		haven't inserted a new tuple yet)
 * values, isnull: the *index* column values computed for the new tuple
 * estate: an EState we can do evaluation in
 * newIndex: if true, we are trying to build a new index (this affects
 *		only the wording of error messages)
 * waitMode: whether to wait for concurrent inserters/deleters
 * violationOK: if true, don't throw error for violation
 * conflictTid: if not-NULL, the TID of the conflicting tuple is returned here
 *
 * Returns true if OK, false if actual or potential violation
 *
 * 'waitMode' determines what happens if a conflict is detected with a tuple
 * that was inserted or deleted by a transaction that's still running.
 * CEOUC_WAIT means that we wait for the transaction to commit, before
 * throwing an error or returning.  CEOUC_NOWAIT means that we report the
 * violation immediately; so the violation is only potential, and the caller
 * must recheck sometime later.  This behavior is convenient for deferred
 * exclusion checks; we need not bother queuing a deferred event if there is
 * definitely no conflict at insertion time.
 *
 * CEOUC_LIVELOCK_PREVENTING_WAIT is like CEOUC_NOWAIT, but we will sometimes
 * wait anyway, to prevent livelocking if two transactions try inserting at
 * the same time.  This is used with speculative insertions, for INSERT ON
 * CONFLICT statements. (See notes in file header)
 *
 * If violationOK is true, we just report the potential or actual violation to
 * the caller by returning 'false'.  Otherwise we throw a descriptive error
 * message here.  When violationOK is false, a false result is impossible.
 *
 * Note: The indexam is normally responsible for checking unique constraints,
 * so this normally only needs to be used for exclusion constraints.  But this
 * function is also called when doing a "pre-check" for conflicts on a unique
 * constraint, when doing speculative insertion.  Caller may use the returned
 * conflict TID to take further steps.
 */
static bool
check_exclusion_or_unique_constraint(Relation heap, Relation index,
									 IndexInfo *indexInfo,
									 ItemPointer tupleid,
									 Datum *values, bool *isnull,
									 EState *estate, bool newIndex,
									 CEOUC_WAIT_MODE waitMode,
									 bool violationOK,
									 ItemPointer conflictTid)
{
	Oid		   *constr_procs;
	uint16	   *constr_strats;
	Oid		   *index_collations = index->rd_indcollation;
	int			indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
	IndexScanDesc index_scan;
	ScanKeyData scankeys[INDEX_MAX_KEYS];
	SnapshotData DirtySnapshot;
	int			i;
	bool		conflict;
	bool		found_self;
	ExprContext *econtext;
	TupleTableSlot *existing_slot;
	TupleTableSlot *save_scantuple;

	if (indexInfo->ii_ExclusionOps)
	{
		constr_procs = indexInfo->ii_ExclusionProcs;
		constr_strats = indexInfo->ii_ExclusionStrats;
	}
	else
	{
		constr_procs = indexInfo->ii_UniqueProcs;
		constr_strats = indexInfo->ii_UniqueStrats;
	}

	/*
	 * If any of the input values are NULL, the constraint check is assumed to
	 * pass (i.e., we assume the operators are strict).
	 */
	for (i = 0; i < indnkeyatts; i++)
	{
		if (isnull[i])
			return true;
	}

	/*
	 * Search the tuples that are in the index for any violations, including
	 * tuples that aren't visible yet.
	 */
	InitDirtySnapshot(DirtySnapshot);

	for (i = 0; i < indnkeyatts; i++)
	{
		ScanKeyEntryInitialize(&scankeys[i],
							   0,
							   i + 1,
							   constr_strats[i],
							   InvalidOid,
							   index_collations[i],
							   constr_procs[i],
							   values[i]);
	}

	/*
	 * Need a TupleTableSlot to put existing tuples in.
	 *
	 * To use FormIndexDatum, we have to make the econtext's scantuple point
	 * to this slot.  Be sure to save and restore caller's value for
	 * scantuple.
	 */
	existing_slot = table_slot_create(heap, NULL);

	econtext = GetPerTupleExprContext(estate);
	save_scantuple = econtext->ecxt_scantuple;
	econtext->ecxt_scantuple = existing_slot;

	/*
	 * May have to restart scan from this point if a potential conflict is
	 * found.
	 */
retry:
	conflict = false;
	found_self = false;
	index_scan = index_beginscan(heap, index, &DirtySnapshot, indnkeyatts, 0);
	index_rescan(index_scan, scankeys, indnkeyatts, NULL, 0);

	while (index_getnext_slot(index_scan, ForwardScanDirection, existing_slot))
	{
		TransactionId xwait;
		XLTW_Oper	reason_wait;
		Datum		existing_values[INDEX_MAX_KEYS];
		bool		existing_isnull[INDEX_MAX_KEYS];
		char	   *error_new;
		char	   *error_existing;

		/*
		 * Ignore the entry for the tuple we're trying to check.
		 */
		if (ItemPointerIsValid(tupleid) &&
			ItemPointerEquals(tupleid, &existing_slot->tts_tid))
		{
			if (found_self)		/* should not happen */
				elog(ERROR, "found self tuple multiple times in index \"%s\"",
					 RelationGetRelationName(index));
			found_self = true;
			continue;
		}

		/*
		 * Extract the index column values and isnull flags from the existing
		 * tuple.
		 */
		FormIndexDatum(indexInfo, existing_slot, estate,
					   existing_values, existing_isnull);

		/* If lossy indexscan, must recheck the condition */
		if (index_scan->xs_recheck)
		{
			if (!index_recheck_constraint(index,
										  constr_procs,
										  existing_values,
										  existing_isnull,
										  values))
				continue;		/* tuple doesn't actually match, so no
								 * conflict */
		}

		/*
		 * At this point we have either a conflict or a potential conflict.
		 *
		 * If an in-progress transaction is affecting the visibility of this
		 * tuple, we need to wait for it to complete and then recheck (unless
		 * the caller requested not to).  For simplicity we do rechecking by
		 * just restarting the whole scan --- this case probably doesn't
		 * happen often enough to be worth trying harder, and anyway we don't
		 * want to hold any index internal locks while waiting.
		 */
		xwait = TransactionIdIsValid(DirtySnapshot.xmin) ?
			DirtySnapshot.xmin : DirtySnapshot.xmax;

		if (TransactionIdIsValid(xwait) &&
			(waitMode == CEOUC_WAIT ||
			 (waitMode == CEOUC_LIVELOCK_PREVENTING_WAIT &&
			  DirtySnapshot.speculativeToken &&
			  TransactionIdPrecedes(GetCurrentTransactionId(), xwait))))
		{
			reason_wait = indexInfo->ii_ExclusionOps ?
				XLTW_RecheckExclusionConstr : XLTW_InsertIndex;
			index_endscan(index_scan);
			if (DirtySnapshot.speculativeToken)
				SpeculativeInsertionWait(DirtySnapshot.xmin,
										 DirtySnapshot.speculativeToken);
			else
				XactLockTableWait(xwait, heap,
								  &existing_slot->tts_tid, reason_wait);
			goto retry;
		}

		/*
		 * We have a definite conflict (or a potential one, but the caller
		 * didn't want to wait).  Return it to caller, or report it.
		 */
		if (violationOK)
		{
			conflict = true;
			if (conflictTid)
				*conflictTid = existing_slot->tts_tid;
			break;
		}

		error_new = BuildIndexValueDescription(index, values, isnull);
		error_existing = BuildIndexValueDescription(index, existing_values,
													existing_isnull);
		if (newIndex)
			ereport(ERROR,
					(errcode(ERRCODE_EXCLUSION_VIOLATION),
					 errmsg("could not create exclusion constraint \"%s\"",
							RelationGetRelationName(index)),
					 error_new && error_existing ?
					 errdetail("Key %s conflicts with key %s.",
							   error_new, error_existing) :
					 errdetail("Key conflicts exist."),
					 errtableconstraint(heap,
										RelationGetRelationName(index))));
		else
			ereport(ERROR,
					(errcode(ERRCODE_EXCLUSION_VIOLATION),
					 errmsg("conflicting key value violates exclusion constraint \"%s\"",
							RelationGetRelationName(index)),
					 error_new && error_existing ?
					 errdetail("Key %s conflicts with existing key %s.",
							   error_new, error_existing) :
					 errdetail("Key conflicts with existing key."),
					 errtableconstraint(heap,
										RelationGetRelationName(index))));
	}

	index_endscan(index_scan);

	/*
	 * Ordinarily, at this point the search should have found the originally
	 * inserted tuple (if any), unless we exited the loop early because of
	 * conflict.  However, it is possible to define exclusion constraints for
	 * which that wouldn't be true --- for instance, if the operator is <>. So
	 * we no longer complain if found_self is still false.
	 */

	econtext->ecxt_scantuple = save_scantuple;

	ExecDropSingleTupleTableSlot(existing_slot);

	return !conflict;
}

/*
 * Check for violation of an exclusion constraint
 *
 * This is a dumbed down version of check_exclusion_or_unique_constraint
 * for external callers. They don't need all the special modes.
 */
void
check_exclusion_constraint(Relation heap, Relation index,
						   IndexInfo *indexInfo,
						   ItemPointer tupleid,
						   Datum *values, bool *isnull,
						   EState *estate, bool newIndex)
{
	(void) check_exclusion_or_unique_constraint(heap, index, indexInfo, tupleid,
												values, isnull,
												estate, newIndex,
												CEOUC_WAIT, false, NULL);
}

/*
 * Check existing tuple's index values to see if it really matches the
 * exclusion condition against the new_values.  Returns true if conflict.
 */
static bool
index_recheck_constraint(Relation index, Oid *constr_procs,
						 Datum *existing_values, bool *existing_isnull,
						 Datum *new_values)
{
	int			indnkeyatts = IndexRelationGetNumberOfKeyAttributes(index);
	int			i;

	for (i = 0; i < indnkeyatts; i++)
	{
		/* Assume the exclusion operators are strict */
		if (existing_isnull[i])
			return false;

		if (!DatumGetBool(OidFunctionCall2Coll(constr_procs[i],
											   index->rd_indcollation[i],
											   existing_values[i],
											   new_values[i])))
			return false;
	}

	return true;
}
/*
 * Check if ExecInsertIndexTuples() should pass the indexUnchanged hint.
 *
 * When the executor performs an UPDATE that requires a new round of index
 * tuples, determine if we should pass the 'indexUnchanged' = true hint for
 * this particular index.
 */
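/*
 * For example (illustrative SQL only; the table, column, and index names are
 * made up): given an index on (a), an UPDATE that changes only column b and
 * could not use HOT still leaves the index on (a) logically unchanged, so
 * the hint can be passed for that index:
 *
 *		CREATE INDEX t_a_idx ON t (a);
 *		UPDATE t SET b = b + 1;
 */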
static bool
index_unchanged_by_update(ResultRelInfo *resultRelInfo, EState *estate,
						  IndexInfo *indexInfo, Relation indexRelation)
{
	Bitmapset  *updatedCols = ExecGetUpdatedCols(resultRelInfo, estate);
	Bitmapset  *extraUpdatedCols = ExecGetExtraUpdatedCols(resultRelInfo, estate);
	Bitmapset  *allUpdatedCols;
	bool		hasexpression = false;
	List	   *idxExprs;

	/*
	 * Check for indexed attribute overlap with updated columns.
	 *
	 * Only do this for key columns.  A change to a non-key column within an
	 * INCLUDE index should not be counted here.  Non-key column values are
	 * opaque payload state to the index AM, a little like an extra table TID.
	 */
	for (int attr = 0; attr < indexInfo->ii_NumIndexKeyAttrs; attr++)
	{
		int			keycol = indexInfo->ii_IndexAttrNumbers[attr];

		if (keycol <= 0)
		{
			/*
			 * Skip expressions for now, but remember to deal with them later
			 * on
			 */
			hasexpression = true;
			continue;
		}

		if (bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
						  updatedCols) ||
			bms_is_member(keycol - FirstLowInvalidHeapAttributeNumber,
						  extraUpdatedCols))
		{
			/* Changed key column -- don't hint for this index */
			return false;
		}
	}

	/*
	 * When we get this far and index has no expressions, return true so that
	 * index_insert() call will go on to pass 'indexUnchanged' = true hint.
	 *
	 * The _absence_ of an indexed key attribute that overlaps with updated
	 * attributes (in addition to the total absence of indexed expressions)
	 * shows that the index as a whole is logically unchanged by UPDATE.
	 */
	if (!hasexpression)
		return true;

	/*
	 * Need to pass only one bms to expression_tree_walker helper function.
	 * Avoid allocating memory in common case where there are no extra cols.
	 */
	if (!extraUpdatedCols)
		allUpdatedCols = updatedCols;
	else
		allUpdatedCols = bms_union(updatedCols, extraUpdatedCols);

	/*
	 * We have to work slightly harder in the event of indexed expressions,
	 * but the principle is the same as before: try to find columns (Vars,
	 * actually) that overlap with known-updated columns.
	 *
	 * If we find any matching Vars, don't pass hint for index.  Otherwise
	 * pass hint.
	 */
	idxExprs = RelationGetIndexExpressions(indexRelation);
	hasexpression = index_expression_changed_walker((Node *) idxExprs,
													allUpdatedCols);
	list_free(idxExprs);
	if (extraUpdatedCols)
		bms_free(allUpdatedCols);

	if (hasexpression)
		return false;

	return true;
}

/*
 * Indexed expression helper for index_unchanged_by_update().
 *
 * Returns true when a Var that appears within allUpdatedCols is located.
 */
static bool
index_expression_changed_walker(Node *node, Bitmapset *allUpdatedCols)
{
	if (node == NULL)
		return false;

	if (IsA(node, Var))
	{
		Var		   *var = (Var *) node;

		if (bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
						  allUpdatedCols))
		{
			/* Var was updated -- indicates that we should not hint */
			return true;
		}

		/* Still haven't found a reason to not pass the hint */
		return false;
	}

	return expression_tree_walker(node, index_expression_changed_walker,
								  (void *) allUpdatedCols);
}