1 /*-------------------------------------------------------------------------
2  *
3  * nodeModifyTable.c
4  *	  routines to handle ModifyTable nodes.
5  *
6  * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeModifyTable.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  *		ExecInitModifyTable - initialize the ModifyTable node
17  *		ExecModifyTable		- retrieve the next tuple from the node
18  *		ExecEndModifyTable	- shut down the ModifyTable node
19  *		ExecReScanModifyTable - rescan the ModifyTable node
20  *
21  *	 NOTES
22  *		Each ModifyTable node contains a list of one or more subplans,
23  *		much like an Append node.  There is one subplan per result relation.
24  *		The key reason for this is that in an inherited UPDATE command, each
25  *		result relation could have a different schema (more or different
26  *		columns) requiring a different plan tree to produce it.  In an
27  *		inherited DELETE, all the subplans should produce the same output
28  *		rowtype, but we might still find that different plans are appropriate
29  *		for different child relations.
30  *
31  *		If the query specifies RETURNING, then the ModifyTable returns a
32  *		RETURNING tuple after completing each row insert, update, or delete.
33  *		It must be called again to continue the operation.  Without RETURNING,
34  *		we just loop within the node until all the work is done, then
35  *		return NULL.  This avoids useless call/return overhead.
36  */
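
/*
 * Illustrative usage (a sketch with a hypothetical table, not part of the
 * code): with RETURNING, this node hands back one projected tuple per
 * modified row and is called repeatedly; without RETURNING, it does all of
 * the work in a single call and returns NULL.
 *
 *		-- one result row per updated row, produced incrementally
 *		UPDATE accounts SET balance = balance + 10
 *		WHERE branch_id = 7
 *		RETURNING id, balance;
 *
 *		-- no RETURNING: the node loops internally until done
 *		UPDATE accounts SET balance = balance + 10 WHERE branch_id = 7;
 */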
37 
38 #include "postgres.h"
39 
40 #include "access/heapam.h"
41 #include "access/htup_details.h"
42 #include "access/tableam.h"
43 #include "access/xact.h"
44 #include "catalog/catalog.h"
45 #include "commands/trigger.h"
46 #include "executor/execPartition.h"
47 #include "executor/executor.h"
48 #include "executor/nodeModifyTable.h"
49 #include "foreign/fdwapi.h"
50 #include "miscadmin.h"
51 #include "nodes/nodeFuncs.h"
52 #include "rewrite/rewriteHandler.h"
53 #include "storage/bufmgr.h"
54 #include "storage/lmgr.h"
55 #include "utils/builtins.h"
56 #include "utils/datum.h"
57 #include "utils/memutils.h"
58 #include "utils/rel.h"
59 
60 
61 static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
62 								 ResultRelInfo *resultRelInfo,
63 								 ItemPointer conflictTid,
64 								 TupleTableSlot *planSlot,
65 								 TupleTableSlot *excludedSlot,
66 								 EState *estate,
67 								 bool canSetTag,
68 								 TupleTableSlot **returning);
69 static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
70 											   EState *estate,
71 											   PartitionTupleRouting *proute,
72 											   ResultRelInfo *targetRelInfo,
73 											   TupleTableSlot *slot);
74 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
75 static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
76 static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
77 												   int whichplan);
78 
79 /*
80  * Verify that the tuples to be produced by INSERT or UPDATE match the
81  * target relation's rowtype
82  *
83  * We do this to guard against stale plans.  If plan invalidation is
84  * functioning properly then we should never get a failure here, but better
85  * safe than sorry.  Note that this is called after we have obtained lock
86  * on the target rel, so the rowtype can't change underneath us.
87  *
88  * The plan output is represented by its targetlist, because that makes
89  * handling the dropped-column case easier.
90  */
91 static void
92 ExecCheckPlanOutput(Relation resultRel, List *targetList)
93 {
94 	TupleDesc	resultDesc = RelationGetDescr(resultRel);
95 	int			attno = 0;
96 	ListCell   *lc;
97 
98 	foreach(lc, targetList)
99 	{
100 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
101 		Form_pg_attribute attr;
102 
103 		if (tle->resjunk)
104 			continue;			/* ignore junk tlist items */
105 
106 		if (attno >= resultDesc->natts)
107 			ereport(ERROR,
108 					(errcode(ERRCODE_DATATYPE_MISMATCH),
109 					 errmsg("table row type and query-specified row type do not match"),
110 					 errdetail("Query has too many columns.")));
111 		attr = TupleDescAttr(resultDesc, attno);
112 		attno++;
113 
114 		if (!attr->attisdropped)
115 		{
116 			/* Normal case: demand type match */
117 			if (exprType((Node *) tle->expr) != attr->atttypid)
118 				ereport(ERROR,
119 						(errcode(ERRCODE_DATATYPE_MISMATCH),
120 						 errmsg("table row type and query-specified row type do not match"),
121 						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
122 								   format_type_be(attr->atttypid),
123 								   attno,
124 								   format_type_be(exprType((Node *) tle->expr)))));
125 		}
126 		else
127 		{
128 			/*
129 			 * For a dropped column, we can't check atttypid (it's likely 0).
130 			 * In any case the planner has most likely inserted an INT4 null.
131 			 * What we insist on is just *some* NULL constant.
132 			 */
133 			if (!IsA(tle->expr, Const) ||
134 				!((Const *) tle->expr)->constisnull)
135 				ereport(ERROR,
136 						(errcode(ERRCODE_DATATYPE_MISMATCH),
137 						 errmsg("table row type and query-specified row type do not match"),
138 						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
139 								   attno)));
140 		}
141 	}
142 	if (attno != resultDesc->natts)
143 		ereport(ERROR,
144 				(errcode(ERRCODE_DATATYPE_MISMATCH),
145 				 errmsg("table row type and query-specified row type do not match"),
146 				 errdetail("Query has too few columns.")));
147 }
148 
149 /*
150  * ExecProcessReturning --- evaluate a RETURNING list
151  *
152  * projectReturning: the projection to evaluate
153  * resultRelOid: result relation's OID
154  * tupleSlot: slot holding tuple actually inserted/updated/deleted
155  * planSlot: slot holding tuple returned by top subplan node
156  *
157  * In cross-partition UPDATE cases, projectReturning and planSlot are as
158  * for the source partition, and tupleSlot must conform to that.  But
159  * resultRelOid is for the destination partition.
160  *
161  * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
162  * scan tuple.
163  *
164  * Returns a slot holding the result tuple
165  */
166 static TupleTableSlot *
167 ExecProcessReturning(ProjectionInfo *projectReturning,
168 					 Oid resultRelOid,
169 					 TupleTableSlot *tupleSlot,
170 					 TupleTableSlot *planSlot)
171 {
172 	ExprContext *econtext = projectReturning->pi_exprContext;
173 
174 	/* Make tuple and any needed join variables available to ExecProject */
175 	if (tupleSlot)
176 		econtext->ecxt_scantuple = tupleSlot;
177 	else
178 		Assert(econtext->ecxt_scantuple);
179 	econtext->ecxt_outertuple = planSlot;
180 
181 	/*
182 	 * RETURNING expressions might reference the tableoid column, so be sure
183 	 * we expose the desired OID, ie that of the real target relation.
184 	 */
185 	econtext->ecxt_scantuple->tts_tableOid = resultRelOid;
186 
187 	/* Compute the RETURNING expressions */
188 	return ExecProject(projectReturning);
189 }
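
/*
 * Example (a sketch with a hypothetical partitioned table): because we set
 * tts_tableOid above, a RETURNING list that references tableoid reports the
 * relation that actually received the row, e.g. the destination partition
 * of a routed INSERT:
 *
 *		INSERT INTO measurements VALUES ('2020-01-01', 42)
 *		RETURNING tableoid::regclass, *;
 */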
190 
191 /*
192  * ExecCheckTupleVisible -- verify tuple is visible
193  *
194  * It would not be consistent with the guarantees of the higher isolation levels
195  * to skip the insertion (i.e., take speculative insertion's alternative path) on
196  * the basis of another tuple that is not visible to the MVCC snapshot.
197  * Check for the need to raise a serialization failure, and do so as necessary.
198  */
199 static void
200 ExecCheckTupleVisible(EState *estate,
201 					  Relation rel,
202 					  TupleTableSlot *slot)
203 {
204 	if (!IsolationUsesXactSnapshot())
205 		return;
206 
207 	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
208 	{
209 		Datum		xminDatum;
210 		TransactionId xmin;
211 		bool		isnull;
212 
213 		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
214 		Assert(!isnull);
215 		xmin = DatumGetTransactionId(xminDatum);
216 
217 		/*
218 		 * We should not raise a serialization failure if the conflict is
219 		 * against a tuple inserted by our own transaction, even if it's not
220 		 * visible to our snapshot.  (This would happen, for example, if
221 		 * conflicting keys are proposed for insertion in a single command.)
222 		 */
223 		if (!TransactionIdIsCurrentTransactionId(xmin))
224 			ereport(ERROR,
225 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
226 					 errmsg("could not serialize access due to concurrent update")));
227 	}
228 }
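
/*
 * A sketch of the situation this guards against (hypothetical table with a
 * unique constraint on "name"): at REPEATABLE READ or SERIALIZABLE, a
 * conflicting row committed after our snapshot was taken is invisible to
 * us, so silently taking the ON CONFLICT path would violate snapshot
 * semantics; we raise a serialization failure instead.
 *
 *		-- session 1
 *		BEGIN ISOLATION LEVEL REPEATABLE READ;
 *		SELECT count(*) FROM tags;		-- snapshot taken here
 *
 *		-- session 2
 *		INSERT INTO tags (name) VALUES ('blue');	-- commits
 *
 *		-- session 1
 *		INSERT INTO tags (name) VALUES ('blue') ON CONFLICT DO NOTHING;
 *		-- ERROR:  could not serialize access due to concurrent update
 */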
229 
230 /*
231  * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
232  */
233 static void
234 ExecCheckTIDVisible(EState *estate,
235 					ResultRelInfo *relinfo,
236 					ItemPointer tid,
237 					TupleTableSlot *tempSlot)
238 {
239 	Relation	rel = relinfo->ri_RelationDesc;
240 
241 	/* Redundantly check isolation level */
242 	if (!IsolationUsesXactSnapshot())
243 		return;
244 
245 	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
246 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
247 	ExecCheckTupleVisible(estate, rel, tempSlot);
248 	ExecClearTuple(tempSlot);
249 }
250 
251 /*
252  * Compute stored generated columns for a tuple
253  */
254 void
255 ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot, CmdType cmdtype)
256 {
257 	ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
258 	Relation	rel = resultRelInfo->ri_RelationDesc;
259 	TupleDesc	tupdesc = RelationGetDescr(rel);
260 	int			natts = tupdesc->natts;
261 	MemoryContext oldContext;
262 	Datum	   *values;
263 	bool	   *nulls;
264 
265 	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);
266 
267 	/*
268 	 * If first time through for this result relation, build expression
269 	 * nodetrees for rel's stored generation expressions.  Keep them in the
270 	 * per-query memory context so they'll survive throughout the query.
271 	 */
272 	if (resultRelInfo->ri_GeneratedExprs == NULL)
273 	{
274 		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
275 
276 		resultRelInfo->ri_GeneratedExprs =
277 			(ExprState **) palloc(natts * sizeof(ExprState *));
278 		resultRelInfo->ri_NumGeneratedNeeded = 0;
279 
280 		for (int i = 0; i < natts; i++)
281 		{
282 			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
283 			{
284 				Expr	   *expr;
285 
286 				/*
287 				 * If it's an update and the current column was not marked as
288 				 * being updated, then we can skip the computation.  But if
289 				 * there is a BEFORE ROW UPDATE trigger, we cannot skip
290 				 * because the trigger might affect additional columns.
291 				 */
292 				if (cmdtype == CMD_UPDATE &&
293 					!(rel->trigdesc && rel->trigdesc->trig_update_before_row) &&
294 					!bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
295 								   ExecGetExtraUpdatedCols(resultRelInfo, estate)))
296 				{
297 					resultRelInfo->ri_GeneratedExprs[i] = NULL;
298 					continue;
299 				}
300 
301 				expr = (Expr *) build_column_default(rel, i + 1);
302 				if (expr == NULL)
303 					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
304 						 i + 1, RelationGetRelationName(rel));
305 
306 				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
307 				resultRelInfo->ri_NumGeneratedNeeded++;
308 			}
309 		}
310 
311 		MemoryContextSwitchTo(oldContext);
312 	}
313 
314 	/*
315 	 * If no generated columns have been affected by this change, then skip
316 	 * the rest.
317 	 */
318 	if (resultRelInfo->ri_NumGeneratedNeeded == 0)
319 		return;
320 
321 	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
322 
323 	values = palloc(sizeof(*values) * natts);
324 	nulls = palloc(sizeof(*nulls) * natts);
325 
326 	slot_getallattrs(slot);
327 	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);
328 
329 	for (int i = 0; i < natts; i++)
330 	{
331 		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
332 
333 		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED &&
334 			resultRelInfo->ri_GeneratedExprs[i])
335 		{
336 			ExprContext *econtext;
337 			Datum		val;
338 			bool		isnull;
339 
340 			econtext = GetPerTupleExprContext(estate);
341 			econtext->ecxt_scantuple = slot;
342 
343 			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);
344 
345 			/*
346 			 * We must make a copy of val as we have no guarantees about where
347 			 * memory for a pass-by-reference Datum is located.
348 			 */
349 			if (!isnull)
350 				val = datumCopy(val, attr->attbyval, attr->attlen);
351 
352 			values[i] = val;
353 			nulls[i] = isnull;
354 		}
355 		else
356 		{
357 			if (!nulls[i])
358 				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
359 		}
360 	}
361 
362 	ExecClearTuple(slot);
363 	memcpy(slot->tts_values, values, sizeof(*values) * natts);
364 	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
365 	ExecStoreVirtualTuple(slot);
366 	ExecMaterializeSlot(slot);
367 
368 	MemoryContextSwitchTo(oldContext);
369 }
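
/*
 * Usage sketch for the logic above (hypothetical table): the generation
 * expression of a stored generated column is evaluated here for every
 * INSERT, but for an UPDATE it can be skipped when no referenced column is
 * updated and no BEFORE ROW UPDATE trigger could have changed the row.
 *
 *		CREATE TABLE boxes (
 *			label	text,
 *			w		numeric,
 *			h		numeric,
 *			area	numeric GENERATED ALWAYS AS (w * h) STORED
 *		);
 *		INSERT INTO boxes (label, w, h) VALUES ('a', 2, 3);	-- area computed
 *		UPDATE boxes SET w = 4 WHERE label = 'a';		-- area recomputed
 *		UPDATE boxes SET label = 'b' WHERE label = 'a';	-- can be skipped
 */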
370 
371 /* ----------------------------------------------------------------
372  *		ExecInsert
373  *
374  *		For INSERT, we have to insert the tuple into the target relation
375  *		and insert appropriate tuples into the index relations.
376  *
377  *		slot contains the new tuple value to be stored.
378  *		planSlot is the output of the ModifyTable's subplan; we use it
379  *		to access "junk" columns that are not going to be stored.
380  *		In a cross-partition UPDATE, srcSlot is the slot that held the
381  *		updated tuple for the source relation; otherwise it's NULL.
382  *
383  *		returningRelInfo is the resultRelInfo for the source relation of a
384  *		cross-partition UPDATE; otherwise it's the current result relation.
385  *		We use it to process RETURNING lists, for reasons explained below.
386  *
387  *		Returns RETURNING result if any, otherwise NULL.
388  * ----------------------------------------------------------------
389  */
390 static TupleTableSlot *
391 ExecInsert(ModifyTableState *mtstate,
392 		   TupleTableSlot *slot,
393 		   TupleTableSlot *planSlot,
394 		   TupleTableSlot *srcSlot,
395 		   ResultRelInfo *returningRelInfo,
396 		   EState *estate,
397 		   bool canSetTag)
398 {
399 	ResultRelInfo *resultRelInfo;
400 	Relation	resultRelationDesc;
401 	List	   *recheckIndexes = NIL;
402 	TupleTableSlot *result = NULL;
403 	TransitionCaptureState *ar_insert_trig_tcs;
404 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
405 	OnConflictAction onconflict = node->onConflictAction;
406 
407 	ExecMaterializeSlot(slot);
408 
409 	/*
410 	 * get information on the (current) result relation
411 	 */
412 	resultRelInfo = estate->es_result_relation_info;
413 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
414 
415 	/*
416 	 * BEFORE ROW INSERT Triggers.
417 	 *
418 	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
419 	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
420 	 * violations before firing these triggers, because they can change the
421 	 * values to insert.  Also, they can run arbitrary user-defined code with
422 	 * side-effects that we can't cancel by just not inserting the tuple.
423 	 */
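	/*
	 * Illustrative sketch (hypothetical objects): the BEFORE ROW INSERT
	 * trigger fired just below runs even for rows whose insertion is later
	 * skipped by ON CONFLICT DO NOTHING, so any side effects it has (audit
	 * rows, sequence advancement, ...) happen once per attempted insertion:
	 *
	 *		INSERT INTO tags (name) VALUES ('blue'), ('blue')
	 *		ON CONFLICT (name) DO NOTHING;
	 *		-- a BEFORE ROW INSERT trigger on tags fires twice
	 */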
424 	if (resultRelInfo->ri_TrigDesc &&
425 		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
426 	{
427 		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
428 			return NULL;		/* "do nothing" */
429 	}
430 
431 	/* INSTEAD OF ROW INSERT Triggers */
432 	if (resultRelInfo->ri_TrigDesc &&
433 		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
434 	{
435 		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
436 			return NULL;		/* "do nothing" */
437 	}
438 	else if (resultRelInfo->ri_FdwRoutine)
439 	{
440 		/*
441 		 * GENERATED expressions might reference the tableoid column, so
442 		 * (re-)initialize tts_tableOid before evaluating them.
443 		 */
444 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
445 
446 		/*
447 		 * Compute stored generated columns
448 		 */
449 		if (resultRelationDesc->rd_att->constr &&
450 			resultRelationDesc->rd_att->constr->has_generated_stored)
451 			ExecComputeStoredGenerated(estate, slot, CMD_INSERT);
452 
453 		/*
454 		 * insert into foreign table: let the FDW do it
455 		 */
456 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
457 															   resultRelInfo,
458 															   slot,
459 															   planSlot);
460 
461 		if (slot == NULL)		/* "do nothing" */
462 			return NULL;
463 
464 		/*
465 		 * AFTER ROW Triggers or RETURNING expressions might reference the
466 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
467 		 * them.  (This covers the case where the FDW replaced the slot.)
468 		 */
469 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
470 	}
471 	else
472 	{
473 		WCOKind		wco_kind;
474 
475 		/*
476 		 * Constraints and GENERATED expressions might reference the tableoid
477 		 * column, so (re-)initialize tts_tableOid before evaluating them.
478 		 */
479 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
480 
481 		/*
482 		 * Compute stored generated columns
483 		 */
484 		if (resultRelationDesc->rd_att->constr &&
485 			resultRelationDesc->rd_att->constr->has_generated_stored)
486 			ExecComputeStoredGenerated(estate, slot, CMD_INSERT);
487 
488 		/*
489 		 * Check any RLS WITH CHECK policies.
490 		 *
491 		 * Normally we should check INSERT policies. But if the insert is the
492 		 * result of a partition key update that moved the tuple to a new
493 		 * partition, we should instead check UPDATE policies, because we are
494 		 * executing policies defined on the target table, and not those
495 		 * defined on the child partitions.
496 		 */
497 		wco_kind = (mtstate->operation == CMD_UPDATE) ?
498 			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
499 
500 		/*
501 		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
502 		 * we are looking for at this point.
503 		 */
504 		if (resultRelInfo->ri_WithCheckOptions != NIL)
505 			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
506 
507 		/*
508 		 * Check the constraints of the tuple.
509 		 */
510 		if (resultRelationDesc->rd_att->constr)
511 			ExecConstraints(resultRelInfo, slot, estate);
512 
513 		/*
514 		 * Also check the tuple against the partition constraint, if there is
515 		 * one; except that if we got here via tuple-routing, we need to recheck
516 		 * it only if a BR INSERT trigger on the partition might have changed the tuple.
517 		 */
518 		if (resultRelInfo->ri_PartitionCheck &&
519 			(resultRelInfo->ri_RootResultRelInfo == NULL ||
520 			 (resultRelInfo->ri_TrigDesc &&
521 			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
522 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
523 
524 		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
525 		{
526 			/* Perform a speculative insertion. */
527 			uint32		specToken;
528 			ItemPointerData conflictTid;
529 			bool		specConflict;
530 			List	   *arbiterIndexes;
531 
532 			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
533 
534 			/*
535 			 * Do a non-conclusive check for conflicts first.
536 			 *
537 			 * We're not holding any locks yet, so this doesn't guarantee that
538 			 * the later insert won't conflict.  But it avoids leaving behind
539 			 * a lot of canceled speculative insertions, if you run a lot of
540 			 * INSERT ON CONFLICT statements that do conflict.
541 			 *
542 			 * We loop back here if we find a conflict below, either during
543 			 * the pre-check, or when we re-check after inserting the tuple
544 			 * speculatively.
545 			 */
546 	vlock:
547 			specConflict = false;
548 			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
549 										   arbiterIndexes))
550 			{
551 				/* committed conflict tuple found */
552 				if (onconflict == ONCONFLICT_UPDATE)
553 				{
554 					/*
555 					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
556 					 * part.  Be prepared to retry if the UPDATE fails because
557 					 * of another concurrent UPDATE/DELETE to the conflict
558 					 * tuple.
559 					 */
560 					TupleTableSlot *returning = NULL;
561 
562 					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
563 											 &conflictTid, planSlot, slot,
564 											 estate, canSetTag, &returning))
565 					{
566 						InstrCountTuples2(&mtstate->ps, 1);
567 						return returning;
568 					}
569 					else
570 						goto vlock;
571 				}
572 				else
573 				{
574 					/*
575 					 * In case of ON CONFLICT DO NOTHING, do nothing. However,
576 					 * verify that the tuple is visible to the executor's MVCC
577 					 * snapshot at higher isolation levels.
578 					 *
579 					 * Using ExecGetReturningSlot() to store the tuple for the
580 					 * recheck isn't that pretty, but we can't trivially use
581 					 * the input slot, because it might not be of a compatible
582 					 * type. As there's no conflicting use of ExecGetReturningSlot()
583 					 * in the DO NOTHING case, it is safe to reuse it here.
584 					 */
585 					Assert(onconflict == ONCONFLICT_NOTHING);
586 					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
587 										ExecGetReturningSlot(estate, resultRelInfo));
588 					InstrCountTuples2(&mtstate->ps, 1);
589 					return NULL;
590 				}
591 			}
592 
593 			/*
594 			 * Before we start insertion proper, acquire our "speculative
595 			 * insertion lock".  Others can use that to wait for us to decide
596 			 * if we're going to go ahead with the insertion, instead of
597 			 * waiting for the whole transaction to complete.
598 			 */
599 			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
600 
601 			/* insert the tuple, with the speculative token */
602 			table_tuple_insert_speculative(resultRelationDesc, slot,
603 										   estate->es_output_cid,
604 										   0,
605 										   NULL,
606 										   specToken);
607 
608 			/* insert index entries for tuple */
609 			recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
610 												   &specConflict,
611 												   arbiterIndexes);
612 
613 			/* adjust the tuple's state accordingly */
614 			table_tuple_complete_speculative(resultRelationDesc, slot,
615 											 specToken, !specConflict);
616 
617 			/*
618 			 * Wake up anyone waiting for our decision.  They will re-check
619 			 * the tuple, see that it's no longer speculative, and wait on our
620 			 * XID as if this was a regularly inserted tuple all along.  Or if
621 			 * we killed the tuple, they will see it's dead, and proceed as if
622 			 * the tuple never existed.
623 			 */
624 			SpeculativeInsertionLockRelease(GetCurrentTransactionId());
625 
626 			/*
627 			 * If there was a conflict, start from the beginning.  We'll do
628 			 * the pre-check again, which will now find the conflicting tuple
629 			 * (unless it aborts before we get there).
630 			 */
631 			if (specConflict)
632 			{
633 				list_free(recheckIndexes);
634 				goto vlock;
635 			}
636 
637 			/* Since there was no insertion conflict, we're done */
638 		}
639 		else
640 		{
641 			/* insert the tuple normally */
642 			table_tuple_insert(resultRelationDesc, slot,
643 							   estate->es_output_cid,
644 							   0, NULL);
645 
646 			/* insert index entries for tuple */
647 			if (resultRelInfo->ri_NumIndices > 0)
648 				recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
649 													   NIL);
650 		}
651 	}
652 
653 	if (canSetTag)
654 	{
655 		(estate->es_processed)++;
656 		setLastTid(&slot->tts_tid);
657 	}
658 
659 	/*
660 	 * If this insert is the result of a partition key update that moved the
661 	 * tuple to a new partition, put this row into the transition NEW TABLE,
662 	 * if there is one. We need to do this separately for DELETE and INSERT
663 	 * because they happen on different tables.
664 	 */
665 	ar_insert_trig_tcs = mtstate->mt_transition_capture;
666 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
667 		&& mtstate->mt_transition_capture->tcs_update_new_table)
668 	{
669 		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
670 							 NULL,
671 							 slot,
672 							 NULL,
673 							 mtstate->mt_transition_capture);
674 
675 		/*
676 		 * We've already captured the NEW TABLE row, so make sure any AR
677 		 * INSERT trigger fired below doesn't capture it again.
678 		 */
679 		ar_insert_trig_tcs = NULL;
680 	}
681 
682 	/* AFTER ROW INSERT Triggers */
683 	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
684 						 ar_insert_trig_tcs);
685 
686 	list_free(recheckIndexes);
687 
688 	/*
689 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
690 	 * required to do this after testing all constraints and uniqueness
691 	 * violations per the SQL spec, so we do it after actually inserting the
692 	 * record into the heap and all indexes.
693 	 *
694 	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
695 	 * tuple will never be seen, if it violates the WITH CHECK OPTION.
696 	 *
697 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
698 	 * are looking for at this point.
699 	 */
700 	if (resultRelInfo->ri_WithCheckOptions != NIL)
701 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
702 
703 	/* Process RETURNING if present */
704 	if (returningRelInfo->ri_projectReturning)
705 	{
706 		/*
707 		 * In a cross-partition UPDATE with RETURNING, we have to use the
708 		 * source partition's RETURNING list, because that matches the output
709 		 * of the planSlot, while the destination partition might have
710 		 * different resjunk columns.  This means we have to map the
711 		 * destination tuple back to the source's format so we can apply that
712 		 * RETURNING list.  This is expensive, but it should be an uncommon
713 		 * corner case, so we won't spend much effort on making it fast.
714 		 *
715 		 * We assume that we can use srcSlot to hold the re-converted tuple.
716 		 * Note that in the common case where the child partitions both match
717 		 * the root's format, previous optimizations will have resulted in
718 		 * slot and srcSlot being identical, cueing us that there's nothing to
719 		 * do here.
720 		 */
721 		if (returningRelInfo != resultRelInfo && slot != srcSlot)
722 		{
723 			Relation	srcRelationDesc = returningRelInfo->ri_RelationDesc;
724 			AttrMap    *map;
725 
726 			map = build_attrmap_by_name_if_req(RelationGetDescr(resultRelationDesc),
727 											   RelationGetDescr(srcRelationDesc));
728 			if (map)
729 			{
730 				TupleTableSlot *origSlot = slot;
731 
732 				slot = execute_attr_map_slot(map, slot, srcSlot);
733 				slot->tts_tid = origSlot->tts_tid;
734 				slot->tts_tableOid = origSlot->tts_tableOid;
735 				free_attrmap(map);
736 			}
737 		}
738 
739 		result = ExecProcessReturning(returningRelInfo->ri_projectReturning,
740 									  RelationGetRelid(resultRelationDesc),
741 									  slot, planSlot);
742 	}
743 
744 	return result;
745 }
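
/*
 * A usage sketch tying the ON CONFLICT handling above together
 * (hypothetical table with a unique constraint on "name"):
 *
 *		INSERT INTO tags (name, hits) VALUES ('blue', 1)
 *		ON CONFLICT (name) DO UPDATE SET hits = tags.hits + 1;
 *
 * The pre-check or the post-insertion re-check may find a conflicting
 * tuple; in that case any speculatively inserted tuple is killed and we
 * either run the assignments of DO UPDATE or skip the row for DO NOTHING,
 * looping back to the pre-check if the conflicting tuple is itself
 * concurrently updated or deleted.
 */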
746 
747 /* ----------------------------------------------------------------
748  *		ExecDelete
749  *
750  *		DELETE is like UPDATE, except that we delete the tuple and no
751  *		index modifications are needed.
752  *
753  *		When deleting from a table, tupleid identifies the tuple to
754  *		delete and oldtuple is NULL.  When deleting from a view,
755  *		oldtuple is passed to the INSTEAD OF triggers and identifies
756  *		what to delete, and tupleid is invalid.  When deleting from a
757  *		foreign table, tupleid is invalid; the FDW has to figure out
758  *		which row to delete using data from the planSlot.  oldtuple is
759  *		passed to foreign table triggers; it is NULL when the foreign
760  *		table has no relevant triggers.  We use tupleDeleted to indicate
761  *		whether the tuple is actually deleted; callers can use it to
762  *		decide whether to continue the operation.  When this DELETE is a
763  *		part of an UPDATE of partition-key, then the slot returned by
764  *		EvalPlanQual() is passed back using output parameter epqslot.
765  *
766  *		Returns RETURNING result if any, otherwise NULL.
767  * ----------------------------------------------------------------
768  */
769 static TupleTableSlot *
770 ExecDelete(ModifyTableState *mtstate,
771 		   ItemPointer tupleid,
772 		   HeapTuple oldtuple,
773 		   TupleTableSlot *planSlot,
774 		   EPQState *epqstate,
775 		   EState *estate,
776 		   bool processReturning,
777 		   bool canSetTag,
778 		   bool changingPart,
779 		   bool *tupleDeleted,
780 		   TupleTableSlot **epqreturnslot)
781 {
782 	ResultRelInfo *resultRelInfo;
783 	Relation	resultRelationDesc;
784 	TM_Result	result;
785 	TM_FailureData tmfd;
786 	TupleTableSlot *slot = NULL;
787 	TransitionCaptureState *ar_delete_trig_tcs;
788 
789 	if (tupleDeleted)
790 		*tupleDeleted = false;
791 
792 	/*
793 	 * get information on the (current) result relation
794 	 */
795 	resultRelInfo = estate->es_result_relation_info;
796 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
797 
798 	/* BEFORE ROW DELETE Triggers */
799 	if (resultRelInfo->ri_TrigDesc &&
800 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
801 	{
802 		bool		dodelete;
803 
804 		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
805 										tupleid, oldtuple, epqreturnslot);
806 
807 		if (!dodelete)			/* "do nothing" */
808 			return NULL;
809 	}
810 
811 	/* INSTEAD OF ROW DELETE Triggers */
812 	if (resultRelInfo->ri_TrigDesc &&
813 		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
814 	{
815 		bool		dodelete;
816 
817 		Assert(oldtuple != NULL);
818 		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);
819 
820 		if (!dodelete)			/* "do nothing" */
821 			return NULL;
822 	}
823 	else if (resultRelInfo->ri_FdwRoutine)
824 	{
825 		/*
826 		 * delete from foreign table: let the FDW do it
827 		 *
828 		 * We offer the returning slot as a place to store RETURNING data,
829 		 * although the FDW can return some other slot if it wants.
830 		 */
831 		slot = ExecGetReturningSlot(estate, resultRelInfo);
832 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
833 															   resultRelInfo,
834 															   slot,
835 															   planSlot);
836 
837 		if (slot == NULL)		/* "do nothing" */
838 			return NULL;
839 
840 		/*
841 		 * RETURNING expressions might reference the tableoid column, so
842 		 * (re)initialize tts_tableOid before evaluating them.
843 		 */
844 		if (TTS_EMPTY(slot))
845 			ExecStoreAllNullTuple(slot);
846 
847 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
848 	}
849 	else
850 	{
851 		/*
852 		 * delete the tuple
853 		 *
854 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
855 		 * that the row to be deleted is visible to that snapshot, and throw a
856 		 * can't-serialize error if not. This is a special-case behavior
857 		 * needed for referential integrity updates in transaction-snapshot
858 		 * mode transactions.
859 		 */
860 ldelete:;
861 		result = table_tuple_delete(resultRelationDesc, tupleid,
862 									estate->es_output_cid,
863 									estate->es_snapshot,
864 									estate->es_crosscheck_snapshot,
865 									true /* wait for commit */ ,
866 									&tmfd,
867 									changingPart);
868 
869 		switch (result)
870 		{
871 			case TM_SelfModified:
872 
873 				/*
874 				 * The target tuple was already updated or deleted by the
875 				 * current command, or by a later command in the current
876 				 * transaction.  The former case is possible in a join DELETE
877 				 * where multiple tuples join to the same target tuple. This
878 				 * is somewhat questionable, but Postgres has always allowed
879 				 * it: we just ignore additional deletion attempts.
880 				 *
881 				 * The latter case arises if the tuple is modified by a
882 				 * command in a BEFORE trigger, or perhaps by a command in a
883 				 * volatile function used in the query.  In such situations we
884 				 * should not ignore the deletion, but it is equally unsafe to
885 				 * proceed.  We don't want to discard the original DELETE
886 				 * while keeping the triggered actions based on its deletion;
887 				 * and it would be no better to allow the original DELETE
888 				 * while discarding updates that it triggered.  The row update
889 				 * carries some information that might be important according
890 				 * to business rules; so throwing an error is the only safe
891 				 * course.
892 				 *
893 				 * If a trigger actually intends this type of interaction, it
894 				 * can re-execute the DELETE and then return NULL to cancel
895 				 * the outer delete.
896 				 */
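
				/*
				 * Hedged sketch of how this is commonly reached
				 * (hypothetical objects): a BEFORE DELETE trigger whose
				 * function itself updates the row being deleted, e.g.
				 *
				 *		UPDATE items SET status = 'archived'
				 *		WHERE id = OLD.id;
				 *
				 * leaves the target tuple modified by a later command of
				 * this transaction, and the DELETE then errors out here.
				 */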
897 				if (tmfd.cmax != estate->es_output_cid)
898 					ereport(ERROR,
899 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
900 							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
901 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
902 
903 				/* Else, already deleted by self; nothing to do */
904 				return NULL;
905 
906 			case TM_Ok:
907 				break;
908 
909 			case TM_Updated:
910 				{
911 					TupleTableSlot *inputslot;
912 					TupleTableSlot *epqslot;
913 
914 					if (IsolationUsesXactSnapshot())
915 						ereport(ERROR,
916 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
917 								 errmsg("could not serialize access due to concurrent update")));
918 
919 					/*
920 					 * Already know that we're going to need to do EPQ, so
921 					 * fetch tuple directly into the right slot.
922 					 */
923 					EvalPlanQualBegin(epqstate);
924 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
925 												 resultRelInfo->ri_RangeTableIndex);
926 
927 					result = table_tuple_lock(resultRelationDesc, tupleid,
928 											  estate->es_snapshot,
929 											  inputslot, estate->es_output_cid,
930 											  LockTupleExclusive, LockWaitBlock,
931 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
932 											  &tmfd);
933 
934 					switch (result)
935 					{
936 						case TM_Ok:
937 							Assert(tmfd.traversed);
938 							epqslot = EvalPlanQual(epqstate,
939 												   resultRelationDesc,
940 												   resultRelInfo->ri_RangeTableIndex,
941 												   inputslot);
942 							if (TupIsNull(epqslot))
943 								/* Tuple not passing quals anymore, exiting... */
944 								return NULL;
945 
946 							/*
947 							 * If requested, skip delete and pass back the
948 							 * updated row.
949 							 */
950 							if (epqreturnslot)
951 							{
952 								*epqreturnslot = epqslot;
953 								return NULL;
954 							}
955 							else
956 								goto ldelete;
957 
958 						case TM_SelfModified:
959 
960 							/*
961 							 * This can be reached when following an update
962 							 * chain from a tuple updated by another session,
963 							 * reaching a tuple that was already updated in
964 							 * this transaction. If previously updated by this
965 							 * command, ignore the delete, otherwise error
966 							 * out.
967 							 *
968 							 * See also TM_SelfModified response to
969 							 * table_tuple_delete() above.
970 							 */
971 							if (tmfd.cmax != estate->es_output_cid)
972 								ereport(ERROR,
973 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
974 										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
975 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
976 							return NULL;
977 
978 						case TM_Deleted:
979 							/* tuple already deleted; nothing to do */
980 							return NULL;
981 
982 						default:
983 
984 							/*
985 							 * TM_Invisible should be impossible because we're
986 							 * waiting for updated row versions, and would
987 							 * already have errored out if the first version
988 							 * is invisible.
989 							 *
990 							 * TM_Updated should be impossible, because we're
991 							 * locking the latest version via
992 							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
993 							 */
994 							elog(ERROR, "unexpected table_tuple_lock status: %u",
995 								 result);
996 							return NULL;
997 					}
998 
999 					Assert(false);
1000 					break;
1001 				}
1002 
1003 			case TM_Deleted:
1004 				if (IsolationUsesXactSnapshot())
1005 					ereport(ERROR,
1006 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1007 							 errmsg("could not serialize access due to concurrent delete")));
1008 				/* tuple already deleted; nothing to do */
1009 				return NULL;
1010 
1011 			default:
1012 				elog(ERROR, "unrecognized table_tuple_delete status: %u",
1013 					 result);
1014 				return NULL;
1015 		}
1016 
1017 		/*
1018 		 * Note: Normally one would think that we have to delete index tuples
1019 		 * associated with the heap tuple now...
1020 		 *
1021 		 * ... but in POSTGRES, we have no need to do this because VACUUM will
1022 		 * take care of it later.  We can't delete index tuples immediately
1023 		 * anyway, since the tuple is still visible to other transactions.
1024 		 */
1025 	}
1026 
1027 	if (canSetTag)
1028 		(estate->es_processed)++;
1029 
1030 	/* Tell caller that the delete actually happened. */
1031 	if (tupleDeleted)
1032 		*tupleDeleted = true;
1033 
1034 	/*
1035 	 * If this delete is the result of a partition key update that moved the
1036 	 * tuple to a new partition, put this row into the transition OLD TABLE,
1037 	 * if there is one. We need to do this separately for DELETE and INSERT
1038 	 * because they happen on different tables.
1039 	 */
1040 	ar_delete_trig_tcs = mtstate->mt_transition_capture;
1041 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
1042 		&& mtstate->mt_transition_capture->tcs_update_old_table)
1043 	{
1044 		ExecARUpdateTriggers(estate, resultRelInfo,
1045 							 tupleid,
1046 							 oldtuple,
1047 							 NULL,
1048 							 NULL,
1049 							 mtstate->mt_transition_capture);
1050 
1051 		/*
1052 		 * We've already captured the OLD TABLE row, so make sure any AR
1053 		 * DELETE trigger fired below doesn't capture it again.
1054 		 */
1055 		ar_delete_trig_tcs = NULL;
1056 	}
1057 
1058 	/* AFTER ROW DELETE Triggers */
1059 	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
1060 						 ar_delete_trig_tcs);
1061 
1062 	/* Process RETURNING if present and if requested */
1063 	if (processReturning && resultRelInfo->ri_projectReturning)
1064 	{
1065 		/*
1066 		 * We have to put the target tuple into a slot, which means first we
1067 		 * gotta fetch it.  We can use the trigger tuple slot.
1068 		 */
1069 		TupleTableSlot *rslot;
1070 
1071 		if (resultRelInfo->ri_FdwRoutine)
1072 		{
1073 			/* FDW must have provided a slot containing the deleted row */
1074 			Assert(!TupIsNull(slot));
1075 		}
1076 		else
1077 		{
1078 			slot = ExecGetReturningSlot(estate, resultRelInfo);
1079 			if (oldtuple != NULL)
1080 			{
1081 				ExecForceStoreHeapTuple(oldtuple, slot, false);
1082 			}
1083 			else
1084 			{
1085 				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
1086 												   SnapshotAny, slot))
1087 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
1088 			}
1089 		}
1090 
1091 		rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
1092 									 RelationGetRelid(resultRelationDesc),
1093 									 slot, planSlot);
1094 
1095 		/*
1096 		 * Before releasing the target tuple again, make sure rslot has a
1097 		 * local copy of any pass-by-reference values.
1098 		 */
1099 		ExecMaterializeSlot(rslot);
1100 
1101 		ExecClearTuple(slot);
1102 
1103 		return rslot;
1104 	}
1105 
1106 	return NULL;
1107 }
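
/*
 * RETURNING on DELETE, as handled at the end of ExecDelete() above, hands
 * back the deleted row's old values; a sketch with a hypothetical table:
 *
 *		DELETE FROM sessions WHERE expires_at < now()
 *		RETURNING id, user_id;
 *
 * For a plain table we re-fetch the just-deleted tuple with SnapshotAny to
 * fill the returning slot; for a foreign table the FDW must have supplied
 * the row, and for a view the INSTEAD OF trigger's old tuple is used.
 */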
1108 
1109 /* ----------------------------------------------------------------
1110  *		ExecUpdate
1111  *
1112  *		note: we can't run UPDATE queries with transactions
1113  *		off, because UPDATEs are actually INSERTs and our
1114  *		scan would mistakenly loop forever, updating the tuple
1115  *		it just inserted.  This should be fixed, but until it
1116  *		is, we don't want to get stuck in an infinite loop
1117  *		that corrupts your database.
1118  *
1119  *		When updating a table, tupleid identifies the tuple to
1120  *		update and oldtuple is NULL.  When updating a view, oldtuple
1121  *		is passed to the INSTEAD OF triggers and identifies what to
1122  *		update, and tupleid is invalid.  When updating a foreign table,
1123  *		tupleid is invalid; the FDW has to figure out which row to
1124  *		update using data from the planSlot.  oldtuple is passed to
1125  *		foreign table triggers; it is NULL when the foreign table has
1126  *		no relevant triggers.
1127  *
1128  *		Returns RETURNING result if any, otherwise NULL.
1129  * ----------------------------------------------------------------
1130  */
1131 static TupleTableSlot *
1132 ExecUpdate(ModifyTableState *mtstate,
1133 		   ItemPointer tupleid,
1134 		   HeapTuple oldtuple,
1135 		   TupleTableSlot *slot,
1136 		   TupleTableSlot *planSlot,
1137 		   EPQState *epqstate,
1138 		   EState *estate,
1139 		   bool canSetTag)
1140 {
1141 	ResultRelInfo *resultRelInfo;
1142 	Relation	resultRelationDesc;
1143 	TM_Result	result;
1144 	TM_FailureData tmfd;
1145 	List	   *recheckIndexes = NIL;
1146 	TupleConversionMap *saved_tcs_map = NULL;
1147 
1148 	/*
1149 	 * abort the operation if not running transactions
1150 	 */
1151 	if (IsBootstrapProcessingMode())
1152 		elog(ERROR, "cannot UPDATE during bootstrap");
1153 
1154 	ExecMaterializeSlot(slot);
1155 
1156 	/*
1157 	 * get information on the (current) result relation
1158 	 */
1159 	resultRelInfo = estate->es_result_relation_info;
1160 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1161 
1162 	/* BEFORE ROW UPDATE Triggers */
1163 	if (resultRelInfo->ri_TrigDesc &&
1164 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
1165 	{
1166 		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
1167 								  tupleid, oldtuple, slot))
1168 			return NULL;		/* "do nothing" */
1169 	}
1170 
1171 	/* INSTEAD OF ROW UPDATE Triggers */
1172 	if (resultRelInfo->ri_TrigDesc &&
1173 		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
1174 	{
1175 		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
1176 								  oldtuple, slot))
1177 			return NULL;		/* "do nothing" */
1178 	}
1179 	else if (resultRelInfo->ri_FdwRoutine)
1180 	{
1181 		/*
1182 		 * GENERATED expressions might reference the tableoid column, so
1183 		 * (re-)initialize tts_tableOid before evaluating them.
1184 		 */
1185 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
1186 
1187 		/*
1188 		 * Compute stored generated columns
1189 		 */
1190 		if (resultRelationDesc->rd_att->constr &&
1191 			resultRelationDesc->rd_att->constr->has_generated_stored)
1192 			ExecComputeStoredGenerated(estate, slot, CMD_UPDATE);
1193 
1194 		/*
1195 		 * update in foreign table: let the FDW do it
1196 		 */
1197 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
1198 															   resultRelInfo,
1199 															   slot,
1200 															   planSlot);
1201 
1202 		if (slot == NULL)		/* "do nothing" */
1203 			return NULL;
1204 
1205 		/*
1206 		 * AFTER ROW Triggers or RETURNING expressions might reference the
1207 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
1208 		 * them.  (This covers the case where the FDW replaced the slot.)
1209 		 */
1210 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1211 	}
1212 	else
1213 	{
1214 		LockTupleMode lockmode;
1215 		bool		partition_constraint_failed;
1216 		bool		update_indexes;
1217 
1218 		/*
1219 		 * Constraints and GENERATED expressions might reference the tableoid
1220 		 * column, so (re-)initialize tts_tableOid before evaluating them.
1221 		 */
1222 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1223 
1224 		/*
1225 		 * Compute stored generated columns
1226 		 */
1227 		if (resultRelationDesc->rd_att->constr &&
1228 			resultRelationDesc->rd_att->constr->has_generated_stored)
1229 			ExecComputeStoredGenerated(estate, slot, CMD_UPDATE);
1230 
1231 		/*
1232 		 * Check any RLS UPDATE WITH CHECK policies
1233 		 *
1234 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
1235 		 * must loop back here and recheck any RLS policies and constraints.
1236 		 * (We don't need to redo triggers, however.  If there are any BEFORE
1237 		 * triggers then trigger.c will have done table_tuple_lock to lock the
1238 		 * correct tuple, so there's no need to do them again.)
1239 		 */
1240 lreplace:;
1241 
1242 		/* ensure slot is independent, consider e.g. EPQ */
1243 		ExecMaterializeSlot(slot);
1244 
1245 		/*
1246 		 * If partition constraint fails, this row might get moved to another
1247 		 * partition, in which case we should check the RLS CHECK policy just
1248 		 * before inserting into the new partition, rather than doing it here.
1249 		 * This is because a trigger on that partition might again change the
1250 		 * row.  So skip the WCO checks if the partition constraint fails.
1251 		 */
1252 		partition_constraint_failed =
1253 			resultRelInfo->ri_PartitionCheck &&
1254 			!ExecPartitionCheck(resultRelInfo, slot, estate, false);
1255 
1256 		if (!partition_constraint_failed &&
1257 			resultRelInfo->ri_WithCheckOptions != NIL)
1258 		{
1259 			/*
1260 			 * ExecWithCheckOptions() will skip any WCOs which are not of the
1261 			 * kind we are looking for at this point.
1262 			 */
1263 			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
1264 								 resultRelInfo, slot, estate);
1265 		}
1266 
1267 		/*
1268 		 * If a partition check failed, try to move the row into the right
1269 		 * partition.
1270 		 */
1271 		if (partition_constraint_failed)
1272 		{
1273 			bool		tuple_deleted;
1274 			TupleTableSlot *ret_slot;
1275 			TupleTableSlot *orig_slot = slot;
1276 			TupleTableSlot *epqslot = NULL;
1277 			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1278 			int			map_index;
1279 			TupleConversionMap *tupconv_map;
1280 
1281 			/*
1282 			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
1283 			 * original row to migrate to a different partition.  Maybe this
1284 			 * can be implemented some day, but it seems a fringe feature with
1285 			 * little redeeming value.
1286 			 */
1287 			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1288 				ereport(ERROR,
1289 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1290 						 errmsg("invalid ON UPDATE specification"),
1291 						 errdetail("The result tuple would appear in a different partition than the original tuple.")));
1292 
1293 			/*
1294 			 * When an UPDATE is run on a leaf partition, we will not have
1295 			 * partition tuple routing set up. In that case, fail with
1296 			 * partition constraint violation error.
1297 			 */
1298 			if (proute == NULL)
1299 				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1300 
1301 			/*
1302 			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
1303 			 * processing. We want to return rows from INSERT.
1304 			 */
1305 			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
1306 					   estate, false, false /* canSetTag */ ,
1307 					   true /* changingPart */ , &tuple_deleted, &epqslot);
1308 
1309 			/*
1310 			 * If for some reason the DELETE didn't happen (e.g. a trigger prevented
1311 			 * it, or it was already deleted by self, or it was concurrently
1312 			 * deleted by another transaction), then we should skip the insert
1313 			 * as well; otherwise, an UPDATE could cause an increase in the
1314 			 * total number of rows across all partitions, which is clearly
1315 			 * wrong.
1316 			 *
1317 			 * For a normal UPDATE, the case where the tuple has been the
1318 			 * subject of a concurrent UPDATE or DELETE would be handled by
1319 			 * the EvalPlanQual machinery, but for an UPDATE that we've
1320 			 * translated into a DELETE from this partition and an INSERT into
1321 			 * some other partition, that's not available, because CTID chains
1322 			 * can't span relation boundaries.  We mimic the semantics to a
1323 			 * limited extent by skipping the INSERT if the DELETE fails to
1324 			 * find a tuple. This ensures that two concurrent attempts to
1325 			 * UPDATE the same tuple at the same time can't turn one tuple
1326 			 * into two, and that an UPDATE of a just-deleted tuple can't
1327 			 * resurrect it.
1328 			 */
1329 			if (!tuple_deleted)
1330 			{
1331 				/*
1332 				 * epqslot will be typically NULL.  But when ExecDelete()
1333 				 * finds that another transaction has concurrently updated the
1334 				 * same row, it re-fetches the row, skips the delete, and
1335 				 * epqslot is set to the re-fetched tuple slot. In that case,
1336 				 * we need to do all the checks again.
1337 				 */
1338 				if (TupIsNull(epqslot))
1339 					return NULL;
1340 				else
1341 				{
1342 					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1343 					goto lreplace;
1344 				}
1345 			}
1346 
1347 			/*
1348 			 * Updates set the transition capture map only when a new subplan
1349 			 * is chosen.  But for inserts, it is set for each row. So after
1350 			 * INSERT, we need to revert to the map created for UPDATE;
1351 			 * otherwise the next UPDATE will incorrectly use the one created
1352 			 * for INSERT.  So first save the one created for UPDATE.
1353 			 */
1354 			if (mtstate->mt_transition_capture)
1355 				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
1356 
1357 			/*
1358 			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
1359 			 * should convert the tuple into root's tuple descriptor, since
1360 			 * ExecInsert() starts the search from root.  The tuple conversion
1361 			 * map list is in the order of mtstate->resultRelInfo[], so to
1362 			 * retrieve the one for this resultRel, we need to know the
1363 			 * position of the resultRel in mtstate->resultRelInfo[].
1364 			 */
1365 			map_index = resultRelInfo - mtstate->resultRelInfo;
1366 			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
1367 			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
1368 			if (tupconv_map != NULL)
1369 				slot = execute_attr_map_slot(tupconv_map->attrMap,
1370 											 slot,
1371 											 mtstate->mt_root_tuple_slot);
1372 
1373 			/*
1374 			 * Prepare for tuple routing, making it look like we're inserting
1375 			 * into the root.
1376 			 */
1377 			Assert(mtstate->rootResultRelInfo != NULL);
1378 			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
1379 										   mtstate->rootResultRelInfo, slot);
1380 
1381 			ret_slot = ExecInsert(mtstate, slot, planSlot,
1382 								  orig_slot, resultRelInfo,
1383 								  estate, canSetTag);
1384 
1385 			/* Revert ExecPrepareTupleRouting's node change. */
1386 			estate->es_result_relation_info = resultRelInfo;
1387 			if (mtstate->mt_transition_capture)
1388 			{
1389 				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1390 				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
1391 			}
1392 
1393 			return ret_slot;
1394 		}
1395 
1396 		/*
1397 		 * Check the constraints of the tuple.  We've already checked the
1398 		 * partition constraint above; however, we must still ensure the tuple
1399 		 * passes all other constraints, so we will call ExecConstraints() and
1400 		 * have it validate all remaining checks.
1401 		 */
1402 		if (resultRelationDesc->rd_att->constr)
1403 			ExecConstraints(resultRelInfo, slot, estate);
1404 
1405 		/*
1406 		 * replace the heap tuple
1407 		 *
1408 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1409 		 * that the row to be updated is visible to that snapshot, and throw a
1410 		 * can't-serialize error if not. This is a special-case behavior
1411 		 * needed for referential integrity updates in transaction-snapshot
1412 		 * mode transactions.
1413 		 */
1414 		result = table_tuple_update(resultRelationDesc, tupleid, slot,
1415 									estate->es_output_cid,
1416 									estate->es_snapshot,
1417 									estate->es_crosscheck_snapshot,
1418 									true /* wait for commit */ ,
1419 									&tmfd, &lockmode, &update_indexes);
1420 
1421 		switch (result)
1422 		{
1423 			case TM_SelfModified:
1424 
1425 				/*
1426 				 * The target tuple was already updated or deleted by the
1427 				 * current command, or by a later command in the current
1428 				 * transaction.  The former case is possible in a join UPDATE
1429 				 * where multiple tuples join to the same target tuple. This
1430 				 * is pretty questionable, but Postgres has always allowed it:
1431 				 * we just execute the first update action and ignore
1432 				 * additional update attempts.
1433 				 *
1434 				 * The latter case arises if the tuple is modified by a
1435 				 * command in a BEFORE trigger, or perhaps by a command in a
1436 				 * volatile function used in the query.  In such situations we
1437 				 * should not ignore the update, but it is equally unsafe to
1438 				 * proceed.  We don't want to discard the original UPDATE
1439 				 * while keeping the triggered actions based on it; and we
1440 				 * have no principled way to merge this update with the
1441 				 * previous ones.  So throwing an error is the only safe
1442 				 * course.
1443 				 *
1444 				 * If a trigger actually intends this type of interaction, it
1445 				 * can re-execute the UPDATE (assuming it can figure out how)
1446 				 * and then return NULL to cancel the outer update.
1447 				 */
1448 				if (tmfd.cmax != estate->es_output_cid)
1449 					ereport(ERROR,
1450 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1451 							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1452 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1453 
1454 				/* Else, already updated by self; nothing to do */
1455 				return NULL;
1456 
1457 			case TM_Ok:
1458 				break;
1459 
1460 			case TM_Updated:
1461 				{
1462 					TupleTableSlot *inputslot;
1463 					TupleTableSlot *epqslot;
1464 
1465 					if (IsolationUsesXactSnapshot())
1466 						ereport(ERROR,
1467 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1468 								 errmsg("could not serialize access due to concurrent update")));
1469 
1470 					/*
1471 					 * Already know that we're going to need to do EPQ, so
1472 					 * fetch tuple directly into the right slot.
1473 					 */
1474 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
1475 												 resultRelInfo->ri_RangeTableIndex);
1476 
1477 					result = table_tuple_lock(resultRelationDesc, tupleid,
1478 											  estate->es_snapshot,
1479 											  inputslot, estate->es_output_cid,
1480 											  lockmode, LockWaitBlock,
1481 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
1482 											  &tmfd);
1483 
1484 					switch (result)
1485 					{
1486 						case TM_Ok:
1487 							Assert(tmfd.traversed);
1488 
1489 							epqslot = EvalPlanQual(epqstate,
1490 												   resultRelationDesc,
1491 												   resultRelInfo->ri_RangeTableIndex,
1492 												   inputslot);
1493 							if (TupIsNull(epqslot))
1494 								/* Tuple not passing quals anymore, exiting... */
1495 								return NULL;
1496 
1497 							slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1498 							goto lreplace;
1499 
1500 						case TM_Deleted:
1501 							/* tuple already deleted; nothing to do */
1502 							return NULL;
1503 
1504 						case TM_SelfModified:
1505 
1506 							/*
1507 							 * This can be reached when following an update
1508 							 * chain from a tuple updated by another session,
1509 							 * reaching a tuple that was already updated in
1510 							 * this transaction. If previously modified by
1511 							 * this command, ignore the redundant update,
1512 							 * otherwise error out.
1513 							 *
1514 							 * See also TM_SelfModified response to
1515 							 * table_tuple_update() above.
1516 							 */
1517 							if (tmfd.cmax != estate->es_output_cid)
1518 								ereport(ERROR,
1519 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1520 										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1521 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1522 							return NULL;
1523 
1524 						default:
1525 							/* see table_tuple_lock call in ExecDelete() */
1526 							elog(ERROR, "unexpected table_tuple_lock status: %u",
1527 								 result);
1528 							return NULL;
1529 					}
1530 				}
1531 
1532 				break;
1533 
1534 			case TM_Deleted:
1535 				if (IsolationUsesXactSnapshot())
1536 					ereport(ERROR,
1537 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1538 							 errmsg("could not serialize access due to concurrent delete")));
1539 				/* tuple already deleted; nothing to do */
1540 				return NULL;
1541 
1542 			default:
1543 				elog(ERROR, "unrecognized table_tuple_update status: %u",
1544 					 result);
1545 				return NULL;
1546 		}
1547 
1548 		/* insert index entries for tuple if necessary */
1549 		if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
1550 			recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
1551 	}
1552 
1553 	if (canSetTag)
1554 		(estate->es_processed)++;
1555 
1556 	/* AFTER ROW UPDATE Triggers */
1557 	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
1558 						 recheckIndexes,
1559 						 mtstate->operation == CMD_INSERT ?
1560 						 mtstate->mt_oc_transition_capture :
1561 						 mtstate->mt_transition_capture);
1562 
1563 	list_free(recheckIndexes);
1564 
1565 	/*
1566 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
1567 	 * required to do this after testing all constraints and uniqueness
1568 	 * violations per the SQL spec, so we do it after actually updating the
1569 	 * record in the heap and all indexes.
1570 	 *
1571 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
1572 	 * are looking for at this point.
1573 	 */
1574 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1575 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1576 
1577 	/* Process RETURNING if present */
1578 	if (resultRelInfo->ri_projectReturning)
1579 		return ExecProcessReturning(resultRelInfo->ri_projectReturning,
1580 									RelationGetRelid(resultRelationDesc),
1581 									slot, planSlot);
1582 
1583 	return NULL;
1584 }
1585 
1586 /*
1587  * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
1588  *
1589  * Try to lock tuple for update as part of speculative insertion.  If
1590  * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
1591  * (but still lock row, even though it may not satisfy estate's
1592  * snapshot).
1593  *
1594  * Returns true if we're done (with or without an update), or false if
1595  * the caller must retry the INSERT from scratch.
1596  */
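/*
 * Illustrative example (hypothetical table and column names): this function
 * implements the UPDATE half of a statement such as
 *
 *		INSERT INTO t (id, n) VALUES (1, 10)
 *		ON CONFLICT (id) DO UPDATE SET n = EXCLUDED.n
 *		WHERE t.n IS DISTINCT FROM EXCLUDED.n;
 *
 * The pre-existing row is locked first; the optional WHERE clause is then
 * evaluated with the existing row as the scan tuple and EXCLUDED as the
 * inner tuple, and only if it passes is the projected replacement row
 * applied via ExecUpdate().
 */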
1597 static bool
1598 ExecOnConflictUpdate(ModifyTableState *mtstate,
1599 					 ResultRelInfo *resultRelInfo,
1600 					 ItemPointer conflictTid,
1601 					 TupleTableSlot *planSlot,
1602 					 TupleTableSlot *excludedSlot,
1603 					 EState *estate,
1604 					 bool canSetTag,
1605 					 TupleTableSlot **returning)
1606 {
1607 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
1608 	Relation	relation = resultRelInfo->ri_RelationDesc;
1609 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
1610 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
1611 	TM_FailureData tmfd;
1612 	LockTupleMode lockmode;
1613 	TM_Result	test;
1614 	Datum		xminDatum;
1615 	TransactionId xmin;
1616 	bool		isnull;
1617 
1618 	/* Determine lock mode to use */
1619 	lockmode = ExecUpdateLockMode(estate, resultRelInfo);
1620 
1621 	/*
1622 	 * Lock the tuple for update.  Don't follow the update chain when the
1623 	 * tuple cannot be locked without doing so.  A row locking conflict
1624 	 * here means our previous conclusion that the tuple is committed is
1625 	 * no longer true.
1626 	 */
1627 	test = table_tuple_lock(relation, conflictTid,
1628 							estate->es_snapshot,
1629 							existing, estate->es_output_cid,
1630 							lockmode, LockWaitBlock, 0,
1631 							&tmfd);
1632 	switch (test)
1633 	{
1634 		case TM_Ok:
1635 			/* success! */
1636 			break;
1637 
1638 		case TM_Invisible:
1639 
1640 			/*
1641 			 * This can occur when a just inserted tuple is updated again in
1642 			 * the same command. E.g. because multiple rows with the same
1643 			 * conflicting key values are inserted.
1644 			 *
1645 			 * This is somewhat similar to the ExecUpdate() TM_SelfModified
1646 			 * case.  We do not want to proceed because it would lead to the
1647 			 * same row being updated a second time in some unspecified order,
1648 			 * and in contrast to plain UPDATEs there's no historical behavior
1649 			 * to break.
1650 			 *
1651 			 * It is the user's responsibility to prevent this situation from
1652 			 * occurring.  These problems are why SQL-2003 similarly specifies
1653 			 * that for SQL MERGE, an exception must be raised in the event of
1654 			 * an attempt to update the same row twice.
1655 			 */
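			/*
			 * For illustration only (hypothetical names): a statement such as
			 *     INSERT INTO t (id, n) VALUES (1, 1), (1, 2)
			 *     ON CONFLICT (id) DO UPDATE SET n = EXCLUDED.n;
			 * tries to update, for its second VALUES entry, the row it just
			 * inserted for the first one, and is rejected with the
			 * cardinality-violation error below.
			 */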
1656 			xminDatum = slot_getsysattr(existing,
1657 										MinTransactionIdAttributeNumber,
1658 										&isnull);
1659 			Assert(!isnull);
1660 			xmin = DatumGetTransactionId(xminDatum);
1661 
1662 			if (TransactionIdIsCurrentTransactionId(xmin))
1663 				ereport(ERROR,
1664 						(errcode(ERRCODE_CARDINALITY_VIOLATION),
1665 						 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
1666 						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
1667 
1668 			/* This shouldn't happen */
1669 			elog(ERROR, "attempted to lock invisible tuple");
1670 			break;
1671 
1672 		case TM_SelfModified:
1673 
1674 			/*
1675 			 * This state should never be reached. As a dirty snapshot is used
1676 			 * to find conflicting tuples, speculative insertion wouldn't have
1677 			 * seen this row to conflict with.
1678 			 */
1679 			elog(ERROR, "unexpected self-updated tuple");
1680 			break;
1681 
1682 		case TM_Updated:
1683 			if (IsolationUsesXactSnapshot())
1684 				ereport(ERROR,
1685 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1686 						 errmsg("could not serialize access due to concurrent update")));
1687 
1688 			/*
1689 			 * As long as we don't support an UPDATE of INSERT ON CONFLICT
1690 			 * for a partitioned table, we should never reach a case where
1691 			 * the tuple to be locked has been moved to another partition by
1692 			 * a concurrent update of the partition key.
1693 			 */
1694 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1695 
1696 			/*
1697 			 * Tell caller to try again from the very start.
1698 			 *
1699 			 * It does not make sense to use the usual EvalPlanQual() style
1700 			 * loop here, as the new version of the row might not conflict
1701 			 * anymore, or the conflicting tuple has actually been deleted.
1702 			 */
1703 			ExecClearTuple(existing);
1704 			return false;
1705 
1706 		case TM_Deleted:
1707 			if (IsolationUsesXactSnapshot())
1708 				ereport(ERROR,
1709 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1710 						 errmsg("could not serialize access due to concurrent delete")));
1711 
1712 			/* see TM_Updated case */
1713 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1714 			ExecClearTuple(existing);
1715 			return false;
1716 
1717 		default:
1718 			elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
1719 	}
1720 
1721 	/* Success, the tuple is locked. */
1722 
1723 	/*
1724 	 * Verify that the tuple is visible to our MVCC snapshot if the current
1725 	 * isolation level mandates that.
1726 	 *
1727 	 * It's not sufficient to rely on the check within ExecUpdate(), since
1728 	 * e.g. the ON CONFLICT ... WHERE clause may prevent us from reaching it.
1729 	 *
1730 	 * This means we only ever continue when a new command in the current
1731 	 * transaction could see the row, even though in READ COMMITTED mode the
1732 	 * tuple will not be visible according to the current statement's
1733 	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
1734 	 * versions.
1735 	 */
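	/*
	 * Sketch of the effect (hypothetical scenario): under REPEATABLE READ,
	 * if another session inserted the conflicting row and committed after
	 * our snapshot was taken, the row can be locked but is not visible to
	 * our snapshot, so ExecCheckTupleVisible() raises a serialization
	 * failure rather than silently updating a row this statement could not
	 * see.
	 */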
1736 	ExecCheckTupleVisible(estate, relation, existing);
1737 
1738 	/*
1739 	 * Make tuple and any needed join variables available to ExecQual and
1740 	 * ExecProject.  The EXCLUDED tuple is installed in ecxt_innertuple, while
1741 	 * the target's existing tuple is installed in the scantuple.  EXCLUDED
1742 	 * has been made to reference INNER_VAR in setrefs.c, but there is no
1743 	 * other redirection.
1744 	 */
1745 	econtext->ecxt_scantuple = existing;
1746 	econtext->ecxt_innertuple = excludedSlot;
1747 	econtext->ecxt_outertuple = NULL;
1748 
1749 	if (!ExecQual(onConflictSetWhere, econtext))
1750 	{
1751 		ExecClearTuple(existing);	/* see return below */
1752 		InstrCountFiltered1(&mtstate->ps, 1);
1753 		return true;			/* done with the tuple */
1754 	}
1755 
1756 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1757 	{
1758 		/*
1759 		 * Check target's existing tuple against UPDATE-applicable USING
1760 		 * security barrier quals (if any), enforced here as RLS checks/WCOs.
1761 		 *
1762 		 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
1763 		 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
1764 		 * but that's almost the extent of its special handling for ON
1765 		 * CONFLICT DO UPDATE.
1766 		 *
1767 		 * The rewriter will also have associated UPDATE applicable straight
1768 		 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
1769 		 * follows.  INSERTs and UPDATEs naturally have mutually exclusive WCO
1770 		 * kinds, so there is no danger of spurious over-enforcement in the
1771 		 * INSERT or UPDATE path.
1772 		 */
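		/*
		 * For example (hypothetical policy): with row-level security enabled
		 * on t and
		 *     CREATE POLICY p ON t FOR UPDATE USING (owner = current_user);
		 * the existing conflicting row is checked against that USING qual
		 * here, as a WCO_RLS_CONFLICT_CHECK option, before the DO UPDATE SET
		 * projection is computed from it.
		 */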
1773 		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
1774 							 existing,
1775 							 mtstate->ps.state);
1776 	}
1777 
1778 	/* Project the new tuple version */
1779 	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
1780 
1781 	/*
1782 	 * Note that it is possible that the target tuple has been modified in
1783 	 * this session, after the above table_tuple_lock. We choose to not error
1784 	 * out in that case, in line with ExecUpdate's treatment of similar cases.
1785 	 * This can happen if an UPDATE is triggered from within ExecQual(),
1786 	 * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
1787 	 * wCTE in the ON CONFLICT's SET.
1788 	 */
1789 
1790 	/* Execute UPDATE with projection */
1791 	*returning = ExecUpdate(mtstate, conflictTid, NULL,
1792 							resultRelInfo->ri_onConflict->oc_ProjSlot,
1793 							planSlot,
1794 							&mtstate->mt_epqstate, mtstate->ps.state,
1795 							canSetTag);
1796 
1797 	/*
1798 	 * Clear out existing tuple, as there might not be another conflict among
1799 	 * the next input rows. Don't want to hold resources till the end of the
1800 	 * query.
1801 	 */
1802 	ExecClearTuple(existing);
1803 	return true;
1804 }
1805 
1806 
1807 /*
1808  * Process BEFORE EACH STATEMENT triggers
1809  */
1810 static void
1811 fireBSTriggers(ModifyTableState *node)
1812 {
1813 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1814 	ResultRelInfo *resultRelInfo = node->resultRelInfo;
1815 
1816 	/*
1817 	 * If the node modifies a partitioned table, we must fire its triggers.
1818 	 * Note that in that case, node->resultRelInfo points to the first leaf
1819 	 * partition, not the root table.
1820 	 */
1821 	if (node->rootResultRelInfo != NULL)
1822 		resultRelInfo = node->rootResultRelInfo;
1823 
1824 	switch (node->operation)
1825 	{
1826 		case CMD_INSERT:
1827 			ExecBSInsertTriggers(node->ps.state, resultRelInfo);
1828 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1829 				ExecBSUpdateTriggers(node->ps.state,
1830 									 resultRelInfo);
1831 			break;
1832 		case CMD_UPDATE:
1833 			ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
1834 			break;
1835 		case CMD_DELETE:
1836 			ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
1837 			break;
1838 		default:
1839 			elog(ERROR, "unknown operation");
1840 			break;
1841 	}
1842 }
1843 
1844 /*
1845  * Return the target rel ResultRelInfo.
1846  *
1847  * This relation is the same as:
1848  * - the relation for which we will fire AFTER STATEMENT triggers.
1849  * - the relation into whose tuple format all captured transition tuples must
1850  *   be converted.
1851  * - the root partitioned table.
1852  */
1853 static ResultRelInfo *
1854 getTargetResultRelInfo(ModifyTableState *node)
1855 {
1856 	/*
1857 	 * Note that if the node modifies a partitioned table, node->resultRelInfo
1858 	 * points to the first leaf partition, not the root table.
1859 	 */
1860 	if (node->rootResultRelInfo != NULL)
1861 		return node->rootResultRelInfo;
1862 	else
1863 		return node->resultRelInfo;
1864 }
1865 
1866 /*
1867  * Process AFTER EACH STATEMENT triggers
1868  */
1869 static void
1870 fireASTriggers(ModifyTableState *node)
1871 {
1872 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1873 	ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);
1874 
1875 	switch (node->operation)
1876 	{
1877 		case CMD_INSERT:
1878 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1879 				ExecASUpdateTriggers(node->ps.state,
1880 									 resultRelInfo,
1881 									 node->mt_oc_transition_capture);
1882 			ExecASInsertTriggers(node->ps.state, resultRelInfo,
1883 								 node->mt_transition_capture);
1884 			break;
1885 		case CMD_UPDATE:
1886 			ExecASUpdateTriggers(node->ps.state, resultRelInfo,
1887 								 node->mt_transition_capture);
1888 			break;
1889 		case CMD_DELETE:
1890 			ExecASDeleteTriggers(node->ps.state, resultRelInfo,
1891 								 node->mt_transition_capture);
1892 			break;
1893 		default:
1894 			elog(ERROR, "unknown operation");
1895 			break;
1896 	}
1897 }
1898 
1899 /*
1900  * Set up the state needed for collecting transition tuples for AFTER
1901  * triggers.
1902  */
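/*
 * Transition tuples come into play for AFTER triggers that declare
 * transition tables via REFERENCING, e.g. (hypothetical trigger, table and
 * function names):
 *
 *		CREATE TRIGGER t_audit AFTER UPDATE ON t
 *			REFERENCING OLD TABLE AS old_rows NEW TABLE AS new_rows
 *			FOR EACH STATEMENT EXECUTE FUNCTION t_audit_fn();
 *
 * MakeTransitionCaptureState() returns non-NULL only when such triggers
 * exist, so the extra work below is skipped entirely otherwise.
 */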
1903 static void
1904 ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
1905 {
1906 	ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
1907 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
1908 
1909 	/* Check for transition tables on the directly targeted relation. */
1910 	mtstate->mt_transition_capture =
1911 		MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1912 								   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1913 								   mtstate->operation);
1914 	if (plan->operation == CMD_INSERT &&
1915 		plan->onConflictAction == ONCONFLICT_UPDATE)
1916 		mtstate->mt_oc_transition_capture =
1917 			MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1918 									   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1919 									   CMD_UPDATE);
1920 
1921 	/*
1922 	 * If we found that we need to collect transition tuples then we may also
1923 	 * need tuple conversion maps for any children that have TupleDescs that
1924 	 * aren't compatible with the tuplestores.  (We can share these maps
1925 	 * between the regular and ON CONFLICT cases.)
1926 	 */
1927 	if (mtstate->mt_transition_capture != NULL ||
1928 		mtstate->mt_oc_transition_capture != NULL)
1929 	{
1930 		ExecSetupChildParentMapForSubplan(mtstate);
1931 
1932 		/*
1933 		 * Install the conversion map for the first plan for UPDATE and DELETE
1934 		 * operations.  It will be advanced each time we switch to the next
1935 		 * plan.  (INSERT operations set it every time, so we need not update
1936 		 * mtstate->mt_oc_transition_capture here.)
1937 		 */
1938 		if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
1939 			mtstate->mt_transition_capture->tcs_map =
1940 				tupconv_map_for_subplan(mtstate, 0);
1941 	}
1942 }
1943 
1944 /*
1945  * ExecPrepareTupleRouting --- prepare for routing one tuple
1946  *
1947  * Determine the partition in which the tuple in slot is to be inserted,
1948  * and modify mtstate and estate to prepare for it.
1949  *
1950  * Caller must revert the estate changes after executing the insertion!
1951  * In mtstate, transition capture changes may also need to be reverted.
1952  *
1953  * Returns a slot holding the tuple of the partition rowtype.
1954  */
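/*
 * Rough example (hypothetical schema): given
 *		CREATE TABLE t (id int, v text) PARTITION BY RANGE (id);
 *		CREATE TABLE t_p1 PARTITION OF t FOR VALUES FROM (1) TO (100);
 * an INSERT through t is routed here to t_p1's ResultRelInfo, and the tuple
 * is converted via pi_RootToPartitionMap only if t_p1's physical column
 * layout differs from t's (e.g. because of dropped columns or a partition
 * attached with a different column order).
 */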
1955 static TupleTableSlot *
1956 ExecPrepareTupleRouting(ModifyTableState *mtstate,
1957 						EState *estate,
1958 						PartitionTupleRouting *proute,
1959 						ResultRelInfo *targetRelInfo,
1960 						TupleTableSlot *slot)
1961 {
1962 	ResultRelInfo *partrel;
1963 	PartitionRoutingInfo *partrouteinfo;
1964 	TupleConversionMap *map;
1965 
1966 	/*
1967 	 * Lookup the target partition's ResultRelInfo.  If ExecFindPartition does
1968 	 * not find a valid partition for the tuple in 'slot' then an error is
1969 	 * raised.  An error may also be raised if the found partition is not a
1970 	 * valid target for INSERTs.  This is required since a partitioned table
1971 	 * UPDATE to another partition becomes a DELETE+INSERT.
1972 	 */
1973 	partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
1974 	partrouteinfo = partrel->ri_PartitionInfo;
1975 	Assert(partrouteinfo != NULL);
1976 
1977 	/*
1978 	 * Make it look like we are inserting into the partition.
1979 	 */
1980 	estate->es_result_relation_info = partrel;
1981 
1982 	/*
1983 	 * If we're capturing transition tuples, we might need to convert from the
1984 	 * partition rowtype to the root partitioned table's rowtype.
1985 	 */
1986 	if (mtstate->mt_transition_capture != NULL)
1987 	{
1988 		if (partrel->ri_TrigDesc &&
1989 			partrel->ri_TrigDesc->trig_insert_before_row)
1990 		{
1991 			/*
1992 			 * If there are any BEFORE triggers on the partition, we'll have
1993 			 * to be ready to convert their result back to tuplestore format.
1994 			 */
1995 			mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1996 			mtstate->mt_transition_capture->tcs_map =
1997 				partrouteinfo->pi_PartitionToRootMap;
1998 		}
1999 		else
2000 		{
2001 			/*
2002 			 * Otherwise, just remember the original unconverted tuple, to
2003 			 * avoid a needless round trip conversion.
2004 			 */
2005 			mtstate->mt_transition_capture->tcs_original_insert_tuple = slot;
2006 			mtstate->mt_transition_capture->tcs_map = NULL;
2007 		}
2008 	}
2009 	if (mtstate->mt_oc_transition_capture != NULL)
2010 	{
2011 		mtstate->mt_oc_transition_capture->tcs_map =
2012 			partrouteinfo->pi_PartitionToRootMap;
2013 	}
2014 
2015 	/*
2016 	 * Convert the tuple, if necessary.
2017 	 */
2018 	map = partrouteinfo->pi_RootToPartitionMap;
2019 	if (map != NULL)
2020 	{
2021 		TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;
2022 
2023 		slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
2024 	}
2025 
2026 	return slot;
2027 }
2028 
2029 /*
2030  * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
2031  *
2032  * This map array is required to convert the tuple from the subplan result rel
2033  * to the target table descriptor. This requirement arises for two independent
2034  * scenarios:
2035  * 1. For update-tuple-routing.
2036  * 2. For capturing tuples in transition tables.
2037  */
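/*
 * For instance (hypothetical scenario), if a child table was created with
 * its columns in a different order than the parent, or carries dropped
 * columns the parent lacks, convert_tuples_by_name() below returns a
 * non-NULL map for that subplan; for children whose descriptor already
 * matches the parent's it returns NULL and no conversion is performed.
 */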
2038 static void
2039 ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
2040 {
2041 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
2042 	ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
2043 	TupleDesc	outdesc;
2044 	int			numResultRelInfos = mtstate->mt_nplans;
2045 	int			i;
2046 
2047 	/*
2048 	 * Build array of conversion maps from each child's TupleDesc to the one
2049 	 * used in the target relation.  The map pointers may be NULL when no
2050 	 * conversion is necessary, which is hopefully a common case.
2051 	 */
2052 
2053 	/* Get tuple descriptor of the target rel. */
2054 	outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
2055 
2056 	mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
2057 		palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
2058 
2059 	for (i = 0; i < numResultRelInfos; ++i)
2060 	{
2061 		mtstate->mt_per_subplan_tupconv_maps[i] =
2062 			convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
2063 								   outdesc);
2064 	}
2065 }
2066 
2067 /*
2068  * For a given subplan index, get the tuple conversion map.
2069  */
2070 static TupleConversionMap *
2071 tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
2072 {
2073 	/* If nobody else set the per-subplan array of maps, do so ourselves. */
2074 	if (mtstate->mt_per_subplan_tupconv_maps == NULL)
2075 		ExecSetupChildParentMapForSubplan(mtstate);
2076 
2077 	Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
2078 	return mtstate->mt_per_subplan_tupconv_maps[whichplan];
2079 }
2080 
2081 /* ----------------------------------------------------------------
2082  *	   ExecModifyTable
2083  *
2084  *		Perform table modifications as required, and return RETURNING results
2085  *		if needed.
2086  * ----------------------------------------------------------------
2087  */
2088 static TupleTableSlot *
2089 ExecModifyTable(PlanState *pstate)
2090 {
2091 	ModifyTableState *node = castNode(ModifyTableState, pstate);
2092 	PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
2093 	EState	   *estate = node->ps.state;
2094 	CmdType		operation = node->operation;
2095 	ResultRelInfo *saved_resultRelInfo;
2096 	ResultRelInfo *resultRelInfo;
2097 	PlanState  *subplanstate;
2098 	JunkFilter *junkfilter;
2099 	TupleTableSlot *slot;
2100 	TupleTableSlot *planSlot;
2101 	ItemPointer tupleid;
2102 	ItemPointerData tuple_ctid;
2103 	HeapTupleData oldtupdata;
2104 	HeapTuple	oldtuple;
2105 
2106 	CHECK_FOR_INTERRUPTS();
2107 
2108 	/*
2109 	 * This should NOT get called during EvalPlanQual; we should have passed a
2110 	 * subplan tree to EvalPlanQual, instead.  Use a runtime test not just
2111 	 * Assert because this condition is easy to miss in testing.  (Note:
2112 	 * although ModifyTable should not get executed within an EvalPlanQual
2113 	 * operation, we do have to allow it to be initialized and shut down in
2114 	 * case it is within a CTE subplan.  Hence this test must be here, not in
2115 	 * ExecInitModifyTable.)
2116 	 */
2117 	if (estate->es_epq_active != NULL)
2118 		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
2119 
2120 	/*
2121 	 * If we've already completed processing, don't try to do more.  We need
2122 	 * this test because ExecPostprocessPlan might call us an extra time, and
2123 	 * our subplan's nodes aren't necessarily robust against being called
2124 	 * extra times.
2125 	 */
2126 	if (node->mt_done)
2127 		return NULL;
2128 
2129 	/*
2130 	 * On first call, fire BEFORE STATEMENT triggers before proceeding.
2131 	 */
2132 	if (node->fireBSTriggers)
2133 	{
2134 		fireBSTriggers(node);
2135 		node->fireBSTriggers = false;
2136 	}
2137 
2138 	/* Preload local variables */
2139 	resultRelInfo = node->resultRelInfo + node->mt_whichplan;
2140 	subplanstate = node->mt_plans[node->mt_whichplan];
2141 	junkfilter = resultRelInfo->ri_junkFilter;
2142 
2143 	/*
2144 	 * es_result_relation_info must point to the currently active result
2145 	 * relation while we are within this ModifyTable node.  Even though
2146 	 * ModifyTable nodes can't be nested statically, they can be nested
2147 	 * dynamically (since our subplan could include a reference to a modifying
2148 	 * CTE).  So we have to save and restore the caller's value.
2149 	 */
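	/*
	 * An example of such dynamic nesting (hypothetical statement):
	 *     WITH moved AS (DELETE FROM archive RETURNING *)
	 *     INSERT INTO live SELECT * FROM moved;
	 * Here the outer INSERT's ModifyTable ends up running the DELETE's
	 * ModifyTable as part of its subplan, so each must restore
	 * es_result_relation_info for the other.
	 */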
2150 	saved_resultRelInfo = estate->es_result_relation_info;
2151 
2152 	estate->es_result_relation_info = resultRelInfo;
2153 
2154 	/*
2155 	 * Fetch rows from subplan(s), and execute the required table modification
2156 	 * for each row.
2157 	 */
2158 	for (;;)
2159 	{
2160 		/*
2161 		 * Reset the per-output-tuple exprcontext.  This is needed because
2162 		 * triggers expect to use that context as workspace.  It's a bit ugly
2163 		 * to do this below the top level of the plan, however.  We might need
2164 		 * to rethink this later.
2165 		 */
2166 		ResetPerTupleExprContext(estate);
2167 
2168 		/*
2169 		 * Reset per-tuple memory context used for processing on conflict and
2170 		 * returning clauses, to free any expression evaluation storage
2171 		 * allocated in the previous cycle.
2172 		 */
2173 		if (pstate->ps_ExprContext)
2174 			ResetExprContext(pstate->ps_ExprContext);
2175 
2176 		planSlot = ExecProcNode(subplanstate);
2177 
2178 		if (TupIsNull(planSlot))
2179 		{
2180 			/* advance to next subplan if any */
2181 			node->mt_whichplan++;
2182 			if (node->mt_whichplan < node->mt_nplans)
2183 			{
2184 				resultRelInfo++;
2185 				subplanstate = node->mt_plans[node->mt_whichplan];
2186 				junkfilter = resultRelInfo->ri_junkFilter;
2187 				estate->es_result_relation_info = resultRelInfo;
2188 				EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
2189 									node->mt_arowmarks[node->mt_whichplan]);
2190 				/* Prepare to convert transition tuples from this child. */
2191 				if (node->mt_transition_capture != NULL)
2192 				{
2193 					node->mt_transition_capture->tcs_map =
2194 						tupconv_map_for_subplan(node, node->mt_whichplan);
2195 				}
2196 				if (node->mt_oc_transition_capture != NULL)
2197 				{
2198 					node->mt_oc_transition_capture->tcs_map =
2199 						tupconv_map_for_subplan(node, node->mt_whichplan);
2200 				}
2201 				continue;
2202 			}
2203 			else
2204 				break;
2205 		}
2206 
2207 		/*
2208 		 * Ensure the input tuple is in the right format for the target relation.
2209 		 */
2210 		if (node->mt_scans[node->mt_whichplan]->tts_ops != planSlot->tts_ops)
2211 		{
2212 			ExecCopySlot(node->mt_scans[node->mt_whichplan], planSlot);
2213 			planSlot = node->mt_scans[node->mt_whichplan];
2214 		}
2215 
2216 		/*
2217 		 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
2218 		 * here is compute the RETURNING expressions.
2219 		 */
2220 		if (resultRelInfo->ri_usesFdwDirectModify)
2221 		{
2222 			Assert(resultRelInfo->ri_projectReturning);
2223 
2224 			/*
2225 			 * A scan slot containing the data that was actually inserted,
2226 			 * updated or deleted has already been made available to
2227 			 * ExecProcessReturning by IterateDirectModify, so no need to
2228 			 * provide it here.
2229 			 */
2230 			slot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
2231 										RelationGetRelid(resultRelInfo->ri_RelationDesc),
2232 										NULL, planSlot);
2233 
2234 			estate->es_result_relation_info = saved_resultRelInfo;
2235 			return slot;
2236 		}
2237 
2238 		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
2239 		slot = planSlot;
2240 
2241 		tupleid = NULL;
2242 		oldtuple = NULL;
2243 		if (junkfilter != NULL)
2244 		{
2245 			/*
2246 			 * extract the 'ctid' or 'wholerow' junk attribute.
2247 			 */
2248 			if (operation == CMD_UPDATE || operation == CMD_DELETE)
2249 			{
2250 				char		relkind;
2251 				Datum		datum;
2252 				bool		isNull;
2253 
2254 				relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2255 				if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
2256 				{
2257 					datum = ExecGetJunkAttribute(slot,
2258 												 junkfilter->jf_junkAttNo,
2259 												 &isNull);
2260 					/* shouldn't ever get a null result... */
2261 					if (isNull)
2262 						elog(ERROR, "ctid is NULL");
2263 
2264 					tupleid = (ItemPointer) DatumGetPointer(datum);
2265 					tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
2266 					tupleid = &tuple_ctid;
2267 				}
2268 
2269 				/*
2270 				 * Use the wholerow attribute, when available, to reconstruct
2271 				 * the old relation tuple.
2272 				 *
2273 				 * Foreign table updates have a wholerow attribute when the
2274 				 * relation has a row-level trigger.  Note that the wholerow
2275 				 * attribute does not carry system columns.  Foreign table
2276 				 * triggers miss seeing those, except that we know enough here
2277 				 * to set t_tableOid.  Quite separately from this, the FDW may
2278 				 * fetch its own junk attrs to identify the row.
2279 				 *
2280 				 * Other relevant relkinds, currently limited to views, always
2281 				 * have a wholerow attribute.
2282 				 */
2283 				else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
2284 				{
2285 					datum = ExecGetJunkAttribute(slot,
2286 												 junkfilter->jf_junkAttNo,
2287 												 &isNull);
2288 					/* shouldn't ever get a null result... */
2289 					if (isNull)
2290 						elog(ERROR, "wholerow is NULL");
2291 
2292 					oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
2293 					oldtupdata.t_len =
2294 						HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
2295 					ItemPointerSetInvalid(&(oldtupdata.t_self));
2296 					/* Historically, view triggers see invalid t_tableOid. */
2297 					oldtupdata.t_tableOid =
2298 						(relkind == RELKIND_VIEW) ? InvalidOid :
2299 						RelationGetRelid(resultRelInfo->ri_RelationDesc);
2300 
2301 					oldtuple = &oldtupdata;
2302 				}
2303 				else
2304 					Assert(relkind == RELKIND_FOREIGN_TABLE);
2305 			}
2306 
2307 			/*
2308 			 * apply the junkfilter if needed.
2309 			 */
2310 			if (operation != CMD_DELETE)
2311 				slot = ExecFilterJunk(junkfilter, slot);
2312 		}
2313 
2314 		switch (operation)
2315 		{
2316 			case CMD_INSERT:
2317 				/* Prepare for tuple routing if needed. */
2318 				if (proute)
2319 					slot = ExecPrepareTupleRouting(node, estate, proute,
2320 												   resultRelInfo, slot);
2321 				slot = ExecInsert(node, slot, planSlot,
2322 								  NULL, estate->es_result_relation_info,
2323 								  estate, node->canSetTag);
2324 				/* Revert ExecPrepareTupleRouting's state change. */
2325 				if (proute)
2326 					estate->es_result_relation_info = resultRelInfo;
2327 				break;
2328 			case CMD_UPDATE:
2329 				slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
2330 								  &node->mt_epqstate, estate, node->canSetTag);
2331 				break;
2332 			case CMD_DELETE:
2333 				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
2334 								  &node->mt_epqstate, estate,
2335 								  true, node->canSetTag,
2336 								  false /* changingPart */ , NULL, NULL);
2337 				break;
2338 			default:
2339 				elog(ERROR, "unknown operation");
2340 				break;
2341 		}
2342 
2343 		/*
2344 		 * If we got a RETURNING result, return it to caller.  We'll continue
2345 		 * the work on next call.
2346 		 */
2347 		if (slot)
2348 		{
2349 			estate->es_result_relation_info = saved_resultRelInfo;
2350 			return slot;
2351 		}
2352 	}
2353 
2354 	/* Restore es_result_relation_info before exiting */
2355 	estate->es_result_relation_info = saved_resultRelInfo;
2356 
2357 	/*
2358 	 * We're done, but fire AFTER STATEMENT triggers before exiting.
2359 	 */
2360 	fireASTriggers(node);
2361 
2362 	node->mt_done = true;
2363 
2364 	return NULL;
2365 }
2366 
2367 /* ----------------------------------------------------------------
2368  *		ExecInitModifyTable
2369  * ----------------------------------------------------------------
2370  */
2371 ModifyTableState *
2372 ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
2373 {
2374 	ModifyTableState *mtstate;
2375 	CmdType		operation = node->operation;
2376 	int			nplans = list_length(node->plans);
2377 	ResultRelInfo *saved_resultRelInfo;
2378 	ResultRelInfo *resultRelInfo;
2379 	Plan	   *subplan;
2380 	ListCell   *l;
2381 	int			i;
2382 	Relation	rel;
2383 	bool		update_tuple_routing_needed = node->partColsUpdated;
2384 
2385 	/* check for unsupported flags */
2386 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
2387 
2388 	/*
2389 	 * create state structure
2390 	 */
2391 	mtstate = makeNode(ModifyTableState);
2392 	mtstate->ps.plan = (Plan *) node;
2393 	mtstate->ps.state = estate;
2394 	mtstate->ps.ExecProcNode = ExecModifyTable;
2395 
2396 	mtstate->operation = operation;
2397 	mtstate->canSetTag = node->canSetTag;
2398 	mtstate->mt_done = false;
2399 
2400 	mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
2401 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
2402 	mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
2403 
2404 	/* If modifying a partitioned table, initialize the root table info */
2405 	if (node->rootResultRelIndex >= 0)
2406 		mtstate->rootResultRelInfo = estate->es_root_result_relations +
2407 			node->rootResultRelIndex;
2408 
2409 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
2410 	mtstate->mt_nplans = nplans;
2411 
2412 	/* set up epqstate with dummy subplan data for the moment */
2413 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
2414 	mtstate->fireBSTriggers = true;
2415 
2416 	/*
2417 	 * call ExecInitNode on each of the plans to be executed and save the
2418 	 * results into the array "mt_plans".  This is also a convenient place to
2419 	 * verify that the proposed target relations are valid and open their
2420 	 * indexes for insertion of new index entries.  Note we *must* set
2421 	 * estate->es_result_relation_info correctly while we initialize each
2422 	 * sub-plan; external modules such as FDWs may depend on that (see
2423 	 * contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as one
2424 	 * example).
2425 	 */
2426 	saved_resultRelInfo = estate->es_result_relation_info;
2427 
2428 	resultRelInfo = mtstate->resultRelInfo;
2429 	i = 0;
2430 	foreach(l, node->plans)
2431 	{
2432 		subplan = (Plan *) lfirst(l);
2433 
2434 		/* Initialize the usesFdwDirectModify flag */
2435 		resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
2436 															  node->fdwDirectModifyPlans);
2437 
2438 		/*
2439 		 * Verify result relation is a valid target for the current operation
2440 		 */
2441 		CheckValidResultRel(resultRelInfo, operation);
2442 
2443 		/*
2444 		 * If there are indices on the result relation, open them and save
2445 		 * descriptors in the result relation info, so that we can add new
2446 		 * index entries for the tuples we add/update.  We need not do this
2447 		 * for a DELETE, however, since deletion doesn't affect indexes. Also,
2448 		 * inside an EvalPlanQual operation, the indexes might be open
2449 		 * already, since we share the resultrel state with the original
2450 		 * query.
2451 		 */
2452 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
2453 			operation != CMD_DELETE &&
2454 			resultRelInfo->ri_IndexRelationDescs == NULL)
2455 			ExecOpenIndices(resultRelInfo,
2456 							node->onConflictAction != ONCONFLICT_NONE);
2457 
2458 		/*
2459 		 * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
2460 		 * trigger itself might modify the partition-key values. So arrange
2461 		 * for tuple routing.
2462 		 */
2463 		if (resultRelInfo->ri_TrigDesc &&
2464 			resultRelInfo->ri_TrigDesc->trig_update_before_row &&
2465 			operation == CMD_UPDATE)
2466 			update_tuple_routing_needed = true;
2467 
2468 		/* Now init the plan for this result rel */
2469 		estate->es_result_relation_info = resultRelInfo;
2470 		mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
2471 		mtstate->mt_scans[i] =
2472 			ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
2473 								   table_slot_callbacks(resultRelInfo->ri_RelationDesc));
2474 
2475 		/* Also let FDWs init themselves for foreign-table result rels */
2476 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2477 			resultRelInfo->ri_FdwRoutine != NULL &&
2478 			resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
2479 		{
2480 			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
2481 
2482 			resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
2483 															 resultRelInfo,
2484 															 fdw_private,
2485 															 i,
2486 															 eflags);
2487 		}
2488 
2489 		resultRelInfo++;
2490 		i++;
2491 	}
2492 
2493 	estate->es_result_relation_info = saved_resultRelInfo;
2494 
2495 	/* Get the target relation */
2496 	rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;
2497 
2498 	/*
2499 	 * If it's not a partitioned table after all, UPDATE tuple routing should
2500 	 * not be attempted.
2501 	 */
2502 	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2503 		update_tuple_routing_needed = false;
2504 
2505 	/*
2506 	 * Build state for tuple routing if it's an INSERT or if it's an UPDATE of
2507 	 * the partition key.
2508 	 */
2509 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
2510 		(operation == CMD_INSERT || update_tuple_routing_needed))
2511 		mtstate->mt_partition_tuple_routing =
2512 			ExecSetupPartitionTupleRouting(estate, mtstate, rel);
2513 
2514 	/*
2515 	 * Build state for collecting transition tuples.  This requires having a
2516 	 * valid trigger query context, so skip it in explain-only mode.
2517 	 */
2518 	if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
2519 		ExecSetupTransitionCaptureState(mtstate, estate);
2520 
2521 	/*
2522 	 * Construct mapping from each of the per-subplan partition attnos to the
2523 	 * root attno.  This is required when, during UPDATE row movement, the
2524 	 * tuple descriptor of a source partition does not match the root
2525 	 * partitioned table's descriptor.  In that case we must convert tuples to
2526 	 * the root tuple descriptor, because the search for the destination
2527 	 * partition starts from the root.  We'll also need a slot to store these
2528 	 * converted tuples.  We can skip this setup if it's not a partition key update.
2529 	 */
2530 	if (update_tuple_routing_needed)
2531 	{
2532 		ExecSetupChildParentMapForSubplan(mtstate);
2533 		mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL);
2534 	}
2535 
2536 	/*
2537 	 * Initialize any WITH CHECK OPTION constraints if needed.
2538 	 */
2539 	resultRelInfo = mtstate->resultRelInfo;
2540 	i = 0;
2541 	foreach(l, node->withCheckOptionLists)
2542 	{
2543 		List	   *wcoList = (List *) lfirst(l);
2544 		List	   *wcoExprs = NIL;
2545 		ListCell   *ll;
2546 
2547 		foreach(ll, wcoList)
2548 		{
2549 			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
2550 			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
2551 											   &mtstate->ps);
2552 
2553 			wcoExprs = lappend(wcoExprs, wcoExpr);
2554 		}
2555 
2556 		resultRelInfo->ri_WithCheckOptions = wcoList;
2557 		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
2558 		resultRelInfo++;
2559 		i++;
2560 	}
2561 
2562 	/*
2563 	 * Initialize RETURNING projections if needed.
2564 	 */
2565 	if (node->returningLists)
2566 	{
2567 		TupleTableSlot *slot;
2568 		ExprContext *econtext;
2569 
2570 		/*
2571 		 * Initialize result tuple slot and assign its rowtype using the first
2572 		 * RETURNING list.  We assume the rest will look the same.
2573 		 */
2574 		mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
2575 
2576 		/* Set up a slot for the output of the RETURNING projection(s) */
2577 		ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
2578 		slot = mtstate->ps.ps_ResultTupleSlot;
2579 
2580 		/* Need an econtext too */
2581 		if (mtstate->ps.ps_ExprContext == NULL)
2582 			ExecAssignExprContext(estate, &mtstate->ps);
2583 		econtext = mtstate->ps.ps_ExprContext;
2584 
2585 		/*
2586 		 * Build a projection for each result rel.
2587 		 */
2588 		resultRelInfo = mtstate->resultRelInfo;
2589 		foreach(l, node->returningLists)
2590 		{
2591 			List	   *rlist = (List *) lfirst(l);
2592 
2593 			resultRelInfo->ri_returningList = rlist;
2594 			resultRelInfo->ri_projectReturning =
2595 				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
2596 										resultRelInfo->ri_RelationDesc->rd_att);
2597 			resultRelInfo++;
2598 		}
2599 	}
2600 	else
2601 	{
2602 		/*
2603 		 * We still must construct a dummy result tuple type, because InitPlan
2604 		 * expects one (maybe should change that?).
2605 		 */
2606 		mtstate->ps.plan->targetlist = NIL;
2607 		ExecInitResultTypeTL(&mtstate->ps);
2608 
2609 		mtstate->ps.ps_ExprContext = NULL;
2610 	}
2611 
2612 	/* Set the list of arbiter indexes if needed for ON CONFLICT */
2613 	resultRelInfo = mtstate->resultRelInfo;
2614 	if (node->onConflictAction != ONCONFLICT_NONE)
2615 		resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
2616 
2617 	/*
2618 	 * If needed, initialize the target list, projection and qual for ON CONFLICT
2619 	 * DO UPDATE.
2620 	 */
2621 	if (node->onConflictAction == ONCONFLICT_UPDATE)
2622 	{
2623 		OnConflictSetState *onconfl = makeNode(OnConflictSetState);
2624 		ExprContext *econtext;
2625 		TupleDesc	relationDesc;
2626 
2627 		/* insert may only have one plan, inheritance is not expanded */
2628 		Assert(nplans == 1);
2629 
2630 		/* already exists if created by RETURNING processing above */
2631 		if (mtstate->ps.ps_ExprContext == NULL)
2632 			ExecAssignExprContext(estate, &mtstate->ps);
2633 
2634 		econtext = mtstate->ps.ps_ExprContext;
2635 		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
2636 
2637 		/* create state for DO UPDATE SET operation */
2638 		resultRelInfo->ri_onConflict = onconfl;
2639 
2640 		/* initialize slot for the existing tuple */
2641 		onconfl->oc_Existing =
2642 			table_slot_create(resultRelInfo->ri_RelationDesc,
2643 							  &mtstate->ps.state->es_tupleTable);
2644 
2645 		/*
2646 		 * Create the tuple slot for the UPDATE SET projection. We want a slot
2647 		 * of the table's type here, because the slot will be used to insert
2648 		 * into the table, and for RETURNING processing - which may access
2649 		 * system attributes.
2650 		 */
2651 		onconfl->oc_ProjSlot =
2652 			table_slot_create(resultRelInfo->ri_RelationDesc,
2653 							  &mtstate->ps.state->es_tupleTable);
2654 
2655 		/*
2656 		 * The onConflictSet tlist should already have been adjusted to emit
2657 		 * the table's exact column list.  It could also contain resjunk
2658 		 * columns, which should be evaluated but not included in the
2659 		 * projection result.
2660 		 */
2661 		ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2662 							node->onConflictSet);
2663 
2664 		/* build UPDATE SET projection state */
2665 		onconfl->oc_ProjInfo =
2666 			ExecBuildProjectionInfoExt(node->onConflictSet, econtext,
2667 									   onconfl->oc_ProjSlot, false,
2668 									   &mtstate->ps,
2669 									   relationDesc);
2670 
2671 		/* initialize state to evaluate the WHERE clause, if any */
2672 		if (node->onConflictWhere)
2673 		{
2674 			ExprState  *qualexpr;
2675 
2676 			qualexpr = ExecInitQual((List *) node->onConflictWhere,
2677 									&mtstate->ps);
2678 			onconfl->oc_WhereClause = qualexpr;
2679 		}
2680 	}
2681 
2682 	/*
2683 	 * If we have any secondary relations in an UPDATE or DELETE, they need to
2684 	 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
2685 	 * EvalPlanQual mechanism needs to be told about them.  Locate the
2686 	 * relevant ExecRowMarks.
2687 	 */
2688 	foreach(l, node->rowMarks)
2689 	{
2690 		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
2691 		ExecRowMark *erm;
2692 
2693 		/* ignore "parent" rowmarks; they are irrelevant at runtime */
2694 		if (rc->isParent)
2695 			continue;
2696 
2697 		/* find ExecRowMark (same for all subplans) */
2698 		erm = ExecFindRowMark(estate, rc->rti, false);
2699 
2700 		/* build ExecAuxRowMark for each subplan */
2701 		for (i = 0; i < nplans; i++)
2702 		{
2703 			ExecAuxRowMark *aerm;
2704 
2705 			subplan = mtstate->mt_plans[i]->plan;
2706 			aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
2707 			mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
2708 		}
2709 	}
2710 
2711 	/* select first subplan */
2712 	mtstate->mt_whichplan = 0;
2713 	subplan = (Plan *) linitial(node->plans);
2714 	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
2715 						mtstate->mt_arowmarks[0]);
2716 
2717 	/*
2718 	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
2719 	 * if there are any junk attrs in the tlist.  UPDATE and DELETE always
2720 	 * need a filter, since there's always at least one junk attribute present
2721 	 * --- no need to look first.  Typically, this will be a 'ctid' or
2722 	 * 'wholerow' attribute, but in the case of a foreign data wrapper it
2723 	 * might be a set of junk attributes sufficient to identify the remote
2724 	 * row.
2725 	 *
2726 	 * If there are multiple result relations, each one needs its own junk
2727 	 * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
2728 	 * can't be fooled by some needing a filter and some not.
2729 	 *
2730 	 * This section of code is also a convenient place to verify that the
2731 	 * output of an INSERT or UPDATE matches the target table(s).
2732 	 */
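	/*
	 * For example (hedged sketch): in "UPDATE t SET v = v || 'x'" against an
	 * ordinary heap table, the planner appends a resjunk "ctid" column to
	 * the subplan's target list; the junk filter built below both locates
	 * that column (so the old row can be found) and strips it before the new
	 * tuple is formed.
	 */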
2733 	{
2734 		bool		junk_filter_needed = false;
2735 
2736 		switch (operation)
2737 		{
2738 			case CMD_INSERT:
2739 				foreach(l, subplan->targetlist)
2740 				{
2741 					TargetEntry *tle = (TargetEntry *) lfirst(l);
2742 
2743 					if (tle->resjunk)
2744 					{
2745 						junk_filter_needed = true;
2746 						break;
2747 					}
2748 				}
2749 				break;
2750 			case CMD_UPDATE:
2751 			case CMD_DELETE:
2752 				junk_filter_needed = true;
2753 				break;
2754 			default:
2755 				elog(ERROR, "unknown operation");
2756 				break;
2757 		}
2758 
2759 		if (junk_filter_needed)
2760 		{
2761 			resultRelInfo = mtstate->resultRelInfo;
2762 			for (i = 0; i < nplans; i++)
2763 			{
2764 				JunkFilter *j;
2765 				TupleTableSlot *junkresslot;
2766 
2767 				subplan = mtstate->mt_plans[i]->plan;
2768 
2769 				junkresslot =
2770 					ExecInitExtraTupleSlot(estate, NULL,
2771 										   table_slot_callbacks(resultRelInfo->ri_RelationDesc));
2772 
2773 				/*
2774 				 * For an INSERT or UPDATE, the result tuple must always match
2775 				 * the target table's descriptor.  For a DELETE, it won't
2776 				 * (indeed, there's probably no non-junk output columns).
2777 				 */
2778 				if (operation == CMD_INSERT || operation == CMD_UPDATE)
2779 				{
2780 					ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2781 										subplan->targetlist);
2782 					j = ExecInitJunkFilterInsertion(subplan->targetlist,
2783 													RelationGetDescr(resultRelInfo->ri_RelationDesc),
2784 													junkresslot);
2785 				}
2786 				else
2787 					j = ExecInitJunkFilter(subplan->targetlist,
2788 										   junkresslot);
2789 
2790 				if (operation == CMD_UPDATE || operation == CMD_DELETE)
2791 				{
2792 					/* For UPDATE/DELETE, find the appropriate junk attr now */
2793 					char		relkind;
2794 
2795 					relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2796 					if (relkind == RELKIND_RELATION ||
2797 						relkind == RELKIND_MATVIEW ||
2798 						relkind == RELKIND_PARTITIONED_TABLE)
2799 					{
2800 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
2801 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2802 							elog(ERROR, "could not find junk ctid column");
2803 					}
2804 					else if (relkind == RELKIND_FOREIGN_TABLE)
2805 					{
2806 						/*
2807 						 * When there is a row-level trigger, there should be
2808 						 * a wholerow attribute.
2809 						 */
2810 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2811 					}
2812 					else
2813 					{
2814 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2815 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2816 							elog(ERROR, "could not find junk wholerow column");
2817 					}
2818 				}
2819 
2820 				resultRelInfo->ri_junkFilter = j;
2821 				resultRelInfo++;
2822 			}
2823 		}
2824 		else
2825 		{
2826 			if (operation == CMD_INSERT)
2827 				ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
2828 									subplan->targetlist);
2829 		}
2830 	}
2831 
2832 	/*
2833 	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
2834 	 * to estate->es_auxmodifytables so that it will be run to completion by
2835 	 * ExecPostprocessPlan.  (It'd actually work fine to add the primary
2836 	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
2837 	 * lappend: we need later-initialized ModifyTable nodes to be shut down
2838 	 * before earlier ones.  This ensures that we don't throw away RETURNING
2839 	 * rows that need to be seen by a later CTE subplan.
2840 	 */
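	/*
	 * Illustration (hypothetical query, sketch of the intent only): in
	 *     WITH a AS (INSERT INTO t1 DEFAULT VALUES RETURNING id),
	 *          b AS (INSERT INTO t2 SELECT id FROM a)
	 *     SELECT 1;
	 * the node for "b" is initialized after the one for "a", so lcons makes
	 * ExecPostprocessPlan run "b" to completion first; that way "a"'s
	 * RETURNING rows are consumed through the CTE machinery instead of being
	 * discarded before "b" has seen them.
	 */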
2841 	if (!mtstate->canSetTag)
2842 		estate->es_auxmodifytables = lcons(mtstate,
2843 										   estate->es_auxmodifytables);
2844 
2845 	return mtstate;
2846 }
2847 
2848 /* ----------------------------------------------------------------
2849  *		ExecEndModifyTable
2850  *
2851  *		Shuts down the plan.
2852  *
2853  *		Returns nothing of interest.
2854  * ----------------------------------------------------------------
2855  */
2856 void
2857 ExecEndModifyTable(ModifyTableState *node)
2858 {
2859 	int			i;
2860 
2861 	/*
2862 	 * Allow any FDWs to shut down
2863 	 */
2864 	for (i = 0; i < node->mt_nplans; i++)
2865 	{
2866 		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
2867 
2868 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2869 			resultRelInfo->ri_FdwRoutine != NULL &&
2870 			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
2871 			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
2872 														   resultRelInfo);
2873 	}
2874 
2875 	/*
2876 	 * Close all the partitioned tables, leaf partitions, and their indices
2877 	 * and release the slot used for tuple routing, if set.
2878 	 */
2879 	if (node->mt_partition_tuple_routing)
2880 	{
2881 		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
2882 
2883 		if (node->mt_root_tuple_slot)
2884 			ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
2885 	}
2886 
2887 	/*
2888 	 * Free the exprcontext
2889 	 */
2890 	ExecFreeExprContext(&node->ps);
2891 
2892 	/*
2893 	 * clean out the tuple table
2894 	 */
2895 	if (node->ps.ps_ResultTupleSlot)
2896 		ExecClearTuple(node->ps.ps_ResultTupleSlot);
2897 
2898 	/*
2899 	 * Terminate EPQ execution if active
2900 	 */
2901 	EvalPlanQualEnd(&node->mt_epqstate);
2902 
2903 	/*
2904 	 * shut down subplans
2905 	 */
2906 	for (i = 0; i < node->mt_nplans; i++)
2907 		ExecEndNode(node->mt_plans[i]);
2908 }
2909 
2910 void
2911 ExecReScanModifyTable(ModifyTableState *node)
2912 {
2913 	/*
2914 	 * Currently, we don't need to support rescan on ModifyTable nodes. The
2915 	 * semantics of that would be a bit debatable anyway.
2916 	 */
2917 	elog(ERROR, "ExecReScanModifyTable is not implemented");
2918 }
2919