1 /*-------------------------------------------------------------------------
2  *
3  * nodeModifyTable.c
4  *	  routines to handle ModifyTable nodes.
5  *
6  * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeModifyTable.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  *		ExecInitModifyTable - initialize the ModifyTable node
17  *		ExecModifyTable		- retrieve the next tuple from the node
18  *		ExecEndModifyTable	- shut down the ModifyTable node
19  *		ExecReScanModifyTable - rescan the ModifyTable node
20  *
21  *	 NOTES
22  *		Each ModifyTable node contains a list of one or more subplans,
23  *		much like an Append node.  There is one subplan per result relation.
24  *		The key reason for this is that in an inherited UPDATE command, each
25  *		result relation could have a different schema (more or different
26  *		columns) requiring a different plan tree to produce it.  In an
27  *		inherited DELETE, all the subplans should produce the same output
28  *		rowtype, but we might still find that different plans are appropriate
29  *		for different child relations.
30  *
31  *		If the query specifies RETURNING, then the ModifyTable returns a
32  *		RETURNING tuple after completing each row insert, update, or delete.
33  *		It must be called again to continue the operation.  Without RETURNING,
34  *		we just loop within the node until all the work is done, then
35  *		return NULL.  This avoids useless call/return overhead.
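 *
 *		For example, an INSERT ... RETURNING hands back one projected tuple
 *		per inserted row, one ExecModifyTable call at a time, whereas a
 *		plain INSERT runs to completion within a single call and then
 *		returns NULL.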
36  */
37 
38 #include "postgres.h"
39 
40 #include "access/heapam.h"
41 #include "access/htup_details.h"
42 #include "access/tableam.h"
43 #include "access/xact.h"
44 #include "catalog/catalog.h"
45 #include "commands/trigger.h"
46 #include "executor/execPartition.h"
47 #include "executor/executor.h"
48 #include "executor/nodeModifyTable.h"
49 #include "foreign/fdwapi.h"
50 #include "miscadmin.h"
51 #include "nodes/nodeFuncs.h"
52 #include "rewrite/rewriteHandler.h"
53 #include "storage/bufmgr.h"
54 #include "storage/lmgr.h"
55 #include "utils/builtins.h"
56 #include "utils/datum.h"
57 #include "utils/memutils.h"
58 #include "utils/rel.h"
59 
60 
61 static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
62 								 ResultRelInfo *resultRelInfo,
63 								 ItemPointer conflictTid,
64 								 TupleTableSlot *planSlot,
65 								 TupleTableSlot *excludedSlot,
66 								 EState *estate,
67 								 bool canSetTag,
68 								 TupleTableSlot **returning);
69 static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
70 											   EState *estate,
71 											   PartitionTupleRouting *proute,
72 											   ResultRelInfo *targetRelInfo,
73 											   TupleTableSlot *slot);
74 static ResultRelInfo *getTargetResultRelInfo(ModifyTableState *node);
75 static void ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate);
76 static TupleConversionMap *tupconv_map_for_subplan(ModifyTableState *node,
77 												   int whichplan);
78 
79 /*
80  * Verify that the tuples to be produced by INSERT or UPDATE match the
81  * target relation's rowtype
82  *
83  * We do this to guard against stale plans.  If plan invalidation is
84  * functioning properly then we should never get a failure here, but better
85  * safe than sorry.  Note that this is called after we have obtained lock
86  * on the target rel, so the rowtype can't change underneath us.
87  *
88  * The plan output is represented by its targetlist, because that makes
89  * handling the dropped-column case easier.
90  */
91 static void
92 ExecCheckPlanOutput(Relation resultRel, List *targetList)
93 {
94 	TupleDesc	resultDesc = RelationGetDescr(resultRel);
95 	int			attno = 0;
96 	ListCell   *lc;
97 
98 	foreach(lc, targetList)
99 	{
100 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
101 		Form_pg_attribute attr;
102 
103 		if (tle->resjunk)
104 			continue;			/* ignore junk tlist items */
105 
106 		if (attno >= resultDesc->natts)
107 			ereport(ERROR,
108 					(errcode(ERRCODE_DATATYPE_MISMATCH),
109 					 errmsg("table row type and query-specified row type do not match"),
110 					 errdetail("Query has too many columns.")));
111 		attr = TupleDescAttr(resultDesc, attno);
112 		attno++;
113 
114 		if (!attr->attisdropped)
115 		{
116 			/* Normal case: demand type match */
117 			if (exprType((Node *) tle->expr) != attr->atttypid)
118 				ereport(ERROR,
119 						(errcode(ERRCODE_DATATYPE_MISMATCH),
120 						 errmsg("table row type and query-specified row type do not match"),
121 						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
122 								   format_type_be(attr->atttypid),
123 								   attno,
124 								   format_type_be(exprType((Node *) tle->expr)))));
125 		}
126 		else
127 		{
128 			/*
129 			 * For a dropped column, we can't check atttypid (it's likely 0).
130 			 * In any case the planner has most likely inserted an INT4 null.
131 			 * What we insist on is just *some* NULL constant.
132 			 */
133 			if (!IsA(tle->expr, Const) ||
134 				!((Const *) tle->expr)->constisnull)
135 				ereport(ERROR,
136 						(errcode(ERRCODE_DATATYPE_MISMATCH),
137 						 errmsg("table row type and query-specified row type do not match"),
138 						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
139 								   attno)));
140 		}
141 	}
142 	if (attno != resultDesc->natts)
143 		ereport(ERROR,
144 				(errcode(ERRCODE_DATATYPE_MISMATCH),
145 				 errmsg("table row type and query-specified row type do not match"),
146 				 errdetail("Query has too few columns.")));
147 }
148 
149 /*
150  * ExecProcessReturning --- evaluate a RETURNING list
151  *
152  * projectReturning: the projection to evaluate
153  * resultRelOid: result relation's OID
154  * tupleSlot: slot holding tuple actually inserted/updated/deleted
155  * planSlot: slot holding tuple returned by top subplan node
156  *
157  * In cross-partition UPDATE cases, projectReturning and planSlot are as
158  * for the source partition, and tupleSlot must conform to that.  But
159  * resultRelOid is for the destination partition.
160  *
161  * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
162  * scan tuple.
163  *
164  * Returns a slot holding the result tuple
165  */
166 static TupleTableSlot *
167 ExecProcessReturning(ProjectionInfo *projectReturning,
168 					 Oid resultRelOid,
169 					 TupleTableSlot *tupleSlot,
170 					 TupleTableSlot *planSlot)
171 {
172 	ExprContext *econtext = projectReturning->pi_exprContext;
173 
174 	/* Make tuple and any needed join variables available to ExecProject */
175 	if (tupleSlot)
176 		econtext->ecxt_scantuple = tupleSlot;
177 	else
178 		Assert(econtext->ecxt_scantuple);
179 	econtext->ecxt_outertuple = planSlot;
180 
181 	/*
182 	 * RETURNING expressions might reference the tableoid column, so be sure
183 	 * we expose the desired OID, ie that of the real target relation.
184 	 */
185 	econtext->ecxt_scantuple->tts_tableOid = resultRelOid;
186 
187 	/* Compute the RETURNING expressions */
188 	return ExecProject(projectReturning);
189 }
190 
191 /*
192  * ExecCheckTupleVisible -- verify tuple is visible
193  *
 * At the higher isolation levels, it would not be consistent to skip the
 * insertion (taking speculative insertion's alternative path) on the basis
 * of another tuple that is not visible to our MVCC snapshot.  Check whether
 * a serialization failure needs to be raised, and do so as necessary.
198  */
199 static void
200 ExecCheckTupleVisible(EState *estate,
201 					  Relation rel,
202 					  TupleTableSlot *slot)
203 {
204 	if (!IsolationUsesXactSnapshot())
205 		return;
206 
207 	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
208 	{
209 		Datum		xminDatum;
210 		TransactionId xmin;
211 		bool		isnull;
212 
213 		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
214 		Assert(!isnull);
215 		xmin = DatumGetTransactionId(xminDatum);
216 
217 		/*
218 		 * We should not raise a serialization failure if the conflict is
219 		 * against a tuple inserted by our own transaction, even if it's not
220 		 * visible to our snapshot.  (This would happen, for example, if
221 		 * conflicting keys are proposed for insertion in a single command.)
222 		 */
223 		if (!TransactionIdIsCurrentTransactionId(xmin))
224 			ereport(ERROR,
225 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
226 					 errmsg("could not serialize access due to concurrent update")));
227 	}
228 }
229 
230 /*
231  * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
232  */
233 static void
234 ExecCheckTIDVisible(EState *estate,
235 					ResultRelInfo *relinfo,
236 					ItemPointer tid,
237 					TupleTableSlot *tempSlot)
238 {
239 	Relation	rel = relinfo->ri_RelationDesc;
240 
241 	/* Redundantly check isolation level */
242 	if (!IsolationUsesXactSnapshot())
243 		return;
244 
245 	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
246 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
247 	ExecCheckTupleVisible(estate, rel, tempSlot);
248 	ExecClearTuple(tempSlot);
249 }
250 
251 /*
252  * Compute stored generated columns for a tuple
253  */
254 void
255 ExecComputeStoredGenerated(EState *estate, TupleTableSlot *slot)
256 {
257 	ResultRelInfo *resultRelInfo = estate->es_result_relation_info;
258 	Relation	rel = resultRelInfo->ri_RelationDesc;
259 	TupleDesc	tupdesc = RelationGetDescr(rel);
260 	int			natts = tupdesc->natts;
261 	MemoryContext oldContext;
262 	Datum	   *values;
263 	bool	   *nulls;
264 
265 	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);
266 
267 	/*
268 	 * If first time through for this result relation, build expression
269 	 * nodetrees for rel's stored generation expressions.  Keep them in the
270 	 * per-query memory context so they'll survive throughout the query.
271 	 */
272 	if (resultRelInfo->ri_GeneratedExprs == NULL)
273 	{
274 		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
275 
276 		resultRelInfo->ri_GeneratedExprs =
277 			(ExprState **) palloc(natts * sizeof(ExprState *));
278 
279 		for (int i = 0; i < natts; i++)
280 		{
281 			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
282 			{
283 				Expr	   *expr;
284 
285 				expr = (Expr *) build_column_default(rel, i + 1);
286 				if (expr == NULL)
287 					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
288 						 i + 1, RelationGetRelationName(rel));
289 
290 				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
291 			}
292 		}
293 
294 		MemoryContextSwitchTo(oldContext);
295 	}
296 
297 	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
298 
299 	values = palloc(sizeof(*values) * natts);
300 	nulls = palloc(sizeof(*nulls) * natts);
301 
302 	slot_getallattrs(slot);
303 	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);
304 
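	/*
	 * Evaluate each stored generated column below; the values of all other
	 * columns are simply copied over from the input slot.
	 */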
305 	for (int i = 0; i < natts; i++)
306 	{
307 		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
308 
309 		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED)
310 		{
311 			ExprContext *econtext;
312 			Datum		val;
313 			bool		isnull;
314 
315 			econtext = GetPerTupleExprContext(estate);
316 			econtext->ecxt_scantuple = slot;
317 
318 			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);
319 
320 			/*
321 			 * We must make a copy of val as we have no guarantees about where
322 			 * memory for a pass-by-reference Datum is located.
323 			 */
324 			if (!isnull)
325 				val = datumCopy(val, attr->attbyval, attr->attlen);
326 
327 			values[i] = val;
328 			nulls[i] = isnull;
329 		}
330 		else
331 		{
332 			if (!nulls[i])
333 				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
334 		}
335 	}
336 
337 	ExecClearTuple(slot);
338 	memcpy(slot->tts_values, values, sizeof(*values) * natts);
339 	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
340 	ExecStoreVirtualTuple(slot);
341 	ExecMaterializeSlot(slot);
342 
343 	MemoryContextSwitchTo(oldContext);
344 }
345 
346 /* ----------------------------------------------------------------
347  *		ExecInsert
348  *
349  *		For INSERT, we have to insert the tuple into the target relation
350  *		and insert appropriate tuples into the index relations.
351  *
352  *		slot contains the new tuple value to be stored.
353  *		planSlot is the output of the ModifyTable's subplan; we use it
354  *		to access "junk" columns that are not going to be stored.
355  *		In a cross-partition UPDATE, srcSlot is the slot that held the
356  *		updated tuple for the source relation; otherwise it's NULL.
357  *
358  *		returningRelInfo is the resultRelInfo for the source relation of a
359  *		cross-partition UPDATE; otherwise it's the current result relation.
360  *		We use it to process RETURNING lists, for reasons explained below.
361  *
362  *		Returns RETURNING result if any, otherwise NULL.
363  * ----------------------------------------------------------------
364  */
365 static TupleTableSlot *
366 ExecInsert(ModifyTableState *mtstate,
367 		   TupleTableSlot *slot,
368 		   TupleTableSlot *planSlot,
369 		   TupleTableSlot *srcSlot,
370 		   ResultRelInfo *returningRelInfo,
371 		   EState *estate,
372 		   bool canSetTag)
373 {
374 	ResultRelInfo *resultRelInfo;
375 	Relation	resultRelationDesc;
376 	List	   *recheckIndexes = NIL;
377 	TupleTableSlot *result = NULL;
378 	TransitionCaptureState *ar_insert_trig_tcs;
379 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
380 	OnConflictAction onconflict = node->onConflictAction;
381 
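	/*
	 * Make sure the slot holds its own local copy of the tuple, rather than
	 * pointing into storage owned by the subplan's output slot.
	 */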
382 	ExecMaterializeSlot(slot);
383 
384 	/*
385 	 * get information on the (current) result relation
386 	 */
387 	resultRelInfo = estate->es_result_relation_info;
388 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
389 
390 	/*
391 	 * BEFORE ROW INSERT Triggers.
392 	 *
393 	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
394 	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
395 	 * violations before firing these triggers, because they can change the
396 	 * values to insert.  Also, they can run arbitrary user-defined code with
397 	 * side-effects that we can't cancel by just not inserting the tuple.
398 	 */
399 	if (resultRelInfo->ri_TrigDesc &&
400 		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
401 	{
402 		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
403 			return NULL;		/* "do nothing" */
404 	}
405 
406 	/* INSTEAD OF ROW INSERT Triggers */
407 	if (resultRelInfo->ri_TrigDesc &&
408 		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
409 	{
410 		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
411 			return NULL;		/* "do nothing" */
412 	}
413 	else if (resultRelInfo->ri_FdwRoutine)
414 	{
415 		/*
416 		 * GENERATED expressions might reference the tableoid column, so
417 		 * (re-)initialize tts_tableOid before evaluating them.
418 		 */
419 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
420 
421 		/*
422 		 * Compute stored generated columns
423 		 */
424 		if (resultRelationDesc->rd_att->constr &&
425 			resultRelationDesc->rd_att->constr->has_generated_stored)
426 			ExecComputeStoredGenerated(estate, slot);
427 
428 		/*
429 		 * insert into foreign table: let the FDW do it
430 		 */
431 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
432 															   resultRelInfo,
433 															   slot,
434 															   planSlot);
435 
436 		if (slot == NULL)		/* "do nothing" */
437 			return NULL;
438 
439 		/*
440 		 * AFTER ROW Triggers or RETURNING expressions might reference the
441 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
442 		 * them.  (This covers the case where the FDW replaced the slot.)
443 		 */
444 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
445 	}
446 	else
447 	{
448 		WCOKind		wco_kind;
449 
450 		/*
451 		 * Constraints and GENERATED expressions might reference the tableoid
452 		 * column, so (re-)initialize tts_tableOid before evaluating them.
453 		 */
454 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
455 
456 		/*
457 		 * Compute stored generated columns
458 		 */
459 		if (resultRelationDesc->rd_att->constr &&
460 			resultRelationDesc->rd_att->constr->has_generated_stored)
461 			ExecComputeStoredGenerated(estate, slot);
462 
463 		/*
464 		 * Check any RLS WITH CHECK policies.
465 		 *
466 		 * Normally we should check INSERT policies. But if the insert is the
467 		 * result of a partition key update that moved the tuple to a new
468 		 * partition, we should instead check UPDATE policies, because we are
469 		 * executing policies defined on the target table, and not those
470 		 * defined on the child partitions.
471 		 */
472 		wco_kind = (mtstate->operation == CMD_UPDATE) ?
473 			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
474 
475 		/*
476 		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
477 		 * we are looking for at this point.
478 		 */
479 		if (resultRelInfo->ri_WithCheckOptions != NIL)
480 			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
481 
482 		/*
483 		 * Check the constraints of the tuple.
484 		 */
485 		if (resultRelationDesc->rd_att->constr)
486 			ExecConstraints(resultRelInfo, slot, estate);
487 
488 		/*
489 		 * Also check the tuple against the partition constraint, if there is
		 * one.  If we arrived here via tuple routing, though, we need to
		 * recheck only if the partition has a BR INSERT trigger that could
		 * have changed the tuple.
492 		 */
493 		if (resultRelInfo->ri_PartitionCheck &&
494 			(resultRelInfo->ri_RootResultRelInfo == NULL ||
495 			 (resultRelInfo->ri_TrigDesc &&
496 			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
497 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
498 
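		/*
		 * A conflict can only be detected against an arbiter index, so if
		 * the relation has no indexes at all, a plain insert suffices even
		 * when ON CONFLICT was specified.
		 */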
499 		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
500 		{
501 			/* Perform a speculative insertion. */
502 			uint32		specToken;
503 			ItemPointerData conflictTid;
504 			bool		specConflict;
505 			List	   *arbiterIndexes;
506 
507 			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
508 
509 			/*
510 			 * Do a non-conclusive check for conflicts first.
511 			 *
512 			 * We're not holding any locks yet, so this doesn't guarantee that
513 			 * the later insert won't conflict.  But it avoids leaving behind
514 			 * a lot of canceled speculative insertions, if you run a lot of
515 			 * INSERT ON CONFLICT statements that do conflict.
516 			 *
517 			 * We loop back here if we find a conflict below, either during
518 			 * the pre-check, or when we re-check after inserting the tuple
519 			 * speculatively.
520 			 */
521 	vlock:
522 			specConflict = false;
523 			if (!ExecCheckIndexConstraints(slot, estate, &conflictTid,
524 										   arbiterIndexes))
525 			{
526 				/* committed conflict tuple found */
527 				if (onconflict == ONCONFLICT_UPDATE)
528 				{
529 					/*
530 					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
531 					 * part.  Be prepared to retry if the UPDATE fails because
532 					 * of another concurrent UPDATE/DELETE to the conflict
533 					 * tuple.
534 					 */
535 					TupleTableSlot *returning = NULL;
536 
537 					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
538 											 &conflictTid, planSlot, slot,
539 											 estate, canSetTag, &returning))
540 					{
541 						InstrCountTuples2(&mtstate->ps, 1);
542 						return returning;
543 					}
544 					else
545 						goto vlock;
546 				}
547 				else
548 				{
549 					/*
550 					 * In case of ON CONFLICT DO NOTHING, do nothing. However,
551 					 * verify that the tuple is visible to the executor's MVCC
552 					 * snapshot at higher isolation levels.
553 					 *
554 					 * Using ExecGetReturningSlot() to store the tuple for the
555 					 * recheck isn't that pretty, but we can't trivially use
556 					 * the input slot, because it might not be of a compatible
					 * type.  As there's no conflicting usage of
					 * ExecGetReturningSlot() in the DO NOTHING case, we simply
					 * reuse it here.
559 					 */
560 					Assert(onconflict == ONCONFLICT_NOTHING);
561 					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
562 										ExecGetReturningSlot(estate, resultRelInfo));
563 					InstrCountTuples2(&mtstate->ps, 1);
564 					return NULL;
565 				}
566 			}
567 
568 			/*
569 			 * Before we start insertion proper, acquire our "speculative
570 			 * insertion lock".  Others can use that to wait for us to decide
571 			 * if we're going to go ahead with the insertion, instead of
572 			 * waiting for the whole transaction to complete.
573 			 */
574 			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
575 
576 			/* insert the tuple, with the speculative token */
577 			table_tuple_insert_speculative(resultRelationDesc, slot,
578 										   estate->es_output_cid,
579 										   0,
580 										   NULL,
581 										   specToken);
582 
583 			/* insert index entries for tuple */
584 			recheckIndexes = ExecInsertIndexTuples(slot, estate, true,
585 												   &specConflict,
586 												   arbiterIndexes);
587 
588 			/* adjust the tuple's state accordingly */
589 			table_tuple_complete_speculative(resultRelationDesc, slot,
590 											 specToken, !specConflict);
591 
592 			/*
593 			 * Wake up anyone waiting for our decision.  They will re-check
594 			 * the tuple, see that it's no longer speculative, and wait on our
595 			 * XID as if this was a regularly inserted tuple all along.  Or if
596 			 * we killed the tuple, they will see it's dead, and proceed as if
597 			 * the tuple never existed.
598 			 */
599 			SpeculativeInsertionLockRelease(GetCurrentTransactionId());
600 
601 			/*
602 			 * If there was a conflict, start from the beginning.  We'll do
603 			 * the pre-check again, which will now find the conflicting tuple
604 			 * (unless it aborts before we get there).
605 			 */
606 			if (specConflict)
607 			{
608 				list_free(recheckIndexes);
609 				goto vlock;
610 			}
611 
612 			/* Since there was no insertion conflict, we're done */
613 		}
614 		else
615 		{
616 			/* insert the tuple normally */
617 			table_tuple_insert(resultRelationDesc, slot,
618 							   estate->es_output_cid,
619 							   0, NULL);
620 
621 			/* insert index entries for tuple */
622 			if (resultRelInfo->ri_NumIndices > 0)
623 				recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL,
624 													   NIL);
625 		}
626 	}
627 
628 	if (canSetTag)
629 	{
630 		(estate->es_processed)++;
631 		setLastTid(&slot->tts_tid);
632 	}
633 
634 	/*
635 	 * If this insert is the result of a partition key update that moved the
636 	 * tuple to a new partition, put this row into the transition NEW TABLE,
637 	 * if there is one. We need to do this separately for DELETE and INSERT
638 	 * because they happen on different tables.
639 	 */
640 	ar_insert_trig_tcs = mtstate->mt_transition_capture;
641 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
642 		&& mtstate->mt_transition_capture->tcs_update_new_table)
643 	{
644 		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
645 							 NULL,
646 							 slot,
647 							 NULL,
648 							 mtstate->mt_transition_capture);
649 
650 		/*
651 		 * We've already captured the NEW TABLE row, so make sure any AR
652 		 * INSERT trigger fired below doesn't capture it again.
653 		 */
654 		ar_insert_trig_tcs = NULL;
655 	}
656 
657 	/* AFTER ROW INSERT Triggers */
658 	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
659 						 ar_insert_trig_tcs);
660 
661 	list_free(recheckIndexes);
662 
663 	/*
664 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
665 	 * required to do this after testing all constraints and uniqueness
666 	 * violations per the SQL spec, so we do it after actually inserting the
667 	 * record into the heap and all indexes.
668 	 *
669 	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
670 	 * tuple will never be seen, if it violates the WITH CHECK OPTION.
671 	 *
672 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
673 	 * are looking for at this point.
674 	 */
675 	if (resultRelInfo->ri_WithCheckOptions != NIL)
676 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
677 
678 	/* Process RETURNING if present */
679 	if (returningRelInfo->ri_projectReturning)
680 	{
681 		/*
682 		 * In a cross-partition UPDATE with RETURNING, we have to use the
683 		 * source partition's RETURNING list, because that matches the output
684 		 * of the planSlot, while the destination partition might have
685 		 * different resjunk columns.  This means we have to map the
686 		 * destination tuple back to the source's format so we can apply that
687 		 * RETURNING list.  This is expensive, but it should be an uncommon
688 		 * corner case, so we won't spend much effort on making it fast.
689 		 *
690 		 * We assume that we can use srcSlot to hold the re-converted tuple.
691 		 * Note that in the common case where the child partitions both match
692 		 * the root's format, previous optimizations will have resulted in
693 		 * slot and srcSlot being identical, cueing us that there's nothing to
694 		 * do here.
695 		 */
696 		if (returningRelInfo != resultRelInfo && slot != srcSlot)
697 		{
698 			Relation	srcRelationDesc = returningRelInfo->ri_RelationDesc;
699 			AttrNumber *map;
700 
701 			map = convert_tuples_by_name_map_if_req(RelationGetDescr(resultRelationDesc),
702 													RelationGetDescr(srcRelationDesc),
703 													gettext_noop("could not convert row type"));
704 			if (map)
705 			{
706 				TupleTableSlot *origSlot = slot;
707 
708 				slot = execute_attr_map_slot(map, slot, srcSlot);
709 				slot->tts_tid = origSlot->tts_tid;
710 				slot->tts_tableOid = origSlot->tts_tableOid;
711 				pfree(map);
712 			}
713 		}
714 
715 		result = ExecProcessReturning(returningRelInfo->ri_projectReturning,
716 									  RelationGetRelid(resultRelationDesc),
717 									  slot, planSlot);
718 	}
719 
720 	return result;
721 }
722 
723 /* ----------------------------------------------------------------
724  *		ExecDelete
725  *
726  *		DELETE is like UPDATE, except that we delete the tuple and no
727  *		index modifications are needed.
728  *
729  *		When deleting from a table, tupleid identifies the tuple to
730  *		delete and oldtuple is NULL.  When deleting from a view,
731  *		oldtuple is passed to the INSTEAD OF triggers and identifies
732  *		what to delete, and tupleid is invalid.  When deleting from a
733  *		foreign table, tupleid is invalid; the FDW has to figure out
734  *		which row to delete using data from the planSlot.  oldtuple is
735  *		passed to foreign table triggers; it is NULL when the foreign
 *		table has no relevant triggers.  We use tupleDeleted to indicate
 *		whether the tuple is actually deleted; callers can use it to
 *		decide whether to continue the operation.  When this DELETE is
 *		part of an update of the partition key, the slot returned by
 *		EvalPlanQual() is passed back using the output parameter epqslot.
741  *
742  *		Returns RETURNING result if any, otherwise NULL.
743  * ----------------------------------------------------------------
744  */
745 static TupleTableSlot *
746 ExecDelete(ModifyTableState *mtstate,
747 		   ItemPointer tupleid,
748 		   HeapTuple oldtuple,
749 		   TupleTableSlot *planSlot,
750 		   EPQState *epqstate,
751 		   EState *estate,
752 		   bool processReturning,
753 		   bool canSetTag,
754 		   bool changingPart,
755 		   bool *tupleDeleted,
756 		   TupleTableSlot **epqreturnslot)
757 {
758 	ResultRelInfo *resultRelInfo;
759 	Relation	resultRelationDesc;
760 	TM_Result	result;
761 	TM_FailureData tmfd;
762 	TupleTableSlot *slot = NULL;
763 	TransitionCaptureState *ar_delete_trig_tcs;
764 
765 	if (tupleDeleted)
766 		*tupleDeleted = false;
767 
768 	/*
769 	 * get information on the (current) result relation
770 	 */
771 	resultRelInfo = estate->es_result_relation_info;
772 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
773 
774 	/* BEFORE ROW DELETE Triggers */
775 	if (resultRelInfo->ri_TrigDesc &&
776 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
777 	{
778 		bool		dodelete;
779 
780 		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
781 										tupleid, oldtuple, epqreturnslot);
782 
783 		if (!dodelete)			/* "do nothing" */
784 			return NULL;
785 	}
786 
787 	/* INSTEAD OF ROW DELETE Triggers */
788 	if (resultRelInfo->ri_TrigDesc &&
789 		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
790 	{
791 		bool		dodelete;
792 
793 		Assert(oldtuple != NULL);
794 		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);
795 
796 		if (!dodelete)			/* "do nothing" */
797 			return NULL;
798 	}
799 	else if (resultRelInfo->ri_FdwRoutine)
800 	{
801 		/*
802 		 * delete from foreign table: let the FDW do it
803 		 *
804 		 * We offer the returning slot as a place to store RETURNING data,
805 		 * although the FDW can return some other slot if it wants.
806 		 */
807 		slot = ExecGetReturningSlot(estate, resultRelInfo);
808 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
809 															   resultRelInfo,
810 															   slot,
811 															   planSlot);
812 
813 		if (slot == NULL)		/* "do nothing" */
814 			return NULL;
815 
816 		/*
817 		 * RETURNING expressions might reference the tableoid column, so
818 		 * (re)initialize tts_tableOid before evaluating them.
819 		 */
820 		if (TTS_EMPTY(slot))
821 			ExecStoreAllNullTuple(slot);
822 
823 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
824 	}
825 	else
826 	{
827 		/*
828 		 * delete the tuple
829 		 *
830 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
831 		 * that the row to be deleted is visible to that snapshot, and throw a
832 		 * can't-serialize error if not. This is a special-case behavior
833 		 * needed for referential integrity updates in transaction-snapshot
834 		 * mode transactions.
835 		 */
836 ldelete:;
837 		result = table_tuple_delete(resultRelationDesc, tupleid,
838 									estate->es_output_cid,
839 									estate->es_snapshot,
840 									estate->es_crosscheck_snapshot,
841 									true /* wait for commit */ ,
842 									&tmfd,
843 									changingPart);
844 
845 		switch (result)
846 		{
847 			case TM_SelfModified:
848 
849 				/*
850 				 * The target tuple was already updated or deleted by the
851 				 * current command, or by a later command in the current
852 				 * transaction.  The former case is possible in a join DELETE
853 				 * where multiple tuples join to the same target tuple. This
854 				 * is somewhat questionable, but Postgres has always allowed
855 				 * it: we just ignore additional deletion attempts.
856 				 *
857 				 * The latter case arises if the tuple is modified by a
858 				 * command in a BEFORE trigger, or perhaps by a command in a
859 				 * volatile function used in the query.  In such situations we
860 				 * should not ignore the deletion, but it is equally unsafe to
861 				 * proceed.  We don't want to discard the original DELETE
862 				 * while keeping the triggered actions based on its deletion;
863 				 * and it would be no better to allow the original DELETE
864 				 * while discarding updates that it triggered.  The row update
865 				 * carries some information that might be important according
866 				 * to business rules; so throwing an error is the only safe
867 				 * course.
868 				 *
869 				 * If a trigger actually intends this type of interaction, it
870 				 * can re-execute the DELETE and then return NULL to cancel
871 				 * the outer delete.
872 				 */
873 				if (tmfd.cmax != estate->es_output_cid)
874 					ereport(ERROR,
875 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
876 							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
877 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
878 
879 				/* Else, already deleted by self; nothing to do */
880 				return NULL;
881 
882 			case TM_Ok:
883 				break;
884 
885 			case TM_Updated:
886 				{
887 					TupleTableSlot *inputslot;
888 					TupleTableSlot *epqslot;
889 
890 					if (IsolationUsesXactSnapshot())
891 						ereport(ERROR,
892 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
893 								 errmsg("could not serialize access due to concurrent update")));
894 
895 					/*
896 					 * Already know that we're going to need to do EPQ, so
897 					 * fetch tuple directly into the right slot.
898 					 */
899 					EvalPlanQualBegin(epqstate);
900 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
901 												 resultRelInfo->ri_RangeTableIndex);
902 
903 					result = table_tuple_lock(resultRelationDesc, tupleid,
904 											  estate->es_snapshot,
905 											  inputslot, estate->es_output_cid,
906 											  LockTupleExclusive, LockWaitBlock,
907 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
908 											  &tmfd);
909 
910 					switch (result)
911 					{
912 						case TM_Ok:
913 							Assert(tmfd.traversed);
914 							epqslot = EvalPlanQual(epqstate,
915 												   resultRelationDesc,
916 												   resultRelInfo->ri_RangeTableIndex,
917 												   inputslot);
918 							if (TupIsNull(epqslot))
919 								/* Tuple not passing quals anymore, exiting... */
920 								return NULL;
921 
922 							/*
923 							 * If requested, skip delete and pass back the
924 							 * updated row.
925 							 */
926 							if (epqreturnslot)
927 							{
928 								*epqreturnslot = epqslot;
929 								return NULL;
930 							}
931 							else
932 								goto ldelete;
933 
934 						case TM_SelfModified:
935 
936 							/*
937 							 * This can be reached when following an update
938 							 * chain from a tuple updated by another session,
939 							 * reaching a tuple that was already updated in
940 							 * this transaction. If previously updated by this
941 							 * command, ignore the delete, otherwise error
942 							 * out.
943 							 *
944 							 * See also TM_SelfModified response to
945 							 * table_tuple_delete() above.
946 							 */
947 							if (tmfd.cmax != estate->es_output_cid)
948 								ereport(ERROR,
949 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
950 										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
951 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
952 							return NULL;
953 
954 						case TM_Deleted:
955 							/* tuple already deleted; nothing to do */
956 							return NULL;
957 
958 						default:
959 
960 							/*
961 							 * TM_Invisible should be impossible because we're
962 							 * waiting for updated row versions, and would
963 							 * already have errored out if the first version
964 							 * is invisible.
965 							 *
966 							 * TM_Updated should be impossible, because we're
967 							 * locking the latest version via
968 							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
969 							 */
970 							elog(ERROR, "unexpected table_tuple_lock status: %u",
971 								 result);
972 							return NULL;
973 					}
974 
975 					Assert(false);
976 					break;
977 				}
978 
979 			case TM_Deleted:
980 				if (IsolationUsesXactSnapshot())
981 					ereport(ERROR,
982 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
983 							 errmsg("could not serialize access due to concurrent delete")));
984 				/* tuple already deleted; nothing to do */
985 				return NULL;
986 
987 			default:
988 				elog(ERROR, "unrecognized table_tuple_delete status: %u",
989 					 result);
990 				return NULL;
991 		}
992 
993 		/*
994 		 * Note: Normally one would think that we have to delete index tuples
995 		 * associated with the heap tuple now...
996 		 *
997 		 * ... but in POSTGRES, we have no need to do this because VACUUM will
998 		 * take care of it later.  We can't delete index tuples immediately
999 		 * anyway, since the tuple is still visible to other transactions.
1000 		 */
1001 	}
1002 
1003 	if (canSetTag)
1004 		(estate->es_processed)++;
1005 
1006 	/* Tell caller that the delete actually happened. */
1007 	if (tupleDeleted)
1008 		*tupleDeleted = true;
1009 
1010 	/*
1011 	 * If this delete is the result of a partition key update that moved the
1012 	 * tuple to a new partition, put this row into the transition OLD TABLE,
1013 	 * if there is one. We need to do this separately for DELETE and INSERT
1014 	 * because they happen on different tables.
1015 	 */
1016 	ar_delete_trig_tcs = mtstate->mt_transition_capture;
1017 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
1018 		&& mtstate->mt_transition_capture->tcs_update_old_table)
1019 	{
1020 		ExecARUpdateTriggers(estate, resultRelInfo,
1021 							 tupleid,
1022 							 oldtuple,
1023 							 NULL,
1024 							 NULL,
1025 							 mtstate->mt_transition_capture);
1026 
1027 		/*
		 * We've already captured the OLD TABLE row, so make sure any AR
		 * DELETE trigger fired below doesn't capture it again.
1030 		 */
1031 		ar_delete_trig_tcs = NULL;
1032 	}
1033 
1034 	/* AFTER ROW DELETE Triggers */
1035 	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
1036 						 ar_delete_trig_tcs);
1037 
1038 	/* Process RETURNING if present and if requested */
1039 	if (processReturning && resultRelInfo->ri_projectReturning)
1040 	{
1041 		/*
		 * We have to put the target tuple into a slot, which means first we
		 * have to fetch it.  We can use the returning slot for that.
1044 		 */
1045 		TupleTableSlot *rslot;
1046 
1047 		if (resultRelInfo->ri_FdwRoutine)
1048 		{
1049 			/* FDW must have provided a slot containing the deleted row */
1050 			Assert(!TupIsNull(slot));
1051 		}
1052 		else
1053 		{
1054 			slot = ExecGetReturningSlot(estate, resultRelInfo);
1055 			if (oldtuple != NULL)
1056 			{
1057 				ExecForceStoreHeapTuple(oldtuple, slot, false);
1058 			}
1059 			else
1060 			{
1061 				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
1062 												   SnapshotAny, slot))
1063 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
1064 			}
1065 		}
1066 
1067 		rslot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
1068 									 RelationGetRelid(resultRelationDesc),
1069 									 slot, planSlot);
1070 
1071 		/*
1072 		 * Before releasing the target tuple again, make sure rslot has a
1073 		 * local copy of any pass-by-reference values.
1074 		 */
1075 		ExecMaterializeSlot(rslot);
1076 
1077 		ExecClearTuple(slot);
1078 
1079 		return rslot;
1080 	}
1081 
1082 	return NULL;
1083 }
1084 
1085 /* ----------------------------------------------------------------
1086  *		ExecUpdate
1087  *
1088  *		note: we can't run UPDATE queries with transactions
1089  *		off because UPDATEs are actually INSERTs and our
1090  *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted.  This should be fixed but until it
1092  *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database.
1094  *
1095  *		When updating a table, tupleid identifies the tuple to
1096  *		update and oldtuple is NULL.  When updating a view, oldtuple
1097  *		is passed to the INSTEAD OF triggers and identifies what to
1098  *		update, and tupleid is invalid.  When updating a foreign table,
1099  *		tupleid is invalid; the FDW has to figure out which row to
1100  *		update using data from the planSlot.  oldtuple is passed to
1101  *		foreign table triggers; it is NULL when the foreign table has
1102  *		no relevant triggers.
1103  *
1104  *		Returns RETURNING result if any, otherwise NULL.
1105  * ----------------------------------------------------------------
1106  */
1107 static TupleTableSlot *
1108 ExecUpdate(ModifyTableState *mtstate,
1109 		   ItemPointer tupleid,
1110 		   HeapTuple oldtuple,
1111 		   TupleTableSlot *slot,
1112 		   TupleTableSlot *planSlot,
1113 		   EPQState *epqstate,
1114 		   EState *estate,
1115 		   bool canSetTag)
1116 {
1117 	ResultRelInfo *resultRelInfo;
1118 	Relation	resultRelationDesc;
1119 	TM_Result	result;
1120 	TM_FailureData tmfd;
1121 	List	   *recheckIndexes = NIL;
1122 	TupleConversionMap *saved_tcs_map = NULL;
1123 
1124 	/*
1125 	 * abort the operation if not running transactions
1126 	 */
1127 	if (IsBootstrapProcessingMode())
1128 		elog(ERROR, "cannot UPDATE during bootstrap");
1129 
1130 	ExecMaterializeSlot(slot);
1131 
1132 	/*
1133 	 * get information on the (current) result relation
1134 	 */
1135 	resultRelInfo = estate->es_result_relation_info;
1136 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1137 
1138 	/* BEFORE ROW UPDATE Triggers */
1139 	if (resultRelInfo->ri_TrigDesc &&
1140 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
1141 	{
1142 		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
1143 								  tupleid, oldtuple, slot))
1144 			return NULL;		/* "do nothing" */
1145 	}
1146 
1147 	/* INSTEAD OF ROW UPDATE Triggers */
1148 	if (resultRelInfo->ri_TrigDesc &&
1149 		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
1150 	{
1151 		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
1152 								  oldtuple, slot))
1153 			return NULL;		/* "do nothing" */
1154 	}
1155 	else if (resultRelInfo->ri_FdwRoutine)
1156 	{
1157 		/*
1158 		 * GENERATED expressions might reference the tableoid column, so
1159 		 * (re-)initialize tts_tableOid before evaluating them.
1160 		 */
1161 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
1162 
1163 		/*
1164 		 * Compute stored generated columns
1165 		 */
1166 		if (resultRelationDesc->rd_att->constr &&
1167 			resultRelationDesc->rd_att->constr->has_generated_stored)
1168 			ExecComputeStoredGenerated(estate, slot);
1169 
1170 		/*
1171 		 * update in foreign table: let the FDW do it
1172 		 */
1173 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
1174 															   resultRelInfo,
1175 															   slot,
1176 															   planSlot);
1177 
1178 		if (slot == NULL)		/* "do nothing" */
1179 			return NULL;
1180 
1181 		/*
1182 		 * AFTER ROW Triggers or RETURNING expressions might reference the
1183 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
1184 		 * them.  (This covers the case where the FDW replaced the slot.)
1185 		 */
1186 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1187 	}
1188 	else
1189 	{
1190 		LockTupleMode lockmode;
1191 		bool		partition_constraint_failed;
1192 		bool		update_indexes;
1193 
1194 		/*
1195 		 * Constraints and GENERATED expressions might reference the tableoid
1196 		 * column, so (re-)initialize tts_tableOid before evaluating them.
1197 		 */
1198 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1199 
1200 		/*
1201 		 * Compute stored generated columns
1202 		 */
1203 		if (resultRelationDesc->rd_att->constr &&
1204 			resultRelationDesc->rd_att->constr->has_generated_stored)
1205 			ExecComputeStoredGenerated(estate, slot);
1206 
1207 		/*
1208 		 * Check any RLS UPDATE WITH CHECK policies
1209 		 *
1210 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
1211 		 * must loop back here and recheck any RLS policies and constraints.
1212 		 * (We don't need to redo triggers, however.  If there are any BEFORE
1213 		 * triggers then trigger.c will have done table_tuple_lock to lock the
1214 		 * correct tuple, so there's no need to do them again.)
1215 		 */
1216 lreplace:;
1217 
1218 		/* ensure slot is independent, consider e.g. EPQ */
1219 		ExecMaterializeSlot(slot);
1220 
1221 		/*
1222 		 * If partition constraint fails, this row might get moved to another
1223 		 * partition, in which case we should check the RLS CHECK policy just
1224 		 * before inserting into the new partition, rather than doing it here.
1225 		 * This is because a trigger on that partition might again change the
1226 		 * row.  So skip the WCO checks if the partition constraint fails.
1227 		 */
1228 		partition_constraint_failed =
1229 			resultRelInfo->ri_PartitionCheck &&
1230 			!ExecPartitionCheck(resultRelInfo, slot, estate, false);
1231 
1232 		if (!partition_constraint_failed &&
1233 			resultRelInfo->ri_WithCheckOptions != NIL)
1234 		{
1235 			/*
1236 			 * ExecWithCheckOptions() will skip any WCOs which are not of the
1237 			 * kind we are looking for at this point.
1238 			 */
1239 			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
1240 								 resultRelInfo, slot, estate);
1241 		}
1242 
1243 		/*
1244 		 * If a partition check failed, try to move the row into the right
1245 		 * partition.
1246 		 */
1247 		if (partition_constraint_failed)
1248 		{
1249 			bool		tuple_deleted;
1250 			TupleTableSlot *ret_slot;
1251 			TupleTableSlot *orig_slot = slot;
1252 			TupleTableSlot *epqslot = NULL;
1253 			PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
1254 			int			map_index;
1255 			TupleConversionMap *tupconv_map;
1256 
1257 			/*
1258 			 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the
1259 			 * original row to migrate to a different partition.  Maybe this
1260 			 * can be implemented some day, but it seems a fringe feature with
1261 			 * little redeeming value.
1262 			 */
1263 			if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1264 				ereport(ERROR,
1265 						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1266 						 errmsg("invalid ON UPDATE specification"),
1267 						 errdetail("The result tuple would appear in a different partition than the original tuple.")));
1268 
1269 			/*
1270 			 * When an UPDATE is run on a leaf partition, we will not have
			 * partition tuple routing set up.  In that case, fail with a
			 * partition constraint violation error.
1273 			 */
1274 			if (proute == NULL)
1275 				ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1276 
1277 			/*
1278 			 * Row movement, part 1.  Delete the tuple, but skip RETURNING
1279 			 * processing. We want to return rows from INSERT.
1280 			 */
1281 			ExecDelete(mtstate, tupleid, oldtuple, planSlot, epqstate,
1282 					   estate, false, false /* canSetTag */ ,
1283 					   true /* changingPart */ , &tuple_deleted, &epqslot);
1284 
1285 			/*
			 * If the DELETE didn't happen for some reason (e.g. a trigger
			 * prevented it, or it was already deleted by self, or it was
			 * concurrently deleted by another transaction), we should skip the
			 * insert as well; otherwise, an UPDATE could cause an increase in the
1290 			 * total number of rows across all partitions, which is clearly
1291 			 * wrong.
1292 			 *
1293 			 * For a normal UPDATE, the case where the tuple has been the
1294 			 * subject of a concurrent UPDATE or DELETE would be handled by
1295 			 * the EvalPlanQual machinery, but for an UPDATE that we've
1296 			 * translated into a DELETE from this partition and an INSERT into
1297 			 * some other partition, that's not available, because CTID chains
1298 			 * can't span relation boundaries.  We mimic the semantics to a
1299 			 * limited extent by skipping the INSERT if the DELETE fails to
1300 			 * find a tuple. This ensures that two concurrent attempts to
1301 			 * UPDATE the same tuple at the same time can't turn one tuple
1302 			 * into two, and that an UPDATE of a just-deleted tuple can't
1303 			 * resurrect it.
1304 			 */
1305 			if (!tuple_deleted)
1306 			{
1307 				/*
				 * epqslot will typically be NULL.  But when ExecDelete()
1309 				 * finds that another transaction has concurrently updated the
1310 				 * same row, it re-fetches the row, skips the delete, and
1311 				 * epqslot is set to the re-fetched tuple slot. In that case,
1312 				 * we need to do all the checks again.
1313 				 */
1314 				if (TupIsNull(epqslot))
1315 					return NULL;
1316 				else
1317 				{
1318 					slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1319 					goto lreplace;
1320 				}
1321 			}
1322 
1323 			/*
1324 			 * Updates set the transition capture map only when a new subplan
1325 			 * is chosen.  But for inserts, it is set for each row. So after
			 * INSERT, we need to revert to the map created for UPDATE;
1327 			 * otherwise the next UPDATE will incorrectly use the one created
1328 			 * for INSERT.  So first save the one created for UPDATE.
1329 			 */
1330 			if (mtstate->mt_transition_capture)
1331 				saved_tcs_map = mtstate->mt_transition_capture->tcs_map;
1332 
1333 			/*
1334 			 * resultRelInfo is one of the per-subplan resultRelInfos.  So we
1335 			 * should convert the tuple into root's tuple descriptor, since
1336 			 * ExecInsert() starts the search from root.  The tuple conversion
1337 			 * map list is in the order of mtstate->resultRelInfo[], so to
1338 			 * retrieve the one for this resultRel, we need to know the
1339 			 * position of the resultRel in mtstate->resultRelInfo[].
1340 			 */
1341 			map_index = resultRelInfo - mtstate->resultRelInfo;
1342 			Assert(map_index >= 0 && map_index < mtstate->mt_nplans);
1343 			tupconv_map = tupconv_map_for_subplan(mtstate, map_index);
1344 			if (tupconv_map != NULL)
1345 				slot = execute_attr_map_slot(tupconv_map->attrMap,
1346 											 slot,
1347 											 mtstate->mt_root_tuple_slot);
1348 
1349 			/*
1350 			 * Prepare for tuple routing, making it look like we're inserting
1351 			 * into the root.
1352 			 */
1353 			Assert(mtstate->rootResultRelInfo != NULL);
1354 			slot = ExecPrepareTupleRouting(mtstate, estate, proute,
1355 										   mtstate->rootResultRelInfo, slot);
1356 
1357 			ret_slot = ExecInsert(mtstate, slot, planSlot,
1358 								  orig_slot, resultRelInfo,
1359 								  estate, canSetTag);
1360 
1361 			/* Revert ExecPrepareTupleRouting's node change. */
1362 			estate->es_result_relation_info = resultRelInfo;
1363 			if (mtstate->mt_transition_capture)
1364 			{
1365 				mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1366 				mtstate->mt_transition_capture->tcs_map = saved_tcs_map;
1367 			}
1368 
1369 			return ret_slot;
1370 		}
1371 
1372 		/*
1373 		 * Check the constraints of the tuple.  We've already checked the
1374 		 * partition constraint above; however, we must still ensure the tuple
1375 		 * passes all other constraints, so we will call ExecConstraints() and
1376 		 * have it validate all remaining checks.
1377 		 */
1378 		if (resultRelationDesc->rd_att->constr)
1379 			ExecConstraints(resultRelInfo, slot, estate);
1380 
1381 		/*
1382 		 * replace the heap tuple
1383 		 *
1384 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1385 		 * that the row to be updated is visible to that snapshot, and throw a
1386 		 * can't-serialize error if not. This is a special-case behavior
1387 		 * needed for referential integrity updates in transaction-snapshot
1388 		 * mode transactions.
1389 		 */
1390 		result = table_tuple_update(resultRelationDesc, tupleid, slot,
1391 									estate->es_output_cid,
1392 									estate->es_snapshot,
1393 									estate->es_crosscheck_snapshot,
1394 									true /* wait for commit */ ,
1395 									&tmfd, &lockmode, &update_indexes);
1396 
1397 		switch (result)
1398 		{
1399 			case TM_SelfModified:
1400 
1401 				/*
1402 				 * The target tuple was already updated or deleted by the
1403 				 * current command, or by a later command in the current
1404 				 * transaction.  The former case is possible in a join UPDATE
1405 				 * where multiple tuples join to the same target tuple. This
1406 				 * is pretty questionable, but Postgres has always allowed it:
1407 				 * we just execute the first update action and ignore
1408 				 * additional update attempts.
1409 				 *
1410 				 * The latter case arises if the tuple is modified by a
1411 				 * command in a BEFORE trigger, or perhaps by a command in a
1412 				 * volatile function used in the query.  In such situations we
1413 				 * should not ignore the update, but it is equally unsafe to
1414 				 * proceed.  We don't want to discard the original UPDATE
1415 				 * while keeping the triggered actions based on it; and we
1416 				 * have no principled way to merge this update with the
1417 				 * previous ones.  So throwing an error is the only safe
1418 				 * course.
1419 				 *
1420 				 * If a trigger actually intends this type of interaction, it
1421 				 * can re-execute the UPDATE (assuming it can figure out how)
1422 				 * and then return NULL to cancel the outer update.
1423 				 */
1424 				if (tmfd.cmax != estate->es_output_cid)
1425 					ereport(ERROR,
1426 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1427 							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1428 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1429 
1430 				/* Else, already updated by self; nothing to do */
1431 				return NULL;
1432 
1433 			case TM_Ok:
1434 				break;
1435 
1436 			case TM_Updated:
1437 				{
1438 					TupleTableSlot *inputslot;
1439 					TupleTableSlot *epqslot;
1440 
1441 					if (IsolationUsesXactSnapshot())
1442 						ereport(ERROR,
1443 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1444 								 errmsg("could not serialize access due to concurrent update")));
1445 
1446 					/*
1447 					 * Already know that we're going to need to do EPQ, so
1448 					 * fetch tuple directly into the right slot.
1449 					 */
1450 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
1451 												 resultRelInfo->ri_RangeTableIndex);
1452 
1453 					result = table_tuple_lock(resultRelationDesc, tupleid,
1454 											  estate->es_snapshot,
1455 											  inputslot, estate->es_output_cid,
1456 											  lockmode, LockWaitBlock,
1457 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
1458 											  &tmfd);
1459 
1460 					switch (result)
1461 					{
1462 						case TM_Ok:
1463 							Assert(tmfd.traversed);
1464 
1465 							epqslot = EvalPlanQual(epqstate,
1466 												   resultRelationDesc,
1467 												   resultRelInfo->ri_RangeTableIndex,
1468 												   inputslot);
1469 							if (TupIsNull(epqslot))
1470 								/* Tuple not passing quals anymore, exiting... */
1471 								return NULL;
1472 
1473 							slot = ExecFilterJunk(resultRelInfo->ri_junkFilter, epqslot);
1474 							goto lreplace;
1475 
1476 						case TM_Deleted:
1477 							/* tuple already deleted; nothing to do */
1478 							return NULL;
1479 
1480 						case TM_SelfModified:
1481 
1482 							/*
1483 							 * This can be reached when following an update
1484 							 * chain from a tuple updated by another session,
1485 							 * reaching a tuple that was already updated in
1486 							 * this transaction. If previously modified by
1487 							 * this command, ignore the redundant update,
1488 							 * otherwise error out.
1489 							 *
1490 							 * See also TM_SelfModified response to
1491 							 * table_tuple_update() above.
1492 							 */
1493 							if (tmfd.cmax != estate->es_output_cid)
1494 								ereport(ERROR,
1495 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1496 										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1497 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1498 							return NULL;
1499 
1500 						default:
1501 							/* see table_tuple_lock call in ExecDelete() */
1502 							elog(ERROR, "unexpected table_tuple_lock status: %u",
1503 								 result);
1504 							return NULL;
1505 					}
1506 				}
1507 
1508 				break;
1509 
1510 			case TM_Deleted:
1511 				if (IsolationUsesXactSnapshot())
1512 					ereport(ERROR,
1513 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1514 							 errmsg("could not serialize access due to concurrent delete")));
1515 				/* tuple already deleted; nothing to do */
1516 				return NULL;
1517 
1518 			default:
1519 				elog(ERROR, "unrecognized table_tuple_update status: %u",
1520 					 result);
1521 				return NULL;
1522 		}
1523 
1524 		/* insert index entries for tuple if necessary */
1525 		if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
1526 			recheckIndexes = ExecInsertIndexTuples(slot, estate, false, NULL, NIL);
1527 	}
1528 
1529 	if (canSetTag)
1530 		(estate->es_processed)++;
1531 
1532 	/* AFTER ROW UPDATE Triggers */
1533 	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
1534 						 recheckIndexes,
1535 						 mtstate->operation == CMD_INSERT ?
1536 						 mtstate->mt_oc_transition_capture :
1537 						 mtstate->mt_transition_capture);
1538 
1539 	list_free(recheckIndexes);
1540 
1541 	/*
1542 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
1543 	 * required to do this after testing all constraints and uniqueness
1544 	 * violations per the SQL spec, so we do it after actually updating the
1545 	 * record in the heap and all indexes.
1546 	 *
1547 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
1548 	 * are looking for at this point.
1549 	 */
1550 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1551 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1552 
1553 	/* Process RETURNING if present */
1554 	if (resultRelInfo->ri_projectReturning)
1555 		return ExecProcessReturning(resultRelInfo->ri_projectReturning,
1556 									RelationGetRelid(resultRelationDesc),
1557 									slot, planSlot);
1558 
1559 	return NULL;
1560 }
1561 
1562 /*
1563  * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
1564  *
 * Try to lock the tuple for update as part of speculative insertion.  If a
 * qual originating from ON CONFLICT DO UPDATE is satisfied, update the row
 * (but still lock it, even though it may not satisfy the estate's snapshot).
1569  *
1570  * Returns true if we're done (with or without an update), or false if
1571  * the caller must retry the INSERT from scratch.
1572  */
1573 static bool
1574 ExecOnConflictUpdate(ModifyTableState *mtstate,
1575 					 ResultRelInfo *resultRelInfo,
1576 					 ItemPointer conflictTid,
1577 					 TupleTableSlot *planSlot,
1578 					 TupleTableSlot *excludedSlot,
1579 					 EState *estate,
1580 					 bool canSetTag,
1581 					 TupleTableSlot **returning)
1582 {
1583 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
1584 	Relation	relation = resultRelInfo->ri_RelationDesc;
1585 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
1586 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
1587 	TM_FailureData tmfd;
1588 	LockTupleMode lockmode;
1589 	TM_Result	test;
1590 	Datum		xminDatum;
1591 	TransactionId xmin;
1592 	bool		isnull;
1593 
1594 	/* Determine lock mode to use */
1595 	lockmode = ExecUpdateLockMode(estate, resultRelInfo);
1596 
1597 	/*
	 * Lock the tuple for update.  Don't follow the update chain when the
	 * tuple cannot be locked without doing so.  A row locking conflict here
	 * means our previous conclusion that the tuple is conclusively committed
	 * no longer holds.
1602 	 */
1603 	test = table_tuple_lock(relation, conflictTid,
1604 							estate->es_snapshot,
1605 							existing, estate->es_output_cid,
1606 							lockmode, LockWaitBlock, 0,
1607 							&tmfd);
1608 	switch (test)
1609 	{
1610 		case TM_Ok:
1611 			/* success! */
1612 			break;
1613 
1614 		case TM_Invisible:
1615 
1616 			/*
			 * This can occur when a tuple inserted earlier in the same
			 * command is targeted for update again, e.g. because multiple
			 * rows with the same conflicting key values are inserted.
1620 			 *
1621 			 * This is somewhat similar to the ExecUpdate() TM_SelfModified
1622 			 * case.  We do not want to proceed because it would lead to the
1623 			 * same row being updated a second time in some unspecified order,
1624 			 * and in contrast to plain UPDATEs there's no historical behavior
1625 			 * to break.
1626 			 *
1627 			 * It is the user's responsibility to prevent this situation from
1628 			 * occurring.  These problems are why SQL-2003 similarly specifies
1629 			 * that for SQL MERGE, an exception must be raised in the event of
1630 			 * an attempt to update the same row twice.
1631 			 */
1632 			xminDatum = slot_getsysattr(existing,
1633 										MinTransactionIdAttributeNumber,
1634 										&isnull);
1635 			Assert(!isnull);
1636 			xmin = DatumGetTransactionId(xminDatum);
1637 
1638 			if (TransactionIdIsCurrentTransactionId(xmin))
1639 				ereport(ERROR,
1640 						(errcode(ERRCODE_CARDINALITY_VIOLATION),
1641 						 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
1642 						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
1643 
1644 			/* This shouldn't happen */
1645 			elog(ERROR, "attempted to lock invisible tuple");
1646 			break;
1647 
1648 		case TM_SelfModified:
1649 
1650 			/*
			 * This state should never be reached.  Since a dirty snapshot is
			 * used to find conflicting tuples, speculative insertion wouldn't
			 * have seen this row as one to conflict with.
1654 			 */
1655 			elog(ERROR, "unexpected self-updated tuple");
1656 			break;
1657 
1658 		case TM_Updated:
1659 			if (IsolationUsesXactSnapshot())
1660 				ereport(ERROR,
1661 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1662 						 errmsg("could not serialize access due to concurrent update")));
1663 
1664 			/*
1665 			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
1666 			 * a partitioned table we shouldn't reach to a case where tuple to
1667 			 * be lock is moved to another partition due to concurrent update
1668 			 * of the partition key.
1669 			 */
1670 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1671 
1672 			/*
1673 			 * Tell caller to try again from the very start.
1674 			 *
			 * It does not make sense to use the usual EvalPlanQual() style
			 * loop here, as the new version of the row might not conflict
			 * anymore, or the conflicting tuple might even have been deleted.
1678 			 */
1679 			ExecClearTuple(existing);
1680 			return false;
1681 
1682 		case TM_Deleted:
1683 			if (IsolationUsesXactSnapshot())
1684 				ereport(ERROR,
1685 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1686 						 errmsg("could not serialize access due to concurrent delete")));
1687 
1688 			/* see TM_Updated case */
1689 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
1690 			ExecClearTuple(existing);
1691 			return false;
1692 
1693 		default:
1694 			elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
1695 	}
1696 
1697 	/* Success, the tuple is locked. */
1698 
1699 	/*
1700 	 * Verify that the tuple is visible to our MVCC snapshot if the current
1701 	 * isolation level mandates that.
1702 	 *
	 * It's not sufficient to rely on the check within ExecUpdate(), since
	 * e.g. the ON CONFLICT ... WHERE clause may prevent us from reaching it.
1705 	 *
1706 	 * This means we only ever continue when a new command in the current
1707 	 * transaction could see the row, even though in READ COMMITTED mode the
1708 	 * tuple will not be visible according to the current statement's
1709 	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
1710 	 * versions.
1711 	 */
1712 	ExecCheckTupleVisible(estate, relation, existing);
1713 
1714 	/*
1715 	 * Make tuple and any needed join variables available to ExecQual and
1716 	 * ExecProject.  The EXCLUDED tuple is installed in ecxt_innertuple, while
1717 	 * the target's existing tuple is installed in the scantuple.  EXCLUDED
1718 	 * has been made to reference INNER_VAR in setrefs.c, but there is no
1719 	 * other redirection.
1720 	 */
1721 	econtext->ecxt_scantuple = existing;
1722 	econtext->ecxt_innertuple = excludedSlot;
1723 	econtext->ecxt_outertuple = NULL;
1724 
1725 	if (!ExecQual(onConflictSetWhere, econtext))
1726 	{
1727 		ExecClearTuple(existing);	/* see return below */
1728 		InstrCountFiltered1(&mtstate->ps, 1);
1729 		return true;			/* done with the tuple */
1730 	}
1731 
1732 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1733 	{
1734 		/*
1735 		 * Check target's existing tuple against UPDATE-applicable USING
1736 		 * security barrier quals (if any), enforced here as RLS checks/WCOs.
1737 		 *
1738 		 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
1739 		 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
1740 		 * but that's almost the extent of its special handling for ON
1741 		 * CONFLICT DO UPDATE.
1742 		 *
1743 		 * The rewriter will also have associated UPDATE applicable straight
1744 		 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
1745 		 * follows.  INSERTs and UPDATEs naturally have mutually exclusive WCO
1746 		 * kinds, so there is no danger of spurious over-enforcement in the
1747 		 * INSERT or UPDATE path.
1748 		 */
1749 		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
1750 							 existing,
1751 							 mtstate->ps.state);
1752 	}
1753 
1754 	/* Project the new tuple version */
1755 	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
1756 
1757 	/*
1758 	 * Note that it is possible that the target tuple has been modified in
1759 	 * this session, after the above table_tuple_lock. We choose to not error
1760 	 * out in that case, in line with ExecUpdate's treatment of similar cases.
1761 	 * This can happen if an UPDATE is triggered from within ExecQual(),
1762 	 * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
1763 	 * wCTE in the ON CONFLICT's SET.
1764 	 */
1765 
1766 	/* Execute UPDATE with projection */
1767 	*returning = ExecUpdate(mtstate, conflictTid, NULL,
1768 							resultRelInfo->ri_onConflict->oc_ProjSlot,
1769 							planSlot,
1770 							&mtstate->mt_epqstate, mtstate->ps.state,
1771 							canSetTag);
1772 
1773 	/*
	 * Clear out the existing tuple, as there might not be another conflict
	 * among the next input rows.  We don't want to hold resources till the
	 * end of the query.
1777 	 */
1778 	ExecClearTuple(existing);
1779 	return true;
1780 }
1781 
1782 
1783 /*
1784  * Process BEFORE EACH STATEMENT triggers
1785  */
1786 static void
1787 fireBSTriggers(ModifyTableState *node)
1788 {
1789 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1790 	ResultRelInfo *resultRelInfo = node->resultRelInfo;
1791 
1792 	/*
1793 	 * If the node modifies a partitioned table, we must fire its triggers.
1794 	 * Note that in that case, node->resultRelInfo points to the first leaf
1795 	 * partition, not the root table.
1796 	 */
1797 	if (node->rootResultRelInfo != NULL)
1798 		resultRelInfo = node->rootResultRelInfo;
1799 
1800 	switch (node->operation)
1801 	{
1802 		case CMD_INSERT:
1803 			ExecBSInsertTriggers(node->ps.state, resultRelInfo);
1804 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1805 				ExecBSUpdateTriggers(node->ps.state,
1806 									 resultRelInfo);
1807 			break;
1808 		case CMD_UPDATE:
1809 			ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
1810 			break;
1811 		case CMD_DELETE:
1812 			ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
1813 			break;
1814 		default:
1815 			elog(ERROR, "unknown operation");
1816 			break;
1817 	}
1818 }
1819 
1820 /*
1821  * Return the target rel ResultRelInfo.
1822  *
 * This relation is the same as:
1824  * - the relation for which we will fire AFTER STATEMENT triggers.
1825  * - the relation into whose tuple format all captured transition tuples must
1826  *   be converted.
1827  * - the root partitioned table.
1828  */
1829 static ResultRelInfo *
1830 getTargetResultRelInfo(ModifyTableState *node)
1831 {
1832 	/*
1833 	 * Note that if the node modifies a partitioned table, node->resultRelInfo
1834 	 * points to the first leaf partition, not the root table.
1835 	 */
1836 	if (node->rootResultRelInfo != NULL)
1837 		return node->rootResultRelInfo;
1838 	else
1839 		return node->resultRelInfo;
1840 }
1841 
1842 /*
1843  * Process AFTER EACH STATEMENT triggers
1844  */
1845 static void
1846 fireASTriggers(ModifyTableState *node)
1847 {
1848 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
1849 	ResultRelInfo *resultRelInfo = getTargetResultRelInfo(node);
1850 
1851 	switch (node->operation)
1852 	{
1853 		case CMD_INSERT:
1854 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
1855 				ExecASUpdateTriggers(node->ps.state,
1856 									 resultRelInfo,
1857 									 node->mt_oc_transition_capture);
1858 			ExecASInsertTriggers(node->ps.state, resultRelInfo,
1859 								 node->mt_transition_capture);
1860 			break;
1861 		case CMD_UPDATE:
1862 			ExecASUpdateTriggers(node->ps.state, resultRelInfo,
1863 								 node->mt_transition_capture);
1864 			break;
1865 		case CMD_DELETE:
1866 			ExecASDeleteTriggers(node->ps.state, resultRelInfo,
1867 								 node->mt_transition_capture);
1868 			break;
1869 		default:
1870 			elog(ERROR, "unknown operation");
1871 			break;
1872 	}
1873 }
1874 
1875 /*
1876  * Set up the state needed for collecting transition tuples for AFTER
1877  * triggers.
1878  */
1879 static void
1880 ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
1881 {
1882 	ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
1883 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
1884 
1885 	/* Check for transition tables on the directly targeted relation. */
1886 	mtstate->mt_transition_capture =
1887 		MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1888 								   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1889 								   mtstate->operation);
1890 	if (plan->operation == CMD_INSERT &&
1891 		plan->onConflictAction == ONCONFLICT_UPDATE)
1892 		mtstate->mt_oc_transition_capture =
1893 			MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
1894 									   RelationGetRelid(targetRelInfo->ri_RelationDesc),
1895 									   CMD_UPDATE);
1896 
1897 	/*
1898 	 * If we found that we need to collect transition tuples then we may also
1899 	 * need tuple conversion maps for any children that have TupleDescs that
1900 	 * aren't compatible with the tuplestores.  (We can share these maps
1901 	 * between the regular and ON CONFLICT cases.)
1902 	 */
1903 	if (mtstate->mt_transition_capture != NULL ||
1904 		mtstate->mt_oc_transition_capture != NULL)
1905 	{
1906 		ExecSetupChildParentMapForSubplan(mtstate);
1907 
1908 		/*
1909 		 * Install the conversion map for the first plan for UPDATE and DELETE
1910 		 * operations.  It will be advanced each time we switch to the next
1911 		 * plan.  (INSERT operations set it every time, so we need not update
1912 		 * mtstate->mt_oc_transition_capture here.)
1913 		 */
1914 		if (mtstate->mt_transition_capture && mtstate->operation != CMD_INSERT)
1915 			mtstate->mt_transition_capture->tcs_map =
1916 				tupconv_map_for_subplan(mtstate, 0);
1917 	}
1918 }
1919 
1920 /*
1921  * ExecPrepareTupleRouting --- prepare for routing one tuple
1922  *
1923  * Determine the partition in which the tuple in slot is to be inserted,
1924  * and modify mtstate and estate to prepare for it.
1925  *
1926  * Caller must revert the estate changes after executing the insertion!
1927  * In mtstate, transition capture changes may also need to be reverted.
1928  *
1929  * Returns a slot holding the tuple of the partition rowtype.
1930  */
1931 static TupleTableSlot *
1932 ExecPrepareTupleRouting(ModifyTableState *mtstate,
1933 						EState *estate,
1934 						PartitionTupleRouting *proute,
1935 						ResultRelInfo *targetRelInfo,
1936 						TupleTableSlot *slot)
1937 {
1938 	ResultRelInfo *partrel;
1939 	PartitionRoutingInfo *partrouteinfo;
1940 	TupleConversionMap *map;
1941 
1942 	/*
1943 	 * Lookup the target partition's ResultRelInfo.  If ExecFindPartition does
1944 	 * not find a valid partition for the tuple in 'slot' then an error is
1945 	 * raised.  An error may also be raised if the found partition is not a
1946 	 * valid target for INSERTs.  This is required since a partitioned table
1947 	 * UPDATE to another partition becomes a DELETE+INSERT.
1948 	 */
1949 	partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
1950 	partrouteinfo = partrel->ri_PartitionInfo;
1951 	Assert(partrouteinfo != NULL);
1952 
1953 	/*
1954 	 * Make it look like we are inserting into the partition.
1955 	 */
1956 	estate->es_result_relation_info = partrel;
1957 
1958 	/*
	 * If we're capturing transition tuples, we might need to convert from
	 * the partition rowtype to the root partitioned table's rowtype.
1961 	 */
1962 	if (mtstate->mt_transition_capture != NULL)
1963 	{
1964 		if (partrel->ri_TrigDesc &&
1965 			partrel->ri_TrigDesc->trig_insert_before_row)
1966 		{
1967 			/*
1968 			 * If there are any BEFORE triggers on the partition, we'll have
1969 			 * to be ready to convert their result back to tuplestore format.
1970 			 */
1971 			mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1972 			mtstate->mt_transition_capture->tcs_map =
1973 				partrouteinfo->pi_PartitionToRootMap;
1974 		}
1975 		else
1976 		{
1977 			/*
1978 			 * Otherwise, just remember the original unconverted tuple, to
1979 			 * avoid a needless round trip conversion.
1980 			 */
1981 			mtstate->mt_transition_capture->tcs_original_insert_tuple = slot;
1982 			mtstate->mt_transition_capture->tcs_map = NULL;
1983 		}
1984 	}
1985 	if (mtstate->mt_oc_transition_capture != NULL)
1986 	{
1987 		mtstate->mt_oc_transition_capture->tcs_map =
1988 			partrouteinfo->pi_PartitionToRootMap;
1989 	}
1990 
1991 	/*
1992 	 * Convert the tuple, if necessary.
1993 	 */
1994 	map = partrouteinfo->pi_RootToPartitionMap;
1995 	if (map != NULL)
1996 	{
1997 		TupleTableSlot *new_slot = partrouteinfo->pi_PartitionTupleSlot;
1998 
1999 		slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
2000 	}
2001 
2002 	return slot;
2003 }
2004 
2005 /*
2006  * Initialize the child-to-root tuple conversion map array for UPDATE subplans.
2007  *
 * This map array is required to convert tuples from each subplan result
 * rel's rowtype to the target table's rowtype.  The requirement arises in
 * two independent scenarios:
2011  * 1. For update-tuple-routing.
2012  * 2. For capturing tuples in transition tables.
2013  */
2014 static void
2015 ExecSetupChildParentMapForSubplan(ModifyTableState *mtstate)
2016 {
2017 	ResultRelInfo *targetRelInfo = getTargetResultRelInfo(mtstate);
2018 	ResultRelInfo *resultRelInfos = mtstate->resultRelInfo;
2019 	TupleDesc	outdesc;
2020 	int			numResultRelInfos = mtstate->mt_nplans;
2021 	int			i;
2022 
2023 	/*
2024 	 * Build array of conversion maps from each child's TupleDesc to the one
2025 	 * used in the target relation.  The map pointers may be NULL when no
2026 	 * conversion is necessary, which is hopefully a common case.
2027 	 */
2028 
2029 	/* Get tuple descriptor of the target rel. */
2030 	outdesc = RelationGetDescr(targetRelInfo->ri_RelationDesc);
2031 
2032 	mtstate->mt_per_subplan_tupconv_maps = (TupleConversionMap **)
2033 		palloc(sizeof(TupleConversionMap *) * numResultRelInfos);
2034 
2035 	for (i = 0; i < numResultRelInfos; ++i)
2036 	{
2037 		mtstate->mt_per_subplan_tupconv_maps[i] =
2038 			convert_tuples_by_name(RelationGetDescr(resultRelInfos[i].ri_RelationDesc),
2039 								   outdesc,
2040 								   gettext_noop("could not convert row type"));
2041 	}
2042 }
2043 
2044 /*
2045  * For a given subplan index, get the tuple conversion map.
2046  */
2047 static TupleConversionMap *
2048 tupconv_map_for_subplan(ModifyTableState *mtstate, int whichplan)
2049 {
2050 	/* If nobody else set the per-subplan array of maps, do so ourselves. */
2051 	if (mtstate->mt_per_subplan_tupconv_maps == NULL)
2052 		ExecSetupChildParentMapForSubplan(mtstate);
2053 
2054 	Assert(whichplan >= 0 && whichplan < mtstate->mt_nplans);
2055 	return mtstate->mt_per_subplan_tupconv_maps[whichplan];
2056 }
2057 
2058 /* ----------------------------------------------------------------
2059  *	   ExecModifyTable
2060  *
2061  *		Perform table modifications as required, and return RETURNING results
2062  *		if needed.
2063  * ----------------------------------------------------------------
2064  */
2065 static TupleTableSlot *
2066 ExecModifyTable(PlanState *pstate)
2067 {
2068 	ModifyTableState *node = castNode(ModifyTableState, pstate);
2069 	PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
2070 	EState	   *estate = node->ps.state;
2071 	CmdType		operation = node->operation;
2072 	ResultRelInfo *saved_resultRelInfo;
2073 	ResultRelInfo *resultRelInfo;
2074 	PlanState  *subplanstate;
2075 	JunkFilter *junkfilter;
2076 	TupleTableSlot *slot;
2077 	TupleTableSlot *planSlot;
2078 	ItemPointer tupleid;
2079 	ItemPointerData tuple_ctid;
2080 	HeapTupleData oldtupdata;
2081 	HeapTuple	oldtuple;
2082 
2083 	CHECK_FOR_INTERRUPTS();
2084 
2085 	/*
2086 	 * This should NOT get called during EvalPlanQual; we should have passed a
2087 	 * subplan tree to EvalPlanQual, instead.  Use a runtime test not just
2088 	 * Assert because this condition is easy to miss in testing.  (Note:
2089 	 * although ModifyTable should not get executed within an EvalPlanQual
2090 	 * operation, we do have to allow it to be initialized and shut down in
2091 	 * case it is within a CTE subplan.  Hence this test must be here, not in
2092 	 * ExecInitModifyTable.)
2093 	 */
2094 	if (estate->es_epq_active != NULL)
2095 		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
2096 
2097 	/*
2098 	 * If we've already completed processing, don't try to do more.  We need
2099 	 * this test because ExecPostprocessPlan might call us an extra time, and
2100 	 * our subplan's nodes aren't necessarily robust against being called
2101 	 * extra times.
2102 	 */
2103 	if (node->mt_done)
2104 		return NULL;
2105 
2106 	/*
2107 	 * On first call, fire BEFORE STATEMENT triggers before proceeding.
2108 	 */
2109 	if (node->fireBSTriggers)
2110 	{
2111 		fireBSTriggers(node);
2112 		node->fireBSTriggers = false;
2113 	}
2114 
2115 	/* Preload local variables */
2116 	resultRelInfo = node->resultRelInfo + node->mt_whichplan;
2117 	subplanstate = node->mt_plans[node->mt_whichplan];
2118 	junkfilter = resultRelInfo->ri_junkFilter;
2119 
2120 	/*
2121 	 * es_result_relation_info must point to the currently active result
2122 	 * relation while we are within this ModifyTable node.  Even though
2123 	 * ModifyTable nodes can't be nested statically, they can be nested
2124 	 * dynamically (since our subplan could include a reference to a modifying
2125 	 * CTE).  So we have to save and restore the caller's value.
2126 	 */
2127 	saved_resultRelInfo = estate->es_result_relation_info;
2128 
2129 	estate->es_result_relation_info = resultRelInfo;
2130 
2131 	/*
2132 	 * Fetch rows from subplan(s), and execute the required table modification
2133 	 * for each row.
2134 	 */
2135 	for (;;)
2136 	{
2137 		/*
2138 		 * Reset the per-output-tuple exprcontext.  This is needed because
2139 		 * triggers expect to use that context as workspace.  It's a bit ugly
2140 		 * to do this below the top level of the plan, however.  We might need
2141 		 * to rethink this later.
2142 		 */
2143 		ResetPerTupleExprContext(estate);
2144 
2145 		/*
2146 		 * Reset per-tuple memory context used for processing on conflict and
2147 		 * returning clauses, to free any expression evaluation storage
2148 		 * allocated in the previous cycle.
2149 		 */
2150 		if (pstate->ps_ExprContext)
2151 			ResetExprContext(pstate->ps_ExprContext);
2152 
2153 		planSlot = ExecProcNode(subplanstate);
2154 
2155 		if (TupIsNull(planSlot))
2156 		{
2157 			/* advance to next subplan if any */
2158 			node->mt_whichplan++;
2159 			if (node->mt_whichplan < node->mt_nplans)
2160 			{
2161 				resultRelInfo++;
2162 				subplanstate = node->mt_plans[node->mt_whichplan];
2163 				junkfilter = resultRelInfo->ri_junkFilter;
2164 				estate->es_result_relation_info = resultRelInfo;
2165 				EvalPlanQualSetPlan(&node->mt_epqstate, subplanstate->plan,
2166 									node->mt_arowmarks[node->mt_whichplan]);
2167 				/* Prepare to convert transition tuples from this child. */
2168 				if (node->mt_transition_capture != NULL)
2169 				{
2170 					node->mt_transition_capture->tcs_map =
2171 						tupconv_map_for_subplan(node, node->mt_whichplan);
2172 				}
2173 				if (node->mt_oc_transition_capture != NULL)
2174 				{
2175 					node->mt_oc_transition_capture->tcs_map =
2176 						tupconv_map_for_subplan(node, node->mt_whichplan);
2177 				}
2178 				continue;
2179 			}
2180 			else
2181 				break;
2182 		}
2183 
2184 		/*
		 * Ensure the input tuple is in the right format for the target
		 * relation.
2186 		 */
2187 		if (node->mt_scans[node->mt_whichplan]->tts_ops != planSlot->tts_ops)
2188 		{
2189 			ExecCopySlot(node->mt_scans[node->mt_whichplan], planSlot);
2190 			planSlot = node->mt_scans[node->mt_whichplan];
2191 		}
2192 
2193 		/*
2194 		 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
2195 		 * here is compute the RETURNING expressions.
2196 		 */
2197 		if (resultRelInfo->ri_usesFdwDirectModify)
2198 		{
2199 			Assert(resultRelInfo->ri_projectReturning);
2200 
2201 			/*
2202 			 * A scan slot containing the data that was actually inserted,
2203 			 * updated or deleted has already been made available to
2204 			 * ExecProcessReturning by IterateDirectModify, so no need to
2205 			 * provide it here.
2206 			 */
2207 			slot = ExecProcessReturning(resultRelInfo->ri_projectReturning,
2208 										RelationGetRelid(resultRelInfo->ri_RelationDesc),
2209 										NULL, planSlot);
2210 
2211 			estate->es_result_relation_info = saved_resultRelInfo;
2212 			return slot;
2213 		}
2214 
2215 		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
2216 		slot = planSlot;
2217 
2218 		tupleid = NULL;
2219 		oldtuple = NULL;
2220 		if (junkfilter != NULL)
2221 		{
2222 			/*
2223 			 * extract the 'ctid' or 'wholerow' junk attribute.
2224 			 */
2225 			if (operation == CMD_UPDATE || operation == CMD_DELETE)
2226 			{
2227 				char		relkind;
2228 				Datum		datum;
2229 				bool		isNull;
2230 
2231 				relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2232 				if (relkind == RELKIND_RELATION || relkind == RELKIND_MATVIEW)
2233 				{
2234 					datum = ExecGetJunkAttribute(slot,
2235 												 junkfilter->jf_junkAttNo,
2236 												 &isNull);
2237 					/* shouldn't ever get a null result... */
2238 					if (isNull)
2239 						elog(ERROR, "ctid is NULL");
2240 
2241 					tupleid = (ItemPointer) DatumGetPointer(datum);
2242 					tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
2243 					tupleid = &tuple_ctid;
2244 				}
2245 
2246 				/*
2247 				 * Use the wholerow attribute, when available, to reconstruct
2248 				 * the old relation tuple.
2249 				 *
2250 				 * Foreign table updates have a wholerow attribute when the
2251 				 * relation has a row-level trigger.  Note that the wholerow
2252 				 * attribute does not carry system columns.  Foreign table
2253 				 * triggers miss seeing those, except that we know enough here
2254 				 * to set t_tableOid.  Quite separately from this, the FDW may
2255 				 * fetch its own junk attrs to identify the row.
2256 				 *
2257 				 * Other relevant relkinds, currently limited to views, always
2258 				 * have a wholerow attribute.
2259 				 */
2260 				else if (AttributeNumberIsValid(junkfilter->jf_junkAttNo))
2261 				{
2262 					datum = ExecGetJunkAttribute(slot,
2263 												 junkfilter->jf_junkAttNo,
2264 												 &isNull);
2265 					/* shouldn't ever get a null result... */
2266 					if (isNull)
2267 						elog(ERROR, "wholerow is NULL");
2268 
2269 					oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
2270 					oldtupdata.t_len =
2271 						HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
2272 					ItemPointerSetInvalid(&(oldtupdata.t_self));
2273 					/* Historically, view triggers see invalid t_tableOid. */
2274 					oldtupdata.t_tableOid =
2275 						(relkind == RELKIND_VIEW) ? InvalidOid :
2276 						RelationGetRelid(resultRelInfo->ri_RelationDesc);
2277 
2278 					oldtuple = &oldtupdata;
2279 				}
2280 				else
2281 					Assert(relkind == RELKIND_FOREIGN_TABLE);
2282 			}
2283 
2284 			/*
2285 			 * apply the junkfilter if needed.
2286 			 */
2287 			if (operation != CMD_DELETE)
2288 				slot = ExecFilterJunk(junkfilter, slot);
2289 		}
2290 
2291 		switch (operation)
2292 		{
2293 			case CMD_INSERT:
2294 				/* Prepare for tuple routing if needed. */
2295 				if (proute)
2296 					slot = ExecPrepareTupleRouting(node, estate, proute,
2297 												   resultRelInfo, slot);
2298 				slot = ExecInsert(node, slot, planSlot,
2299 								  NULL, estate->es_result_relation_info,
2300 								  estate, node->canSetTag);
2301 				/* Revert ExecPrepareTupleRouting's state change. */
2302 				if (proute)
2303 					estate->es_result_relation_info = resultRelInfo;
2304 				break;
2305 			case CMD_UPDATE:
2306 				slot = ExecUpdate(node, tupleid, oldtuple, slot, planSlot,
2307 								  &node->mt_epqstate, estate, node->canSetTag);
2308 				break;
2309 			case CMD_DELETE:
2310 				slot = ExecDelete(node, tupleid, oldtuple, planSlot,
2311 								  &node->mt_epqstate, estate,
2312 								  true, node->canSetTag,
2313 								  false /* changingPart */ , NULL, NULL);
2314 				break;
2315 			default:
2316 				elog(ERROR, "unknown operation");
2317 				break;
2318 		}
2319 
2320 		/*
2321 		 * If we got a RETURNING result, return it to caller.  We'll continue
2322 		 * the work on next call.
2323 		 */
2324 		if (slot)
2325 		{
2326 			estate->es_result_relation_info = saved_resultRelInfo;
2327 			return slot;
2328 		}
2329 	}
2330 
2331 	/* Restore es_result_relation_info before exiting */
2332 	estate->es_result_relation_info = saved_resultRelInfo;
2333 
2334 	/*
2335 	 * We're done, but fire AFTER STATEMENT triggers before exiting.
2336 	 */
2337 	fireASTriggers(node);
2338 
2339 	node->mt_done = true;
2340 
2341 	return NULL;
2342 }
2343 
2344 /* ----------------------------------------------------------------
2345  *		ExecInitModifyTable
2346  * ----------------------------------------------------------------
2347  */
2348 ModifyTableState *
2349 ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
2350 {
2351 	ModifyTableState *mtstate;
2352 	CmdType		operation = node->operation;
2353 	int			nplans = list_length(node->plans);
2354 	ResultRelInfo *saved_resultRelInfo;
2355 	ResultRelInfo *resultRelInfo;
2356 	Plan	   *subplan;
2357 	ListCell   *l;
2358 	int			i;
2359 	Relation	rel;
2360 	bool		update_tuple_routing_needed = node->partColsUpdated;
2361 
2362 	/* check for unsupported flags */
2363 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
2364 
2365 	/*
2366 	 * create state structure
2367 	 */
2368 	mtstate = makeNode(ModifyTableState);
2369 	mtstate->ps.plan = (Plan *) node;
2370 	mtstate->ps.state = estate;
2371 	mtstate->ps.ExecProcNode = ExecModifyTable;
2372 
2373 	mtstate->operation = operation;
2374 	mtstate->canSetTag = node->canSetTag;
2375 	mtstate->mt_done = false;
2376 
2377 	mtstate->mt_plans = (PlanState **) palloc0(sizeof(PlanState *) * nplans);
2378 	mtstate->resultRelInfo = estate->es_result_relations + node->resultRelIndex;
2379 	mtstate->mt_scans = (TupleTableSlot **) palloc0(sizeof(TupleTableSlot *) * nplans);
2380 
2381 	/* If modifying a partitioned table, initialize the root table info */
2382 	if (node->rootResultRelIndex >= 0)
2383 		mtstate->rootResultRelInfo = estate->es_root_result_relations +
2384 			node->rootResultRelIndex;
2385 
2386 	mtstate->mt_arowmarks = (List **) palloc0(sizeof(List *) * nplans);
2387 	mtstate->mt_nplans = nplans;
2388 
2389 	/* set up epqstate with dummy subplan data for the moment */
2390 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
2391 	mtstate->fireBSTriggers = true;
2392 
2393 	/*
2394 	 * call ExecInitNode on each of the plans to be executed and save the
2395 	 * results into the array "mt_plans".  This is also a convenient place to
2396 	 * verify that the proposed target relations are valid and open their
2397 	 * indexes for insertion of new index entries.  Note we *must* set
2398 	 * estate->es_result_relation_info correctly while we initialize each
2399 	 * sub-plan; external modules such as FDWs may depend on that (see
2400 	 * contrib/postgres_fdw/postgres_fdw.c: postgresBeginDirectModify() as one
2401 	 * example).
2402 	 */
2403 	saved_resultRelInfo = estate->es_result_relation_info;
2404 
2405 	resultRelInfo = mtstate->resultRelInfo;
2406 	i = 0;
2407 	foreach(l, node->plans)
2408 	{
2409 		subplan = (Plan *) lfirst(l);
2410 
2411 		/* Initialize the usesFdwDirectModify flag */
2412 		resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
2413 															  node->fdwDirectModifyPlans);
2414 
2415 		/*
2416 		 * Verify result relation is a valid target for the current operation
2417 		 */
2418 		CheckValidResultRel(resultRelInfo, operation);
2419 
2420 		/*
2421 		 * If there are indices on the result relation, open them and save
2422 		 * descriptors in the result relation info, so that we can add new
2423 		 * index entries for the tuples we add/update.  We need not do this
2424 		 * for a DELETE, however, since deletion doesn't affect indexes. Also,
2425 		 * inside an EvalPlanQual operation, the indexes might be open
2426 		 * already, since we share the resultrel state with the original
2427 		 * query.
2428 		 */
2429 		if (resultRelInfo->ri_RelationDesc->rd_rel->relhasindex &&
2430 			operation != CMD_DELETE &&
2431 			resultRelInfo->ri_IndexRelationDescs == NULL)
2432 			ExecOpenIndices(resultRelInfo,
2433 							node->onConflictAction != ONCONFLICT_NONE);
2434 
2435 		/*
2436 		 * If this is an UPDATE and a BEFORE UPDATE trigger is present, the
2437 		 * trigger itself might modify the partition-key values. So arrange
2438 		 * for tuple routing.
2439 		 */
2440 		if (resultRelInfo->ri_TrigDesc &&
2441 			resultRelInfo->ri_TrigDesc->trig_update_before_row &&
2442 			operation == CMD_UPDATE)
2443 			update_tuple_routing_needed = true;
2444 
2445 		/* Now init the plan for this result rel */
2446 		estate->es_result_relation_info = resultRelInfo;
2447 		mtstate->mt_plans[i] = ExecInitNode(subplan, estate, eflags);
2448 		mtstate->mt_scans[i] =
2449 			ExecInitExtraTupleSlot(mtstate->ps.state, ExecGetResultType(mtstate->mt_plans[i]),
2450 								   table_slot_callbacks(resultRelInfo->ri_RelationDesc));
2451 
2452 		/* Also let FDWs init themselves for foreign-table result rels */
2453 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2454 			resultRelInfo->ri_FdwRoutine != NULL &&
2455 			resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
2456 		{
2457 			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
2458 
2459 			resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
2460 															 resultRelInfo,
2461 															 fdw_private,
2462 															 i,
2463 															 eflags);
2464 		}
2465 
2466 		resultRelInfo++;
2467 		i++;
2468 	}
2469 
2470 	estate->es_result_relation_info = saved_resultRelInfo;
2471 
2472 	/* Get the target relation */
2473 	rel = (getTargetResultRelInfo(mtstate))->ri_RelationDesc;
2474 
2475 	/*
2476 	 * If it's not a partitioned table after all, UPDATE tuple routing should
2477 	 * not be attempted.
2478 	 */
2479 	if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
2480 		update_tuple_routing_needed = false;
2481 
2482 	/*
	 * Build state for tuple routing if it's an INSERT or if it's an UPDATE
	 * of the partition key.
2485 	 */
2486 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
2487 		(operation == CMD_INSERT || update_tuple_routing_needed))
2488 		mtstate->mt_partition_tuple_routing =
2489 			ExecSetupPartitionTupleRouting(estate, mtstate, rel);
2490 
2491 	/*
2492 	 * Build state for collecting transition tuples.  This requires having a
2493 	 * valid trigger query context, so skip it in explain-only mode.
2494 	 */
2495 	if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
2496 		ExecSetupTransitionCaptureState(mtstate, estate);
2497 
2498 	/*
2499 	 * Construct mapping from each of the per-subplan partition attnos to the
	 * root attno.  This is required when, during update row movement, the
	 * tuple descriptor of a source partition does not match the root
	 * partitioned table's descriptor.  In that case we need to convert tuples
	 * to the root tuple descriptor, because the search for the destination
	 * partition starts
2504 	 * from the root.  We'll also need a slot to store these converted tuples.
2505 	 * We can skip this setup if it's not a partition key update.
2506 	 */
2507 	if (update_tuple_routing_needed)
2508 	{
2509 		ExecSetupChildParentMapForSubplan(mtstate);
2510 		mtstate->mt_root_tuple_slot = table_slot_create(rel, NULL);
2511 	}
2512 
2513 	/*
2514 	 * Initialize any WITH CHECK OPTION constraints if needed.
2515 	 */
2516 	resultRelInfo = mtstate->resultRelInfo;
2517 	i = 0;
2518 	foreach(l, node->withCheckOptionLists)
2519 	{
2520 		List	   *wcoList = (List *) lfirst(l);
2521 		List	   *wcoExprs = NIL;
2522 		ListCell   *ll;
2523 
2524 		foreach(ll, wcoList)
2525 		{
2526 			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
2527 			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
2528 											   &mtstate->ps);
2529 
2530 			wcoExprs = lappend(wcoExprs, wcoExpr);
2531 		}
2532 
2533 		resultRelInfo->ri_WithCheckOptions = wcoList;
2534 		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
2535 		resultRelInfo++;
2536 		i++;
2537 	}
2538 
2539 	/*
2540 	 * Initialize RETURNING projections if needed.
2541 	 */
2542 	if (node->returningLists)
2543 	{
2544 		TupleTableSlot *slot;
2545 		ExprContext *econtext;
2546 
2547 		/*
2548 		 * Initialize result tuple slot and assign its rowtype using the first
2549 		 * RETURNING list.  We assume the rest will look the same.
2550 		 */
2551 		mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
2552 
2553 		/* Set up a slot for the output of the RETURNING projection(s) */
2554 		ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
2555 		slot = mtstate->ps.ps_ResultTupleSlot;
2556 
2557 		/* Need an econtext too */
2558 		if (mtstate->ps.ps_ExprContext == NULL)
2559 			ExecAssignExprContext(estate, &mtstate->ps);
2560 		econtext = mtstate->ps.ps_ExprContext;
2561 
2562 		/*
2563 		 * Build a projection for each result rel.
2564 		 */
2565 		resultRelInfo = mtstate->resultRelInfo;
2566 		foreach(l, node->returningLists)
2567 		{
2568 			List	   *rlist = (List *) lfirst(l);
2569 
2570 			resultRelInfo->ri_returningList = rlist;
2571 			resultRelInfo->ri_projectReturning =
2572 				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
2573 										resultRelInfo->ri_RelationDesc->rd_att);
2574 			resultRelInfo++;
2575 		}
2576 	}
2577 	else
2578 	{
2579 		/*
2580 		 * We still must construct a dummy result tuple type, because InitPlan
2581 		 * expects one (maybe should change that?).
2582 		 */
2583 		mtstate->ps.plan->targetlist = NIL;
2584 		ExecInitResultTypeTL(&mtstate->ps);
2585 
2586 		mtstate->ps.ps_ExprContext = NULL;
2587 	}
2588 
2589 	/* Set the list of arbiter indexes if needed for ON CONFLICT */
2590 	resultRelInfo = mtstate->resultRelInfo;
2591 	if (node->onConflictAction != ONCONFLICT_NONE)
2592 		resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
2593 
2594 	/*
2595 	 * If needed, Initialize target list, projection and qual for ON CONFLICT
2596 	 * DO UPDATE.
2597 	 */
2598 	if (node->onConflictAction == ONCONFLICT_UPDATE)
2599 	{
2600 		OnConflictSetState *onconfl = makeNode(OnConflictSetState);
2601 		ExprContext *econtext;
2602 		TupleDesc	relationDesc;
2603 
2604 		/* insert may only have one plan, inheritance is not expanded */
2605 		Assert(nplans == 1);
2606 
2607 		/* already exists if created by RETURNING processing above */
2608 		if (mtstate->ps.ps_ExprContext == NULL)
2609 			ExecAssignExprContext(estate, &mtstate->ps);
2610 
2611 		econtext = mtstate->ps.ps_ExprContext;
2612 		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
2613 
2614 		/* carried forward solely for the benefit of explain */
2615 		mtstate->mt_excludedtlist = node->exclRelTlist;
2616 
2617 		/* create state for DO UPDATE SET operation */
2618 		resultRelInfo->ri_onConflict = onconfl;
2619 
2620 		/* initialize slot for the existing tuple */
2621 		onconfl->oc_Existing =
2622 			table_slot_create(resultRelInfo->ri_RelationDesc,
2623 							  &mtstate->ps.state->es_tupleTable);
2624 
2625 		/*
2626 		 * Create the tuple slot for the UPDATE SET projection. We want a slot
2627 		 * of the table's type here, because the slot will be used to insert
2628 		 * into the table, and for RETURNING processing - which may access
2629 		 * system attributes.
2630 		 */
2631 		onconfl->oc_ProjSlot =
2632 			table_slot_create(resultRelInfo->ri_RelationDesc,
2633 							  &mtstate->ps.state->es_tupleTable);
2634 
2635 		/*
2636 		 * The onConflictSet tlist should already have been adjusted to emit
2637 		 * the table's exact column list.  It could also contain resjunk
2638 		 * columns, which should be evaluated but not included in the
2639 		 * projection result.
2640 		 */
2641 		ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2642 							node->onConflictSet);
2643 
2644 		/* build UPDATE SET projection state */
2645 		onconfl->oc_ProjInfo =
2646 			ExecBuildProjectionInfoExt(node->onConflictSet, econtext,
2647 									   onconfl->oc_ProjSlot, false,
2648 									   &mtstate->ps,
2649 									   relationDesc);
2650 
2651 		/* initialize state to evaluate the WHERE clause, if any */
2652 		if (node->onConflictWhere)
2653 		{
2654 			ExprState  *qualexpr;
2655 
2656 			qualexpr = ExecInitQual((List *) node->onConflictWhere,
2657 									&mtstate->ps);
2658 			onconfl->oc_WhereClause = qualexpr;
2659 		}
2660 	}
2661 
2662 	/*
2663 	 * If we have any secondary relations in an UPDATE or DELETE, they need to
2664 	 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
2665 	 * EvalPlanQual mechanism needs to be told about them.  Locate the
2666 	 * relevant ExecRowMarks.
2667 	 */
2668 	foreach(l, node->rowMarks)
2669 	{
2670 		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
2671 		ExecRowMark *erm;
2672 
2673 		/* ignore "parent" rowmarks; they are irrelevant at runtime */
2674 		if (rc->isParent)
2675 			continue;
2676 
2677 		/* find ExecRowMark (same for all subplans) */
2678 		erm = ExecFindRowMark(estate, rc->rti, false);
2679 
2680 		/* build ExecAuxRowMark for each subplan */
2681 		for (i = 0; i < nplans; i++)
2682 		{
2683 			ExecAuxRowMark *aerm;
2684 
2685 			subplan = mtstate->mt_plans[i]->plan;
2686 			aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
2687 			mtstate->mt_arowmarks[i] = lappend(mtstate->mt_arowmarks[i], aerm);
2688 		}
2689 	}
2690 
2691 	/* select first subplan */
2692 	mtstate->mt_whichplan = 0;
2693 	subplan = (Plan *) linitial(node->plans);
2694 	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan,
2695 						mtstate->mt_arowmarks[0]);
2696 
2697 	/*
2698 	 * Initialize the junk filter(s) if needed.  INSERT queries need a filter
2699 	 * if there are any junk attrs in the tlist.  UPDATE and DELETE always
2700 	 * need a filter, since there's always at least one junk attribute present
2701 	 * --- no need to look first.  Typically, this will be a 'ctid' or
2702 	 * 'wholerow' attribute, but in the case of a foreign data wrapper it
2703 	 * might be a set of junk attributes sufficient to identify the remote
2704 	 * row.
2705 	 *
2706 	 * If there are multiple result relations, each one needs its own junk
2707 	 * filter.  Note multiple rels are only possible for UPDATE/DELETE, so we
2708 	 * can't be fooled by some needing a filter and some not.
2709 	 *
2710 	 * This section of code is also a convenient place to verify that the
2711 	 * output of an INSERT or UPDATE matches the target table(s).
2712 	 */
2713 	{
2714 		bool		junk_filter_needed = false;
2715 
2716 		switch (operation)
2717 		{
2718 			case CMD_INSERT:
2719 				foreach(l, subplan->targetlist)
2720 				{
2721 					TargetEntry *tle = (TargetEntry *) lfirst(l);
2722 
2723 					if (tle->resjunk)
2724 					{
2725 						junk_filter_needed = true;
2726 						break;
2727 					}
2728 				}
2729 				break;
2730 			case CMD_UPDATE:
2731 			case CMD_DELETE:
2732 				junk_filter_needed = true;
2733 				break;
2734 			default:
2735 				elog(ERROR, "unknown operation");
2736 				break;
2737 		}
2738 
2739 		if (junk_filter_needed)
2740 		{
2741 			resultRelInfo = mtstate->resultRelInfo;
2742 			for (i = 0; i < nplans; i++)
2743 			{
2744 				JunkFilter *j;
2745 				TupleTableSlot *junkresslot;
2746 
2747 				subplan = mtstate->mt_plans[i]->plan;
2748 
2749 				junkresslot =
2750 					ExecInitExtraTupleSlot(estate, NULL,
2751 										   table_slot_callbacks(resultRelInfo->ri_RelationDesc));
2752 
2753 				/*
2754 				 * For an INSERT or UPDATE, the result tuple must always match
				 * the target table's descriptor.  For a DELETE, it won't
				 * (indeed, there are probably no non-junk output columns).
2757 				 */
2758 				if (operation == CMD_INSERT || operation == CMD_UPDATE)
2759 				{
2760 					ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc,
2761 										subplan->targetlist);
2762 					j = ExecInitJunkFilterInsertion(subplan->targetlist,
2763 													RelationGetDescr(resultRelInfo->ri_RelationDesc),
2764 													junkresslot);
2765 				}
2766 				else
2767 					j = ExecInitJunkFilter(subplan->targetlist,
2768 										   junkresslot);
2769 
2770 				if (operation == CMD_UPDATE || operation == CMD_DELETE)
2771 				{
2772 					/* For UPDATE/DELETE, find the appropriate junk attr now */
2773 					char		relkind;
2774 
2775 					relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2776 					if (relkind == RELKIND_RELATION ||
2777 						relkind == RELKIND_MATVIEW ||
2778 						relkind == RELKIND_PARTITIONED_TABLE)
2779 					{
2780 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "ctid");
2781 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2782 							elog(ERROR, "could not find junk ctid column");
2783 					}
2784 					else if (relkind == RELKIND_FOREIGN_TABLE)
2785 					{
2786 						/*
2787 						 * When there is a row-level trigger, there should be
2788 						 * a wholerow attribute.
2789 						 */
2790 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2791 					}
2792 					else
2793 					{
2794 						j->jf_junkAttNo = ExecFindJunkAttribute(j, "wholerow");
2795 						if (!AttributeNumberIsValid(j->jf_junkAttNo))
2796 							elog(ERROR, "could not find junk wholerow column");
2797 					}
2798 				}
2799 
2800 				resultRelInfo->ri_junkFilter = j;
2801 				resultRelInfo++;
2802 			}
2803 		}
2804 		else
2805 		{
2806 			if (operation == CMD_INSERT)
2807 				ExecCheckPlanOutput(mtstate->resultRelInfo->ri_RelationDesc,
2808 									subplan->targetlist);
2809 		}
2810 	}
2811 
2812 	/*
2813 	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
2814 	 * to estate->es_auxmodifytables so that it will be run to completion by
2815 	 * ExecPostprocessPlan.  (It'd actually work fine to add the primary
2816 	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
2817 	 * lappend: we need later-initialized ModifyTable nodes to be shut down
2818 	 * before earlier ones.  This ensures that we don't throw away RETURNING
2819 	 * rows that need to be seen by a later CTE subplan.
2820 	 */
2821 	if (!mtstate->canSetTag)
2822 		estate->es_auxmodifytables = lcons(mtstate,
2823 										   estate->es_auxmodifytables);
2824 
2825 	return mtstate;
2826 }
2827 
2828 /* ----------------------------------------------------------------
2829  *		ExecEndModifyTable
2830  *
2831  *		Shuts down the plan.
2832  *
2833  *		Returns nothing of interest.
2834  * ----------------------------------------------------------------
2835  */
2836 void
2837 ExecEndModifyTable(ModifyTableState *node)
2838 {
2839 	int			i;
2840 
2841 	/*
2842 	 * Allow any FDWs to shut down
2843 	 */
2844 	for (i = 0; i < node->mt_nplans; i++)
2845 	{
2846 		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
2847 
2848 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2849 			resultRelInfo->ri_FdwRoutine != NULL &&
2850 			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
2851 			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
2852 														   resultRelInfo);
2853 	}
2854 
2855 	/*
2856 	 * Close all the partitioned tables, leaf partitions, and their indices
2857 	 * and release the slot used for tuple routing, if set.
2858 	 */
2859 	if (node->mt_partition_tuple_routing)
2860 	{
2861 		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
2862 
2863 		if (node->mt_root_tuple_slot)
2864 			ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
2865 	}
2866 
2867 	/*
2868 	 * Free the exprcontext
2869 	 */
2870 	ExecFreeExprContext(&node->ps);
2871 
2872 	/*
2873 	 * clean out the tuple table
2874 	 */
2875 	if (node->ps.ps_ResultTupleSlot)
2876 		ExecClearTuple(node->ps.ps_ResultTupleSlot);
2877 
2878 	/*
2879 	 * Terminate EPQ execution if active
2880 	 */
2881 	EvalPlanQualEnd(&node->mt_epqstate);
2882 
2883 	/*
2884 	 * shut down subplans
2885 	 */
2886 	for (i = 0; i < node->mt_nplans; i++)
2887 		ExecEndNode(node->mt_plans[i]);
2888 }
2889 
2890 void
2891 ExecReScanModifyTable(ModifyTableState *node)
2892 {
2893 	/*
2894 	 * Currently, we don't need to support rescan on ModifyTable nodes. The
2895 	 * semantics of that would be a bit debatable anyway.
2896 	 */
2897 	elog(ERROR, "ExecReScanModifyTable is not implemented");
2898 }
2899