1 /*-------------------------------------------------------------------------
2  *
3  * nodeModifyTable.c
4  *	  routines to handle ModifyTable nodes.
5  *
6  * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7  * Portions Copyright (c) 1994, Regents of the University of California
8  *
9  *
10  * IDENTIFICATION
11  *	  src/backend/executor/nodeModifyTable.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 /* INTERFACE ROUTINES
16  *		ExecInitModifyTable - initialize the ModifyTable node
17  *		ExecModifyTable		- retrieve the next tuple from the node
18  *		ExecEndModifyTable	- shut down the ModifyTable node
19  *		ExecReScanModifyTable - rescan the ModifyTable node
20  *
21  *	 NOTES
22  *		The ModifyTable node receives input from its outerPlan, which is
23  *		the data to insert for INSERT cases, or the changed columns' new
24  *		values plus row-locating info for UPDATE cases, or just the
25  *		row-locating info for DELETE cases.
26  *
27  *		If the query specifies RETURNING, then the ModifyTable returns a
28  *		RETURNING tuple after completing each row insert, update, or delete.
29  *		It must be called again to continue the operation.  Without RETURNING,
30  *		we just loop within the node until all the work is done, then
31  *		return NULL.  This avoids useless call/return overhead.
32  */
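/*
 *		Illustrative example (hypothetical table "t"): for
 *		"UPDATE t SET b = b + 1" the subplan emits only the new value of b
 *		plus row-locating junk (a "ctid" column for ordinary heap tables),
 *		while "DELETE FROM t" emits only the row-locating junk; the
 *		ModifyTable node supplies everything else the operation needs.
 */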
33 
34 #include "postgres.h"
35 
36 #include "access/heapam.h"
37 #include "access/htup_details.h"
38 #include "access/tableam.h"
39 #include "access/xact.h"
40 #include "catalog/catalog.h"
41 #include "commands/trigger.h"
42 #include "executor/execPartition.h"
43 #include "executor/executor.h"
44 #include "executor/nodeModifyTable.h"
45 #include "foreign/fdwapi.h"
46 #include "miscadmin.h"
47 #include "nodes/nodeFuncs.h"
48 #include "rewrite/rewriteHandler.h"
49 #include "storage/bufmgr.h"
50 #include "storage/lmgr.h"
51 #include "utils/builtins.h"
52 #include "utils/datum.h"
53 #include "utils/memutils.h"
54 #include "utils/rel.h"
55 
56 
57 typedef struct MTTargetRelLookup
58 {
59 	Oid			relationOid;	/* hash key, must be first */
60 	int			relationIndex;	/* rel's index in resultRelInfo[] array */
61 } MTTargetRelLookup;
62 
63 static void ExecBatchInsert(ModifyTableState *mtstate,
64 							ResultRelInfo *resultRelInfo,
65 							TupleTableSlot **slots,
66 							TupleTableSlot **planSlots,
67 							int numSlots,
68 							EState *estate,
69 							bool canSetTag);
70 static bool ExecOnConflictUpdate(ModifyTableState *mtstate,
71 								 ResultRelInfo *resultRelInfo,
72 								 ItemPointer conflictTid,
73 								 TupleTableSlot *planSlot,
74 								 TupleTableSlot *excludedSlot,
75 								 EState *estate,
76 								 bool canSetTag,
77 								 TupleTableSlot **returning);
78 static TupleTableSlot *ExecPrepareTupleRouting(ModifyTableState *mtstate,
79 											   EState *estate,
80 											   PartitionTupleRouting *proute,
81 											   ResultRelInfo *targetRelInfo,
82 											   TupleTableSlot *slot,
83 											   ResultRelInfo **partRelInfo);
84 
85 /*
86  * Verify that the tuples to be produced by INSERT match the
87  * target relation's rowtype
88  *
89  * We do this to guard against stale plans.  If plan invalidation is
90  * functioning properly then we should never get a failure here, but better
91  * safe than sorry.  Note that this is called after we have obtained lock
92  * on the target rel, so the rowtype can't change underneath us.
93  *
94  * The plan output is represented by its targetlist, because that makes
95  * handling the dropped-column case easier.
96  *
97  * We used to use this for UPDATE as well, but now the equivalent checks
98  * are done in ExecBuildUpdateProjection.
99  */
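/*
 * As a hypothetical example: if a cached INSERT plan somehow outlived an
 * ALTER TABLE that changed a target column's type, the mismatch would be
 * reported here as a "row type ... do not match" error instead of silently
 * storing a datum of the wrong type.
 */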
100 static void
101 ExecCheckPlanOutput(Relation resultRel, List *targetList)
102 {
103 	TupleDesc	resultDesc = RelationGetDescr(resultRel);
104 	int			attno = 0;
105 	ListCell   *lc;
106 
107 	foreach(lc, targetList)
108 	{
109 		TargetEntry *tle = (TargetEntry *) lfirst(lc);
110 		Form_pg_attribute attr;
111 
112 		Assert(!tle->resjunk);	/* caller removed junk items already */
113 
114 		if (attno >= resultDesc->natts)
115 			ereport(ERROR,
116 					(errcode(ERRCODE_DATATYPE_MISMATCH),
117 					 errmsg("table row type and query-specified row type do not match"),
118 					 errdetail("Query has too many columns.")));
119 		attr = TupleDescAttr(resultDesc, attno);
120 		attno++;
121 
122 		if (!attr->attisdropped)
123 		{
124 			/* Normal case: demand type match */
125 			if (exprType((Node *) tle->expr) != attr->atttypid)
126 				ereport(ERROR,
127 						(errcode(ERRCODE_DATATYPE_MISMATCH),
128 						 errmsg("table row type and query-specified row type do not match"),
129 						 errdetail("Table has type %s at ordinal position %d, but query expects %s.",
130 								   format_type_be(attr->atttypid),
131 								   attno,
132 								   format_type_be(exprType((Node *) tle->expr)))));
133 		}
134 		else
135 		{
136 			/*
137 			 * For a dropped column, we can't check atttypid (it's likely 0).
138 			 * In any case the planner has most likely inserted an INT4 null.
139 			 * What we insist on is just *some* NULL constant.
140 			 */
141 			if (!IsA(tle->expr, Const) ||
142 				!((Const *) tle->expr)->constisnull)
143 				ereport(ERROR,
144 						(errcode(ERRCODE_DATATYPE_MISMATCH),
145 						 errmsg("table row type and query-specified row type do not match"),
146 						 errdetail("Query provides a value for a dropped column at ordinal position %d.",
147 								   attno)));
148 		}
149 	}
150 	if (attno != resultDesc->natts)
151 		ereport(ERROR,
152 				(errcode(ERRCODE_DATATYPE_MISMATCH),
153 				 errmsg("table row type and query-specified row type do not match"),
154 				 errdetail("Query has too few columns.")));
155 }
156 
157 /*
158  * ExecProcessReturning --- evaluate a RETURNING list
159  *
160  * resultRelInfo: current result rel
161  * tupleSlot: slot holding tuple actually inserted/updated/deleted
162  * planSlot: slot holding tuple returned by top subplan node
163  *
164  * Note: If tupleSlot is NULL, the FDW should have already provided econtext's
165  * scan tuple.
166  *
167  * Returns a slot holding the result tuple
168  */
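/*
 * Illustrative example (hypothetical tables): in
 *		UPDATE t SET ... FROM other WHERE ... RETURNING other.note;
 * the columns of "other" are evaluated from planSlot (installed as
 * ecxt_outertuple below), while columns of the target row itself come from
 * tupleSlot via ecxt_scantuple.
 */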
169 static TupleTableSlot *
170 ExecProcessReturning(ResultRelInfo *resultRelInfo,
171 					 TupleTableSlot *tupleSlot,
172 					 TupleTableSlot *planSlot)
173 {
174 	ProjectionInfo *projectReturning = resultRelInfo->ri_projectReturning;
175 	ExprContext *econtext = projectReturning->pi_exprContext;
176 
177 	/* Make tuple and any needed join variables available to ExecProject */
178 	if (tupleSlot)
179 		econtext->ecxt_scantuple = tupleSlot;
180 	econtext->ecxt_outertuple = planSlot;
181 
182 	/*
183 	 * RETURNING expressions might reference the tableoid column, so
184 	 * reinitialize tts_tableOid before evaluating them.
185 	 */
186 	econtext->ecxt_scantuple->tts_tableOid =
187 		RelationGetRelid(resultRelInfo->ri_RelationDesc);
188 
189 	/* Compute the RETURNING expressions */
190 	return ExecProject(projectReturning);
191 }
192 
193 /*
194  * ExecCheckTupleVisible -- verify tuple is visible
195  *
196  * It would not be consistent with the guarantees of the higher isolation levels
197  * to proceed with avoiding insertion (taking speculative insertion's alternative
198  * path) on the basis of another tuple that is not visible to our MVCC snapshot.
199  * Check for the need to raise a serialization failure, and do so as necessary.
200  */
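/*
 * Illustrative example: under REPEATABLE READ, an INSERT ... ON CONFLICT DO
 * NOTHING may find a conflicting row that was inserted and committed after
 * our snapshot was taken.  Skipping the insert on the basis of a row our
 * snapshot cannot see would break the isolation guarantee, so we raise a
 * serialization failure instead, unless the row was inserted by our own
 * transaction.
 */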
201 static void
202 ExecCheckTupleVisible(EState *estate,
203 					  Relation rel,
204 					  TupleTableSlot *slot)
205 {
206 	if (!IsolationUsesXactSnapshot())
207 		return;
208 
209 	if (!table_tuple_satisfies_snapshot(rel, slot, estate->es_snapshot))
210 	{
211 		Datum		xminDatum;
212 		TransactionId xmin;
213 		bool		isnull;
214 
215 		xminDatum = slot_getsysattr(slot, MinTransactionIdAttributeNumber, &isnull);
216 		Assert(!isnull);
217 		xmin = DatumGetTransactionId(xminDatum);
218 
219 		/*
220 		 * We should not raise a serialization failure if the conflict is
221 		 * against a tuple inserted by our own transaction, even if it's not
222 		 * visible to our snapshot.  (This would happen, for example, if
223 		 * conflicting keys are proposed for insertion in a single command.)
224 		 */
225 		if (!TransactionIdIsCurrentTransactionId(xmin))
226 			ereport(ERROR,
227 					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
228 					 errmsg("could not serialize access due to concurrent update")));
229 	}
230 }
231 
232 /*
233  * ExecCheckTIDVisible -- convenience variant of ExecCheckTupleVisible()
234  */
235 static void
236 ExecCheckTIDVisible(EState *estate,
237 					ResultRelInfo *relinfo,
238 					ItemPointer tid,
239 					TupleTableSlot *tempSlot)
240 {
241 	Relation	rel = relinfo->ri_RelationDesc;
242 
243 	/* Redundantly check isolation level */
244 	if (!IsolationUsesXactSnapshot())
245 		return;
246 
247 	if (!table_tuple_fetch_row_version(rel, tid, SnapshotAny, tempSlot))
248 		elog(ERROR, "failed to fetch conflicting tuple for ON CONFLICT");
249 	ExecCheckTupleVisible(estate, rel, tempSlot);
250 	ExecClearTuple(tempSlot);
251 }
252 
253 /*
254  * Compute stored generated columns for a tuple
255  */
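/*
 * Illustrative example (hypothetical table): given
 *		CREATE TABLE t (a int, b int GENERATED ALWAYS AS (a * 2) STORED);
 * an INSERT supplying a = 3 arrives here with b not yet set, and this
 * function evaluates the generation expression to store b = 6 in the slot.
 */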
256 void
257 ExecComputeStoredGenerated(ResultRelInfo *resultRelInfo,
258 						   EState *estate, TupleTableSlot *slot,
259 						   CmdType cmdtype)
260 {
261 	Relation	rel = resultRelInfo->ri_RelationDesc;
262 	TupleDesc	tupdesc = RelationGetDescr(rel);
263 	int			natts = tupdesc->natts;
264 	MemoryContext oldContext;
265 	Datum	   *values;
266 	bool	   *nulls;
267 
268 	Assert(tupdesc->constr && tupdesc->constr->has_generated_stored);
269 
270 	/*
271 	 * If first time through for this result relation, build expression
272 	 * nodetrees for rel's stored generation expressions.  Keep them in the
273 	 * per-query memory context so they'll survive throughout the query.
274 	 */
275 	if (resultRelInfo->ri_GeneratedExprs == NULL)
276 	{
277 		oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
278 
279 		resultRelInfo->ri_GeneratedExprs =
280 			(ExprState **) palloc(natts * sizeof(ExprState *));
281 		resultRelInfo->ri_NumGeneratedNeeded = 0;
282 
283 		for (int i = 0; i < natts; i++)
284 		{
285 			if (TupleDescAttr(tupdesc, i)->attgenerated == ATTRIBUTE_GENERATED_STORED)
286 			{
287 				Expr	   *expr;
288 
289 				/*
290 				 * If it's an update and the current column was not marked as
291 				 * being updated, then we can skip the computation.  But if
292 				 * there is a BEFORE ROW UPDATE trigger, we cannot skip
293 				 * because the trigger might affect additional columns.
294 				 */
295 				if (cmdtype == CMD_UPDATE &&
296 					!(rel->trigdesc && rel->trigdesc->trig_update_before_row) &&
297 					!bms_is_member(i + 1 - FirstLowInvalidHeapAttributeNumber,
298 								   ExecGetExtraUpdatedCols(resultRelInfo, estate)))
299 				{
300 					resultRelInfo->ri_GeneratedExprs[i] = NULL;
301 					continue;
302 				}
303 
304 				expr = (Expr *) build_column_default(rel, i + 1);
305 				if (expr == NULL)
306 					elog(ERROR, "no generation expression found for column number %d of table \"%s\"",
307 						 i + 1, RelationGetRelationName(rel));
308 
309 				resultRelInfo->ri_GeneratedExprs[i] = ExecPrepareExpr(expr, estate);
310 				resultRelInfo->ri_NumGeneratedNeeded++;
311 			}
312 		}
313 
314 		MemoryContextSwitchTo(oldContext);
315 	}
316 
317 	/*
318 	 * If no generated columns have been affected by this change, then skip
319 	 * the rest.
320 	 */
321 	if (resultRelInfo->ri_NumGeneratedNeeded == 0)
322 		return;
323 
324 	oldContext = MemoryContextSwitchTo(GetPerTupleMemoryContext(estate));
325 
326 	values = palloc(sizeof(*values) * natts);
327 	nulls = palloc(sizeof(*nulls) * natts);
328 
329 	slot_getallattrs(slot);
330 	memcpy(nulls, slot->tts_isnull, sizeof(*nulls) * natts);
331 
332 	for (int i = 0; i < natts; i++)
333 	{
334 		Form_pg_attribute attr = TupleDescAttr(tupdesc, i);
335 
336 		if (attr->attgenerated == ATTRIBUTE_GENERATED_STORED &&
337 			resultRelInfo->ri_GeneratedExprs[i])
338 		{
339 			ExprContext *econtext;
340 			Datum		val;
341 			bool		isnull;
342 
343 			econtext = GetPerTupleExprContext(estate);
344 			econtext->ecxt_scantuple = slot;
345 
346 			val = ExecEvalExpr(resultRelInfo->ri_GeneratedExprs[i], econtext, &isnull);
347 
348 			/*
349 			 * We must make a copy of val as we have no guarantees about where
350 			 * memory for a pass-by-reference Datum is located.
351 			 */
352 			if (!isnull)
353 				val = datumCopy(val, attr->attbyval, attr->attlen);
354 
355 			values[i] = val;
356 			nulls[i] = isnull;
357 		}
358 		else
359 		{
360 			if (!nulls[i])
361 				values[i] = datumCopy(slot->tts_values[i], attr->attbyval, attr->attlen);
362 		}
363 	}
364 
365 	ExecClearTuple(slot);
366 	memcpy(slot->tts_values, values, sizeof(*values) * natts);
367 	memcpy(slot->tts_isnull, nulls, sizeof(*nulls) * natts);
368 	ExecStoreVirtualTuple(slot);
369 	ExecMaterializeSlot(slot);
370 
371 	MemoryContextSwitchTo(oldContext);
372 }
373 
374 /*
375  * ExecInitInsertProjection
376  *		Do one-time initialization of projection data for INSERT tuples.
377  *
378  * INSERT queries may need a projection to filter out junk attrs in the tlist.
379  *
380  * This is also a convenient place to verify that the
381  * output of an INSERT matches the target table.
382  */
383 static void
384 ExecInitInsertProjection(ModifyTableState *mtstate,
385 						 ResultRelInfo *resultRelInfo)
386 {
387 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
388 	Plan	   *subplan = outerPlan(node);
389 	EState	   *estate = mtstate->ps.state;
390 	List	   *insertTargetList = NIL;
391 	bool		need_projection = false;
392 	ListCell   *l;
393 
394 	/* Extract non-junk columns of the subplan's result tlist. */
395 	foreach(l, subplan->targetlist)
396 	{
397 		TargetEntry *tle = (TargetEntry *) lfirst(l);
398 
399 		if (!tle->resjunk)
400 			insertTargetList = lappend(insertTargetList, tle);
401 		else
402 			need_projection = true;
403 	}
404 
405 	/*
406 	 * The junk-free list must produce a tuple suitable for the result
407 	 * relation.
408 	 */
409 	ExecCheckPlanOutput(resultRelInfo->ri_RelationDesc, insertTargetList);
410 
411 	/* We'll need a slot matching the table's format. */
412 	resultRelInfo->ri_newTupleSlot =
413 		table_slot_create(resultRelInfo->ri_RelationDesc,
414 						  &estate->es_tupleTable);
415 
416 	/* Build ProjectionInfo if needed (it probably isn't). */
417 	if (need_projection)
418 	{
419 		TupleDesc	relDesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
420 
421 		/* need an expression context to do the projection */
422 		if (mtstate->ps.ps_ExprContext == NULL)
423 			ExecAssignExprContext(estate, &mtstate->ps);
424 
425 		resultRelInfo->ri_projectNew =
426 			ExecBuildProjectionInfo(insertTargetList,
427 									mtstate->ps.ps_ExprContext,
428 									resultRelInfo->ri_newTupleSlot,
429 									&mtstate->ps,
430 									relDesc);
431 	}
432 
433 	resultRelInfo->ri_projectNewInfoValid = true;
434 }
435 
436 /*
437  * ExecInitUpdateProjection
438  *		Do one-time initialization of projection data for UPDATE tuples.
439  *
440  * UPDATE always needs a projection, because (1) there are always some junk
441  * attrs, and (2) we may need to merge values of not-updated columns from
442  * the old tuple into the final tuple.  In UPDATE, the tuple arriving from
443  * the subplan contains only new values for the changed columns, plus row
444  * identity info in the junk attrs.
445  *
446  * This is "one-time" for any given result rel, but we might touch more than
447  * one result rel in the course of an inherited UPDATE, and each one needs
448  * its own projection due to possible column order variation.
449  *
450  * This is also a convenient place to verify that the output of an UPDATE
451  * matches the target table (ExecBuildUpdateProjection does that).
452  */
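/*
 * Illustrative example (hypothetical table "t"): for
 *		UPDATE t SET b = b + 1 WHERE a = 1;
 * the subplan tuple carries only the new value of b plus row-identity junk;
 * the projection built here fills in the unchanged columns, such as a, from
 * the old tuple fetched into ri_oldTupleSlot.
 */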
453 static void
454 ExecInitUpdateProjection(ModifyTableState *mtstate,
455 						 ResultRelInfo *resultRelInfo)
456 {
457 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
458 	Plan	   *subplan = outerPlan(node);
459 	EState	   *estate = mtstate->ps.state;
460 	TupleDesc	relDesc = RelationGetDescr(resultRelInfo->ri_RelationDesc);
461 	int			whichrel;
462 	List	   *updateColnos;
463 
464 	/*
465 	 * Usually, mt_lastResultIndex matches the target rel.  If it happens not
466 	 * to, we can get the index the hard way with an integer division.
467 	 */
468 	whichrel = mtstate->mt_lastResultIndex;
469 	if (resultRelInfo != mtstate->resultRelInfo + whichrel)
470 	{
471 		whichrel = resultRelInfo - mtstate->resultRelInfo;
472 		Assert(whichrel >= 0 && whichrel < mtstate->mt_nrels);
473 	}
474 
475 	updateColnos = (List *) list_nth(node->updateColnosLists, whichrel);
476 
477 	/*
478 	 * For UPDATE, we use the old tuple to fill up missing values in the tuple
479 	 * produced by the subplan to get the new tuple.  We need two slots, both
480 	 * matching the table's desired format.
481 	 */
482 	resultRelInfo->ri_oldTupleSlot =
483 		table_slot_create(resultRelInfo->ri_RelationDesc,
484 						  &estate->es_tupleTable);
485 	resultRelInfo->ri_newTupleSlot =
486 		table_slot_create(resultRelInfo->ri_RelationDesc,
487 						  &estate->es_tupleTable);
488 
489 	/* need an expression context to do the projection */
490 	if (mtstate->ps.ps_ExprContext == NULL)
491 		ExecAssignExprContext(estate, &mtstate->ps);
492 
493 	resultRelInfo->ri_projectNew =
494 		ExecBuildUpdateProjection(subplan->targetlist,
495 								  false,	/* subplan did the evaluation */
496 								  updateColnos,
497 								  relDesc,
498 								  mtstate->ps.ps_ExprContext,
499 								  resultRelInfo->ri_newTupleSlot,
500 								  &mtstate->ps);
501 
502 	resultRelInfo->ri_projectNewInfoValid = true;
503 }
504 
505 /*
506  * ExecGetInsertNewTuple
507  *		This prepares a "new" tuple ready to be inserted into given result
508  *		relation, by removing any junk columns of the plan's output tuple
509  *		and (if necessary) coercing the tuple to the right tuple format.
510  */
511 static TupleTableSlot *
512 ExecGetInsertNewTuple(ResultRelInfo *relinfo,
513 					  TupleTableSlot *planSlot)
514 {
515 	ProjectionInfo *newProj = relinfo->ri_projectNew;
516 	ExprContext *econtext;
517 
518 	/*
519 	 * If there's no projection to be done, just make sure the slot is of the
520 	 * right type for the target rel.  If the planSlot is the right type we
521 	 * can use it as-is, else copy the data into ri_newTupleSlot.
522 	 */
523 	if (newProj == NULL)
524 	{
525 		if (relinfo->ri_newTupleSlot->tts_ops != planSlot->tts_ops)
526 		{
527 			ExecCopySlot(relinfo->ri_newTupleSlot, planSlot);
528 			return relinfo->ri_newTupleSlot;
529 		}
530 		else
531 			return planSlot;
532 	}
533 
534 	/*
535 	 * Else project; since the projection output slot is ri_newTupleSlot, this
536 	 * will also fix any slot-type problem.
537 	 *
538 	 * Note: currently, this is dead code, because INSERT cases don't receive
539 	 * any junk columns so there's never a projection to be done.
540 	 */
541 	econtext = newProj->pi_exprContext;
542 	econtext->ecxt_outertuple = planSlot;
543 	return ExecProject(newProj);
544 }
545 
546 /*
547  * ExecGetUpdateNewTuple
548  *		This prepares a "new" tuple by combining an UPDATE subplan's output
549  *		tuple (which contains values of changed columns) with unchanged
550  *		columns taken from the old tuple.
551  *
552  * The subplan tuple might also contain junk columns, which are ignored.
553  * Note that the projection also ensures we have a slot of the right type.
554  */
555 TupleTableSlot *
556 ExecGetUpdateNewTuple(ResultRelInfo *relinfo,
557 					  TupleTableSlot *planSlot,
558 					  TupleTableSlot *oldSlot)
559 {
560 	ProjectionInfo *newProj = relinfo->ri_projectNew;
561 	ExprContext *econtext;
562 
563 	/* Use a few extra Asserts to protect against outside callers */
564 	Assert(relinfo->ri_projectNewInfoValid);
565 	Assert(planSlot != NULL && !TTS_EMPTY(planSlot));
566 	Assert(oldSlot != NULL && !TTS_EMPTY(oldSlot));
567 
568 	econtext = newProj->pi_exprContext;
569 	econtext->ecxt_outertuple = planSlot;
570 	econtext->ecxt_scantuple = oldSlot;
571 	return ExecProject(newProj);
572 }
573 
574 
575 /* ----------------------------------------------------------------
576  *		ExecInsert
577  *
578  *		For INSERT, we have to insert the tuple into the target relation
579  *		(or partition thereof) and insert appropriate tuples into the index
580  *		relations.
581  *
582  *		slot contains the new tuple value to be stored.
583  *		planSlot is the output of the ModifyTable's subplan; we use it
584  *		to access "junk" columns that are not going to be stored.
585  *
586  *		Returns RETURNING result if any, otherwise NULL.
587  *
588  *		This may change the currently active tuple conversion map in
589  *		mtstate->mt_transition_capture, so the callers must take care to
590  *		save the previous value to avoid losing track of it.
591  * ----------------------------------------------------------------
592  */
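/*
 * Illustrative example: an INSERT whose target is a partitioned table
 * arrives here with resultRelInfo pointing at the partitioned (root)
 * relation; ExecPrepareTupleRouting below selects the leaf partition whose
 * bounds accept the new row, and the rest of this function works on that
 * leaf's ResultRelInfo.
 */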
593 static TupleTableSlot *
594 ExecInsert(ModifyTableState *mtstate,
595 		   ResultRelInfo *resultRelInfo,
596 		   TupleTableSlot *slot,
597 		   TupleTableSlot *planSlot,
598 		   EState *estate,
599 		   bool canSetTag)
600 {
601 	Relation	resultRelationDesc;
602 	List	   *recheckIndexes = NIL;
603 	TupleTableSlot *result = NULL;
604 	TransitionCaptureState *ar_insert_trig_tcs;
605 	ModifyTable *node = (ModifyTable *) mtstate->ps.plan;
606 	OnConflictAction onconflict = node->onConflictAction;
607 	PartitionTupleRouting *proute = mtstate->mt_partition_tuple_routing;
608 	MemoryContext oldContext;
609 
610 	/*
611 	 * If the input result relation is a partitioned table, find the leaf
612 	 * partition to insert the tuple into.
613 	 */
614 	if (proute)
615 	{
616 		ResultRelInfo *partRelInfo;
617 
618 		slot = ExecPrepareTupleRouting(mtstate, estate, proute,
619 									   resultRelInfo, slot,
620 									   &partRelInfo);
621 		resultRelInfo = partRelInfo;
622 	}
623 
624 	ExecMaterializeSlot(slot);
625 
626 	resultRelationDesc = resultRelInfo->ri_RelationDesc;
627 
628 	/*
629 	 * Open the table's indexes, if we have not done so already, so that we
630 	 * can add new index entries for the inserted tuple.
631 	 */
632 	if (resultRelationDesc->rd_rel->relhasindex &&
633 		resultRelInfo->ri_IndexRelationDescs == NULL)
634 		ExecOpenIndices(resultRelInfo, onconflict != ONCONFLICT_NONE);
635 
636 	/*
637 	 * BEFORE ROW INSERT Triggers.
638 	 *
639 	 * Note: We fire BEFORE ROW TRIGGERS for every attempted insertion in an
640 	 * INSERT ... ON CONFLICT statement.  We cannot check for constraint
641 	 * violations before firing these triggers, because they can change the
642 	 * values to insert.  Also, they can run arbitrary user-defined code with
643 	 * side-effects that we can't cancel by just not inserting the tuple.
644 	 */
645 	if (resultRelInfo->ri_TrigDesc &&
646 		resultRelInfo->ri_TrigDesc->trig_insert_before_row)
647 	{
648 		if (!ExecBRInsertTriggers(estate, resultRelInfo, slot))
649 			return NULL;		/* "do nothing" */
650 	}
651 
652 	/* INSTEAD OF ROW INSERT Triggers */
653 	if (resultRelInfo->ri_TrigDesc &&
654 		resultRelInfo->ri_TrigDesc->trig_insert_instead_row)
655 	{
656 		if (!ExecIRInsertTriggers(estate, resultRelInfo, slot))
657 			return NULL;		/* "do nothing" */
658 	}
659 	else if (resultRelInfo->ri_FdwRoutine)
660 	{
661 		/*
662 		 * GENERATED expressions might reference the tableoid column, so
663 		 * (re-)initialize tts_tableOid before evaluating them.
664 		 */
665 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
666 
667 		/*
668 		 * Compute stored generated columns
669 		 */
670 		if (resultRelationDesc->rd_att->constr &&
671 			resultRelationDesc->rd_att->constr->has_generated_stored)
672 			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
673 									   CMD_INSERT);
674 
675 		/*
676 		 * If the FDW supports batching, and batching is requested, accumulate
677 		 * rows and insert them in batches. Otherwise use the per-row inserts.
678 		 */
679 		if (resultRelInfo->ri_BatchSize > 1)
680 		{
681 			/*
682 			 * If a certain number of tuples have already been accumulated, or
683 			 * a tuple has come for a different relation than that for the
684 			 * accumulated tuples, perform the batch insert
685 			 */
686 			if (resultRelInfo->ri_NumSlots == resultRelInfo->ri_BatchSize)
687 			{
688 				ExecBatchInsert(mtstate, resultRelInfo,
689 								resultRelInfo->ri_Slots,
690 								resultRelInfo->ri_PlanSlots,
691 								resultRelInfo->ri_NumSlots,
692 								estate, canSetTag);
693 				resultRelInfo->ri_NumSlots = 0;
694 			}
695 
696 			oldContext = MemoryContextSwitchTo(estate->es_query_cxt);
697 
698 			if (resultRelInfo->ri_Slots == NULL)
699 			{
700 				resultRelInfo->ri_Slots = palloc(sizeof(TupleTableSlot *) *
701 												 resultRelInfo->ri_BatchSize);
702 				resultRelInfo->ri_PlanSlots = palloc(sizeof(TupleTableSlot *) *
703 													 resultRelInfo->ri_BatchSize);
704 			}
705 
706 			/*
707 			 * Initialize the batch slots. We don't know how many slots will
708 			 * be needed, so we initialize them as the batch grows, and we
709 			 * keep them across batches. To mitigate an inefficiency in how
710 			 * resource owner handles objects with many references (as with
711 			 * many slots all referencing the same tuple descriptor) we copy
712 			 * the appropriate tuple descriptor for each slot.
713 			 */
714 			if (resultRelInfo->ri_NumSlots >= resultRelInfo->ri_NumSlotsInitialized)
715 			{
716 				TupleDesc	tdesc = CreateTupleDescCopy(slot->tts_tupleDescriptor);
717 				TupleDesc	plan_tdesc =
718 					CreateTupleDescCopy(planSlot->tts_tupleDescriptor);
719 
720 				resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots] =
721 					MakeSingleTupleTableSlot(tdesc, slot->tts_ops);
722 
723 				resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots] =
724 					MakeSingleTupleTableSlot(plan_tdesc, planSlot->tts_ops);
725 
726 				/* remember how many batch slots we initialized */
727 				resultRelInfo->ri_NumSlotsInitialized++;
728 			}
729 
730 			ExecCopySlot(resultRelInfo->ri_Slots[resultRelInfo->ri_NumSlots],
731 						 slot);
732 
733 			ExecCopySlot(resultRelInfo->ri_PlanSlots[resultRelInfo->ri_NumSlots],
734 						 planSlot);
735 
736 			resultRelInfo->ri_NumSlots++;
737 
738 			MemoryContextSwitchTo(oldContext);
739 
740 			return NULL;
741 		}
742 
743 		/*
744 		 * insert into foreign table: let the FDW do it
745 		 */
746 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignInsert(estate,
747 															   resultRelInfo,
748 															   slot,
749 															   planSlot);
750 
751 		if (slot == NULL)		/* "do nothing" */
752 			return NULL;
753 
754 		/*
755 		 * AFTER ROW Triggers or RETURNING expressions might reference the
756 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
757 		 * them.  (This covers the case where the FDW replaced the slot.)
758 		 */
759 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
760 	}
761 	else
762 	{
763 		WCOKind		wco_kind;
764 
765 		/*
766 		 * Constraints and GENERATED expressions might reference the tableoid
767 		 * column, so (re-)initialize tts_tableOid before evaluating them.
768 		 */
769 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
770 
771 		/*
772 		 * Compute stored generated columns
773 		 */
774 		if (resultRelationDesc->rd_att->constr &&
775 			resultRelationDesc->rd_att->constr->has_generated_stored)
776 			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
777 									   CMD_INSERT);
778 
779 		/*
780 		 * Check any RLS WITH CHECK policies.
781 		 *
782 		 * Normally we should check INSERT policies. But if the insert is the
783 		 * result of a partition key update that moved the tuple to a new
784 		 * partition, we should instead check UPDATE policies, because we are
785 		 * executing policies defined on the target table, and not those
786 		 * defined on the child partitions.
787 		 */
788 		wco_kind = (mtstate->operation == CMD_UPDATE) ?
789 			WCO_RLS_UPDATE_CHECK : WCO_RLS_INSERT_CHECK;
790 
791 		/*
792 		 * ExecWithCheckOptions() will skip any WCOs which are not of the kind
793 		 * we are looking for at this point.
794 		 */
795 		if (resultRelInfo->ri_WithCheckOptions != NIL)
796 			ExecWithCheckOptions(wco_kind, resultRelInfo, slot, estate);
797 
798 		/*
799 		 * Check the constraints of the tuple.
800 		 */
801 		if (resultRelationDesc->rd_att->constr)
802 			ExecConstraints(resultRelInfo, slot, estate);
803 
804 		/*
805 		 * Also check the tuple against the partition constraint, if there is
806 		 * one; except that if we got here via tuple-routing, we don't need to
807 		 * check it if there's no BR trigger defined on the partition.
808 		 */
809 		if (resultRelationDesc->rd_rel->relispartition &&
810 			(resultRelInfo->ri_RootResultRelInfo == NULL ||
811 			 (resultRelInfo->ri_TrigDesc &&
812 			  resultRelInfo->ri_TrigDesc->trig_insert_before_row)))
813 			ExecPartitionCheck(resultRelInfo, slot, estate, true);
814 
815 		if (onconflict != ONCONFLICT_NONE && resultRelInfo->ri_NumIndices > 0)
816 		{
817 			/* Perform a speculative insertion. */
818 			uint32		specToken;
819 			ItemPointerData conflictTid;
820 			bool		specConflict;
821 			List	   *arbiterIndexes;
822 
823 			arbiterIndexes = resultRelInfo->ri_onConflictArbiterIndexes;
824 
825 			/*
826 			 * Do a non-conclusive check for conflicts first.
827 			 *
828 			 * We're not holding any locks yet, so this doesn't guarantee that
829 			 * the later insert won't conflict.  But it avoids leaving behind
830 			 * a lot of canceled speculative insertions, if you run a lot of
831 			 * INSERT ON CONFLICT statements that do conflict.
832 			 *
833 			 * We loop back here if we find a conflict below, either during
834 			 * the pre-check, or when we re-check after inserting the tuple
835 			 * speculatively.
836 			 */
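			/*
			 * Illustrative example (hypothetical table "t"): with
			 *		INSERT INTO t VALUES (1, 'x')
			 *			ON CONFLICT (id) DO UPDATE SET val = EXCLUDED.val;
			 * a row with id = 1 inserted by a concurrent session between our
			 * pre-check and our speculative insertion is detected as a
			 * conflict, and we come back to this label to run the DO UPDATE
			 * (or DO NOTHING) path against the now-visible conflicting row.
			 */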
837 	vlock:
838 			specConflict = false;
839 			if (!ExecCheckIndexConstraints(resultRelInfo, slot, estate,
840 										   &conflictTid, arbiterIndexes))
841 			{
842 				/* committed conflict tuple found */
843 				if (onconflict == ONCONFLICT_UPDATE)
844 				{
845 					/*
846 					 * In case of ON CONFLICT DO UPDATE, execute the UPDATE
847 					 * part.  Be prepared to retry if the UPDATE fails because
848 					 * of another concurrent UPDATE/DELETE to the conflict
849 					 * tuple.
850 					 */
851 					TupleTableSlot *returning = NULL;
852 
853 					if (ExecOnConflictUpdate(mtstate, resultRelInfo,
854 											 &conflictTid, planSlot, slot,
855 											 estate, canSetTag, &returning))
856 					{
857 						InstrCountTuples2(&mtstate->ps, 1);
858 						return returning;
859 					}
860 					else
861 						goto vlock;
862 				}
863 				else
864 				{
865 					/*
866 					 * In case of ON CONFLICT DO NOTHING, do nothing. However,
867 					 * verify that the tuple is visible to the executor's MVCC
868 					 * snapshot at higher isolation levels.
869 					 *
870 					 * Using ExecGetReturningSlot() to store the tuple for the
871 					 * recheck isn't that pretty, but we can't trivially use
872 					 * the input slot, because it might not be of a compatible
873 					 * type. As there's no conflicting usage of
874 					 * ExecGetReturningSlot() in the DO NOTHING case...
875 					 */
876 					Assert(onconflict == ONCONFLICT_NOTHING);
877 					ExecCheckTIDVisible(estate, resultRelInfo, &conflictTid,
878 										ExecGetReturningSlot(estate, resultRelInfo));
879 					InstrCountTuples2(&mtstate->ps, 1);
880 					return NULL;
881 				}
882 			}
883 
884 			/*
885 			 * Before we start insertion proper, acquire our "speculative
886 			 * insertion lock".  Others can use that to wait for us to decide
887 			 * if we're going to go ahead with the insertion, instead of
888 			 * waiting for the whole transaction to complete.
889 			 */
890 			specToken = SpeculativeInsertionLockAcquire(GetCurrentTransactionId());
891 
892 			/* insert the tuple, with the speculative token */
893 			table_tuple_insert_speculative(resultRelationDesc, slot,
894 										   estate->es_output_cid,
895 										   0,
896 										   NULL,
897 										   specToken);
898 
899 			/* insert index entries for tuple */
900 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
901 												   slot, estate, false, true,
902 												   &specConflict,
903 												   arbiterIndexes);
904 
905 			/* adjust the tuple's state accordingly */
906 			table_tuple_complete_speculative(resultRelationDesc, slot,
907 											 specToken, !specConflict);
908 
909 			/*
910 			 * Wake up anyone waiting for our decision.  They will re-check
911 			 * the tuple, see that it's no longer speculative, and wait on our
912 			 * XID as if this was a regularly inserted tuple all along.  Or if
913 			 * we killed the tuple, they will see it's dead, and proceed as if
914 			 * the tuple never existed.
915 			 */
916 			SpeculativeInsertionLockRelease(GetCurrentTransactionId());
917 
918 			/*
919 			 * If there was a conflict, start from the beginning.  We'll do
920 			 * the pre-check again, which will now find the conflicting tuple
921 			 * (unless it aborts before we get there).
922 			 */
923 			if (specConflict)
924 			{
925 				list_free(recheckIndexes);
926 				goto vlock;
927 			}
928 
929 			/* Since there was no insertion conflict, we're done */
930 		}
931 		else
932 		{
933 			/* insert the tuple normally */
934 			table_tuple_insert(resultRelationDesc, slot,
935 							   estate->es_output_cid,
936 							   0, NULL);
937 
938 			/* insert index entries for tuple */
939 			if (resultRelInfo->ri_NumIndices > 0)
940 				recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
941 													   slot, estate, false,
942 													   false, NULL, NIL);
943 		}
944 	}
945 
946 	if (canSetTag)
947 		(estate->es_processed)++;
948 
949 	/*
950 	 * If this insert is the result of a partition key update that moved the
951 	 * tuple to a new partition, put this row into the transition NEW TABLE,
952 	 * if there is one. We need to do this separately for DELETE and INSERT
953 	 * because they happen on different tables.
954 	 */
955 	ar_insert_trig_tcs = mtstate->mt_transition_capture;
956 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
957 		&& mtstate->mt_transition_capture->tcs_update_new_table)
958 	{
959 		ExecARUpdateTriggers(estate, resultRelInfo, NULL,
960 							 NULL,
961 							 slot,
962 							 NULL,
963 							 mtstate->mt_transition_capture);
964 
965 		/*
966 		 * We've already captured the NEW TABLE row, so make sure any AR
967 		 * INSERT trigger fired below doesn't capture it again.
968 		 */
969 		ar_insert_trig_tcs = NULL;
970 	}
971 
972 	/* AFTER ROW INSERT Triggers */
973 	ExecARInsertTriggers(estate, resultRelInfo, slot, recheckIndexes,
974 						 ar_insert_trig_tcs);
975 
976 	list_free(recheckIndexes);
977 
978 	/*
979 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
980 	 * required to do this after testing all constraints and uniqueness
981 	 * violations per the SQL spec, so we do it after actually inserting the
982 	 * record into the heap and all indexes.
983 	 *
984 	 * ExecWithCheckOptions will elog(ERROR) if a violation is found, so the
985 	 * tuple will never be seen, if it violates the WITH CHECK OPTION.
986 	 *
987 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
988 	 * are looking for at this point.
989 	 */
990 	if (resultRelInfo->ri_WithCheckOptions != NIL)
991 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
992 
993 	/* Process RETURNING if present */
994 	if (resultRelInfo->ri_projectReturning)
995 		result = ExecProcessReturning(resultRelInfo, slot, planSlot);
996 
997 	return result;
998 }
999 
1000 /* ----------------------------------------------------------------
1001  *		ExecBatchInsert
1002  *
1003  *		Insert multiple tuples in an efficient way.
1004  *		Currently, this handles inserting into a foreign table without
1005  *		RETURNING clause.
1006  * ----------------------------------------------------------------
1007  */
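/*
 *		Batching applies only when the FDW provides ExecForeignBatchInsert
 *		and a batch size greater than one is in effect (for postgres_fdw,
 *		for example, that is governed by its "batch_size" option); otherwise
 *		ExecInsert falls back to per-row ExecForeignInsert calls.
 */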
1008 static void
1009 ExecBatchInsert(ModifyTableState *mtstate,
1010 				ResultRelInfo *resultRelInfo,
1011 				TupleTableSlot **slots,
1012 				TupleTableSlot **planSlots,
1013 				int numSlots,
1014 				EState *estate,
1015 				bool canSetTag)
1016 {
1017 	int			i;
1018 	int			numInserted = numSlots;
1019 	TupleTableSlot *slot = NULL;
1020 	TupleTableSlot **rslots;
1021 
1022 	/*
1023 	 * insert into foreign table: let the FDW do it
1024 	 */
1025 	rslots = resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert(estate,
1026 																  resultRelInfo,
1027 																  slots,
1028 																  planSlots,
1029 																  &numInserted);
1030 
1031 	for (i = 0; i < numInserted; i++)
1032 	{
1033 		slot = rslots[i];
1034 
1035 		/*
1036 		 * AFTER ROW Triggers or RETURNING expressions might reference the
1037 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
1038 		 * them.
1039 		 */
1040 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
1041 
1042 		/* AFTER ROW INSERT Triggers */
1043 		ExecARInsertTriggers(estate, resultRelInfo, slot, NIL,
1044 							 mtstate->mt_transition_capture);
1045 
1046 		/*
1047 		 * Check any WITH CHECK OPTION constraints from parent views.  See the
1048 		 * comment in ExecInsert.
1049 		 */
1050 		if (resultRelInfo->ri_WithCheckOptions != NIL)
1051 			ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1052 	}
1053 
1054 	if (canSetTag && numInserted > 0)
1055 		estate->es_processed += numInserted;
1056 }
1057 
1058 /* ----------------------------------------------------------------
1059  *		ExecDelete
1060  *
1061  *		DELETE is like UPDATE, except that we delete the tuple and no
1062  *		index modifications are needed.
1063  *
1064  *		When deleting from a table, tupleid identifies the tuple to
1065  *		delete and oldtuple is NULL.  When deleting from a view,
1066  *		oldtuple is passed to the INSTEAD OF triggers and identifies
1067  *		what to delete, and tupleid is invalid.  When deleting from a
1068  *		foreign table, tupleid is invalid; the FDW has to figure out
1069  *		which row to delete using data from the planSlot.  oldtuple is
1070  *		passed to foreign table triggers; it is NULL when the foreign
1071  *		table has no relevant triggers.  We use tupleDeleted to indicate
1072  *		whether the tuple is actually deleted, callers can use it to
1073  *		decide whether to continue the operation.  When this DELETE is a
1074  *		part of an UPDATE of partition-key, then the slot returned by
1075  *		EvalPlanQual() is passed back using output parameter epqslot.
1076  *
1077  *		Returns RETURNING result if any, otherwise NULL.
1078  * ----------------------------------------------------------------
1079  */
1080 static TupleTableSlot *
1081 ExecDelete(ModifyTableState *mtstate,
1082 		   ResultRelInfo *resultRelInfo,
1083 		   ItemPointer tupleid,
1084 		   HeapTuple oldtuple,
1085 		   TupleTableSlot *planSlot,
1086 		   EPQState *epqstate,
1087 		   EState *estate,
1088 		   bool processReturning,
1089 		   bool canSetTag,
1090 		   bool changingPart,
1091 		   bool *tupleDeleted,
1092 		   TupleTableSlot **epqreturnslot)
1093 {
1094 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1095 	TM_Result	result;
1096 	TM_FailureData tmfd;
1097 	TupleTableSlot *slot = NULL;
1098 	TransitionCaptureState *ar_delete_trig_tcs;
1099 
1100 	if (tupleDeleted)
1101 		*tupleDeleted = false;
1102 
1103 	/* BEFORE ROW DELETE Triggers */
1104 	if (resultRelInfo->ri_TrigDesc &&
1105 		resultRelInfo->ri_TrigDesc->trig_delete_before_row)
1106 	{
1107 		bool		dodelete;
1108 
1109 		dodelete = ExecBRDeleteTriggers(estate, epqstate, resultRelInfo,
1110 										tupleid, oldtuple, epqreturnslot);
1111 
1112 		if (!dodelete)			/* "do nothing" */
1113 			return NULL;
1114 	}
1115 
1116 	/* INSTEAD OF ROW DELETE Triggers */
1117 	if (resultRelInfo->ri_TrigDesc &&
1118 		resultRelInfo->ri_TrigDesc->trig_delete_instead_row)
1119 	{
1120 		bool		dodelete;
1121 
1122 		Assert(oldtuple != NULL);
1123 		dodelete = ExecIRDeleteTriggers(estate, resultRelInfo, oldtuple);
1124 
1125 		if (!dodelete)			/* "do nothing" */
1126 			return NULL;
1127 	}
1128 	else if (resultRelInfo->ri_FdwRoutine)
1129 	{
1130 		/*
1131 		 * delete from foreign table: let the FDW do it
1132 		 *
1133 		 * We offer the returning slot as a place to store RETURNING data,
1134 		 * although the FDW can return some other slot if it wants.
1135 		 */
1136 		slot = ExecGetReturningSlot(estate, resultRelInfo);
1137 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignDelete(estate,
1138 															   resultRelInfo,
1139 															   slot,
1140 															   planSlot);
1141 
1142 		if (slot == NULL)		/* "do nothing" */
1143 			return NULL;
1144 
1145 		/*
1146 		 * RETURNING expressions might reference the tableoid column, so
1147 		 * (re)initialize tts_tableOid before evaluating them.
1148 		 */
1149 		if (TTS_EMPTY(slot))
1150 			ExecStoreAllNullTuple(slot);
1151 
1152 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1153 	}
1154 	else
1155 	{
1156 		/*
1157 		 * delete the tuple
1158 		 *
1159 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1160 		 * that the row to be deleted is visible to that snapshot, and throw a
1161 		 * can't-serialize error if not. This is a special-case behavior
1162 		 * needed for referential integrity updates in transaction-snapshot
1163 		 * mode transactions.
1164 		 */
1165 ldelete:;
1166 		result = table_tuple_delete(resultRelationDesc, tupleid,
1167 									estate->es_output_cid,
1168 									estate->es_snapshot,
1169 									estate->es_crosscheck_snapshot,
1170 									true /* wait for commit */ ,
1171 									&tmfd,
1172 									changingPart);
1173 
1174 		switch (result)
1175 		{
1176 			case TM_SelfModified:
1177 
1178 				/*
1179 				 * The target tuple was already updated or deleted by the
1180 				 * current command, or by a later command in the current
1181 				 * transaction.  The former case is possible in a join DELETE
1182 				 * where multiple tuples join to the same target tuple. This
1183 				 * is somewhat questionable, but Postgres has always allowed
1184 				 * it: we just ignore additional deletion attempts.
1185 				 *
1186 				 * The latter case arises if the tuple is modified by a
1187 				 * command in a BEFORE trigger, or perhaps by a command in a
1188 				 * volatile function used in the query.  In such situations we
1189 				 * should not ignore the deletion, but it is equally unsafe to
1190 				 * proceed.  We don't want to discard the original DELETE
1191 				 * while keeping the triggered actions based on its deletion;
1192 				 * and it would be no better to allow the original DELETE
1193 				 * while discarding updates that it triggered.  The row update
1194 				 * carries some information that might be important according
1195 				 * to business rules; so throwing an error is the only safe
1196 				 * course.
1197 				 *
1198 				 * If a trigger actually intends this type of interaction, it
1199 				 * can re-execute the DELETE and then return NULL to cancel
1200 				 * the outer delete.
1201 				 */
1202 				if (tmfd.cmax != estate->es_output_cid)
1203 					ereport(ERROR,
1204 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1205 							 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
1206 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1207 
1208 				/* Else, already deleted by self; nothing to do */
1209 				return NULL;
1210 
1211 			case TM_Ok:
1212 				break;
1213 
1214 			case TM_Updated:
1215 				{
1216 					TupleTableSlot *inputslot;
1217 					TupleTableSlot *epqslot;
1218 
1219 					if (IsolationUsesXactSnapshot())
1220 						ereport(ERROR,
1221 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1222 								 errmsg("could not serialize access due to concurrent update")));
1223 
1224 					/*
1225 					 * Already know that we're going to need to do EPQ, so
1226 					 * fetch tuple directly into the right slot.
1227 					 */
1228 					EvalPlanQualBegin(epqstate);
1229 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
1230 												 resultRelInfo->ri_RangeTableIndex);
1231 
1232 					result = table_tuple_lock(resultRelationDesc, tupleid,
1233 											  estate->es_snapshot,
1234 											  inputslot, estate->es_output_cid,
1235 											  LockTupleExclusive, LockWaitBlock,
1236 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
1237 											  &tmfd);
1238 
1239 					switch (result)
1240 					{
1241 						case TM_Ok:
1242 							Assert(tmfd.traversed);
1243 							epqslot = EvalPlanQual(epqstate,
1244 												   resultRelationDesc,
1245 												   resultRelInfo->ri_RangeTableIndex,
1246 												   inputslot);
1247 							if (TupIsNull(epqslot))
1248 								/* Tuple not passing quals anymore, exiting... */
1249 								return NULL;
1250 
1251 							/*
1252 							 * If requested, skip delete and pass back the
1253 							 * updated row.
1254 							 */
1255 							if (epqreturnslot)
1256 							{
1257 								*epqreturnslot = epqslot;
1258 								return NULL;
1259 							}
1260 							else
1261 								goto ldelete;
1262 
1263 						case TM_SelfModified:
1264 
1265 							/*
1266 							 * This can be reached when following an update
1267 							 * chain from a tuple updated by another session,
1268 							 * reaching a tuple that was already updated in
1269 							 * this transaction. If previously updated by this
1270 							 * command, ignore the delete, otherwise error
1271 							 * out.
1272 							 *
1273 							 * See also TM_SelfModified response to
1274 							 * table_tuple_delete() above.
1275 							 */
1276 							if (tmfd.cmax != estate->es_output_cid)
1277 								ereport(ERROR,
1278 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1279 										 errmsg("tuple to be deleted was already modified by an operation triggered by the current command"),
1280 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1281 							return NULL;
1282 
1283 						case TM_Deleted:
1284 							/* tuple already deleted; nothing to do */
1285 							return NULL;
1286 
1287 						default:
1288 
1289 							/*
1290 							 * TM_Invisible should be impossible because we're
1291 							 * waiting for updated row versions, and would
1292 							 * already have errored out if the first version
1293 							 * is invisible.
1294 							 *
1295 							 * TM_Updated should be impossible, because we're
1296 							 * locking the latest version via
1297 							 * TUPLE_LOCK_FLAG_FIND_LAST_VERSION.
1298 							 */
1299 							elog(ERROR, "unexpected table_tuple_lock status: %u",
1300 								 result);
1301 							return NULL;
1302 					}
1303 
1304 					Assert(false);
1305 					break;
1306 				}
1307 
1308 			case TM_Deleted:
1309 				if (IsolationUsesXactSnapshot())
1310 					ereport(ERROR,
1311 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1312 							 errmsg("could not serialize access due to concurrent delete")));
1313 				/* tuple already deleted; nothing to do */
1314 				return NULL;
1315 
1316 			default:
1317 				elog(ERROR, "unrecognized table_tuple_delete status: %u",
1318 					 result);
1319 				return NULL;
1320 		}
1321 
1322 		/*
1323 		 * Note: Normally one would think that we have to delete index tuples
1324 		 * associated with the heap tuple now...
1325 		 *
1326 		 * ... but in POSTGRES, we have no need to do this because VACUUM will
1327 		 * take care of it later.  We can't delete index tuples immediately
1328 		 * anyway, since the tuple is still visible to other transactions.
1329 		 */
1330 	}
1331 
1332 	if (canSetTag)
1333 		(estate->es_processed)++;
1334 
1335 	/* Tell caller that the delete actually happened. */
1336 	if (tupleDeleted)
1337 		*tupleDeleted = true;
1338 
1339 	/*
1340 	 * If this delete is the result of a partition key update that moved the
1341 	 * tuple to a new partition, put this row into the transition OLD TABLE,
1342 	 * if there is one. We need to do this separately for DELETE and INSERT
1343 	 * because they happen on different tables.
1344 	 */
1345 	ar_delete_trig_tcs = mtstate->mt_transition_capture;
1346 	if (mtstate->operation == CMD_UPDATE && mtstate->mt_transition_capture
1347 		&& mtstate->mt_transition_capture->tcs_update_old_table)
1348 	{
1349 		ExecARUpdateTriggers(estate, resultRelInfo,
1350 							 tupleid,
1351 							 oldtuple,
1352 							 NULL,
1353 							 NULL,
1354 							 mtstate->mt_transition_capture);
1355 
1356 		/*
1357 		 * We've already captured the OLD TABLE row, so make sure any AR
1358 		 * DELETE trigger fired below doesn't capture it again.
1359 		 */
1360 		ar_delete_trig_tcs = NULL;
1361 	}
1362 
1363 	/* AFTER ROW DELETE Triggers */
1364 	ExecARDeleteTriggers(estate, resultRelInfo, tupleid, oldtuple,
1365 						 ar_delete_trig_tcs);
1366 
1367 	/* Process RETURNING if present and if requested */
1368 	if (processReturning && resultRelInfo->ri_projectReturning)
1369 	{
1370 		/*
1371 		 * We have to put the target tuple into a slot, which means first we
1372 		 * gotta fetch it.  We can use the trigger tuple slot.
1373 		 */
1374 		TupleTableSlot *rslot;
1375 
1376 		if (resultRelInfo->ri_FdwRoutine)
1377 		{
1378 			/* FDW must have provided a slot containing the deleted row */
1379 			Assert(!TupIsNull(slot));
1380 		}
1381 		else
1382 		{
1383 			slot = ExecGetReturningSlot(estate, resultRelInfo);
1384 			if (oldtuple != NULL)
1385 			{
1386 				ExecForceStoreHeapTuple(oldtuple, slot, false);
1387 			}
1388 			else
1389 			{
1390 				if (!table_tuple_fetch_row_version(resultRelationDesc, tupleid,
1391 												   SnapshotAny, slot))
1392 					elog(ERROR, "failed to fetch deleted tuple for DELETE RETURNING");
1393 			}
1394 		}
1395 
1396 		rslot = ExecProcessReturning(resultRelInfo, slot, planSlot);
1397 
1398 		/*
1399 		 * Before releasing the target tuple again, make sure rslot has a
1400 		 * local copy of any pass-by-reference values.
1401 		 */
1402 		ExecMaterializeSlot(rslot);
1403 
1404 		ExecClearTuple(slot);
1405 
1406 		return rslot;
1407 	}
1408 
1409 	return NULL;
1410 }
1411 
1412 /*
1413  * ExecCrossPartitionUpdate --- Move an updated tuple to another partition.
1414  *
1415  * This works by first deleting the old tuple from the current partition,
1416  * followed by inserting the new tuple into the root parent table, that is,
1417  * mtstate->rootResultRelInfo.  It will be re-routed from there to the
1418  * correct partition.
1419  *
1420  * Returns true if the tuple has been successfully moved, or if it's found
1421  * that the tuple was concurrently deleted so there's nothing more to do
1422  * for the caller.
1423  *
1424  * False is returned if the tuple we're trying to move is found to have been
1425  * concurrently updated.  In that case, the caller must check if the
1426  * updated tuple that's returned in *retry_slot still needs to be re-routed,
1427  * and call this function again or perform a regular update accordingly.
1428  */
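/*
 * Illustrative example (hypothetical table "meas" partitioned by range on
 * logdate): for
 *		UPDATE meas SET logdate = logdate + interval '1 month' WHERE ...;
 * a row whose new logdate falls outside its current partition's bounds is
 * handled here: it is deleted from the old partition and re-inserted through
 * the root, which routes it to the partition that accepts the new value.
 */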
1429 static bool
1430 ExecCrossPartitionUpdate(ModifyTableState *mtstate,
1431 						 ResultRelInfo *resultRelInfo,
1432 						 ItemPointer tupleid, HeapTuple oldtuple,
1433 						 TupleTableSlot *slot, TupleTableSlot *planSlot,
1434 						 EPQState *epqstate, bool canSetTag,
1435 						 TupleTableSlot **retry_slot,
1436 						 TupleTableSlot **inserted_tuple)
1437 {
1438 	EState	   *estate = mtstate->ps.state;
1439 	TupleConversionMap *tupconv_map;
1440 	bool		tuple_deleted;
1441 	TupleTableSlot *epqslot = NULL;
1442 
1443 	*inserted_tuple = NULL;
1444 	*retry_slot = NULL;
1445 
1446 	/*
1447 	 * Disallow an INSERT ON CONFLICT DO UPDATE that causes the original row
1448 	 * to migrate to a different partition.  Maybe this can be implemented
1449 	 * some day, but it seems a fringe feature with little redeeming value.
1450 	 */
1451 	if (((ModifyTable *) mtstate->ps.plan)->onConflictAction == ONCONFLICT_UPDATE)
1452 		ereport(ERROR,
1453 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1454 				 errmsg("invalid ON UPDATE specification"),
1455 				 errdetail("The result tuple would appear in a different partition than the original tuple.")));
1456 
1457 	/*
1458 	 * When an UPDATE is run directly on a leaf partition, simply fail with a
1459 	 * partition constraint violation error.
1460 	 */
1461 	if (resultRelInfo == mtstate->rootResultRelInfo)
1462 		ExecPartitionCheckEmitError(resultRelInfo, slot, estate);
1463 
1464 	/* Initialize tuple routing info if not already done. */
1465 	if (mtstate->mt_partition_tuple_routing == NULL)
1466 	{
1467 		Relation	rootRel = mtstate->rootResultRelInfo->ri_RelationDesc;
1468 		MemoryContext oldcxt;
1469 
1470 		/* Things built here have to last for the query duration. */
1471 		oldcxt = MemoryContextSwitchTo(estate->es_query_cxt);
1472 
1473 		mtstate->mt_partition_tuple_routing =
1474 			ExecSetupPartitionTupleRouting(estate, rootRel);
1475 
1476 		/*
1477 		 * Before a partition's tuple can be re-routed, it must first be
1478 		 * converted to the root's format, so we'll need a slot for storing
1479 		 * such tuples.
1480 		 */
1481 		Assert(mtstate->mt_root_tuple_slot == NULL);
1482 		mtstate->mt_root_tuple_slot = table_slot_create(rootRel, NULL);
1483 
1484 		MemoryContextSwitchTo(oldcxt);
1485 	}
1486 
1487 	/*
1488 	 * Row movement, part 1.  Delete the tuple, but skip RETURNING processing.
1489 	 * We want to return rows from INSERT.
1490 	 */
1491 	ExecDelete(mtstate, resultRelInfo, tupleid, oldtuple, planSlot,
1492 			   epqstate, estate,
1493 			   false,			/* processReturning */
1494 			   false,			/* canSetTag */
1495 			   true,			/* changingPart */
1496 			   &tuple_deleted, &epqslot);
1497 
1498 	/*
1499 	 * If for some reason the DELETE didn't happen (e.g. a trigger prevented it, or
1500 	 * it was already deleted by self, or it was concurrently deleted by
1501 	 * another transaction), then we should skip the insert as well;
1502 	 * otherwise, an UPDATE could cause an increase in the total number of
1503 	 * rows across all partitions, which is clearly wrong.
1504 	 *
1505 	 * For a normal UPDATE, the case where the tuple has been the subject of a
1506 	 * concurrent UPDATE or DELETE would be handled by the EvalPlanQual
1507 	 * machinery, but for an UPDATE that we've translated into a DELETE from
1508 	 * this partition and an INSERT into some other partition, that's not
1509 	 * available, because CTID chains can't span relation boundaries.  We
1510 	 * mimic the semantics to a limited extent by skipping the INSERT if the
1511 	 * DELETE fails to find a tuple.  This ensures that two concurrent
1512 	 * attempts to UPDATE the same tuple at the same time can't turn one tuple
1513 	 * into two, and that an UPDATE of a just-deleted tuple can't resurrect
1514 	 * it.
1515 	 */
1516 	if (!tuple_deleted)
1517 	{
1518 		/*
1519 		 * epqslot will typically be NULL.  But when ExecDelete() finds that
1520 		 * another transaction has concurrently updated the same row, it
1521 		 * re-fetches the row, skips the delete, and epqslot is set to the
1522 		 * re-fetched tuple slot.  In that case, we need to do all the checks
1523 		 * again.
1524 		 */
1525 		if (TupIsNull(epqslot))
1526 			return true;
1527 		else
1528 		{
1529 			/* Fetch the most recent version of old tuple. */
1530 			TupleTableSlot *oldSlot;
1531 
1532 			/* ... but first, make sure ri_oldTupleSlot is initialized. */
1533 			if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
1534 				ExecInitUpdateProjection(mtstate, resultRelInfo);
1535 			oldSlot = resultRelInfo->ri_oldTupleSlot;
1536 			if (!table_tuple_fetch_row_version(resultRelInfo->ri_RelationDesc,
1537 											   tupleid,
1538 											   SnapshotAny,
1539 											   oldSlot))
1540 				elog(ERROR, "failed to fetch tuple being updated");
1541 			*retry_slot = ExecGetUpdateNewTuple(resultRelInfo, epqslot,
1542 												oldSlot);
1543 			return false;
1544 		}
1545 	}
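
	/*
	 * Note: returning false above hands the re-fetched row back to
	 * ExecUpdate() via *retry_slot so that it can redo its checks and retry
	 * the update, whereas returning true tells the caller that this row
	 * needs no further processing here.
	 */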
1546 
1547 	/*
1548 	 * resultRelInfo is one of the per-relation resultRelInfos.  So we should
1549 	 * convert the tuple into root's tuple descriptor if needed, since
1550 	 * ExecInsert() starts the search from root.
1551 	 */
1552 	tupconv_map = ExecGetChildToRootMap(resultRelInfo);
1553 	if (tupconv_map != NULL)
1554 		slot = execute_attr_map_slot(tupconv_map->attrMap,
1555 									 slot,
1556 									 mtstate->mt_root_tuple_slot);
1557 
1558 	/* Tuple routing starts from the root table. */
1559 	*inserted_tuple = ExecInsert(mtstate, mtstate->rootResultRelInfo, slot,
1560 								 planSlot, estate, canSetTag);
1561 
1562 	/*
1563 	 * Reset the transition state that may possibly have been written by
1564 	 * INSERT.
1565 	 */
1566 	if (mtstate->mt_transition_capture)
1567 		mtstate->mt_transition_capture->tcs_original_insert_tuple = NULL;
1568 
1569 	/* We're done moving. */
1570 	return true;
1571 }
1572 
1573 /* ----------------------------------------------------------------
1574  *		ExecUpdate
1575  *
1576  *		note: we can't run UPDATE queries with transactions
1577  *		off because UPDATEs are actually INSERTs and our
1578  *		scan will mistakenly loop forever, updating the tuple
1579  *		it just inserted.  This should be fixed but until it
1580  *		is, we don't want to get stuck in an infinite loop
1581  *		which corrupts your database.
1582  *
1583  *		When updating a table, tupleid identifies the tuple to
1584  *		update and oldtuple is NULL.  When updating a view, oldtuple
1585  *		is passed to the INSTEAD OF triggers and identifies what to
1586  *		update, and tupleid is invalid.  When updating a foreign table,
1587  *		tupleid is invalid; the FDW has to figure out which row to
1588  *		update using data from the planSlot.  oldtuple is passed to
1589  *		foreign table triggers; it is NULL when the foreign table has
1590  *		no relevant triggers.
1591  *
1592  *		slot contains the new tuple value to be stored.
1593  *		planSlot is the output of the ModifyTable's subplan; we use it
1594  *		to access values from other input tables (for RETURNING),
1595  *		row-ID junk columns, etc.
1596  *
1597  *		Returns RETURNING result if any, otherwise NULL.
1598  * ----------------------------------------------------------------
1599  */
1600 static TupleTableSlot *
1601 ExecUpdate(ModifyTableState *mtstate,
1602 		   ResultRelInfo *resultRelInfo,
1603 		   ItemPointer tupleid,
1604 		   HeapTuple oldtuple,
1605 		   TupleTableSlot *slot,
1606 		   TupleTableSlot *planSlot,
1607 		   EPQState *epqstate,
1608 		   EState *estate,
1609 		   bool canSetTag)
1610 {
1611 	Relation	resultRelationDesc = resultRelInfo->ri_RelationDesc;
1612 	TM_Result	result;
1613 	TM_FailureData tmfd;
1614 	List	   *recheckIndexes = NIL;
1615 
1616 	/*
1617 	 * abort the operation if not running transactions
1618 	 */
1619 	if (IsBootstrapProcessingMode())
1620 		elog(ERROR, "cannot UPDATE during bootstrap");
1621 
1622 	ExecMaterializeSlot(slot);
1623 
1624 	/*
1625 	 * Open the table's indexes, if we have not done so already, so that we
1626 	 * can add new index entries for the updated tuple.
1627 	 */
1628 	if (resultRelationDesc->rd_rel->relhasindex &&
1629 		resultRelInfo->ri_IndexRelationDescs == NULL)
1630 		ExecOpenIndices(resultRelInfo, false);
1631 
1632 	/* BEFORE ROW UPDATE Triggers */
1633 	if (resultRelInfo->ri_TrigDesc &&
1634 		resultRelInfo->ri_TrigDesc->trig_update_before_row)
1635 	{
1636 		if (!ExecBRUpdateTriggers(estate, epqstate, resultRelInfo,
1637 								  tupleid, oldtuple, slot))
1638 			return NULL;		/* "do nothing" */
1639 	}
1640 
1641 	/* INSTEAD OF ROW UPDATE Triggers */
1642 	if (resultRelInfo->ri_TrigDesc &&
1643 		resultRelInfo->ri_TrigDesc->trig_update_instead_row)
1644 	{
1645 		if (!ExecIRUpdateTriggers(estate, resultRelInfo,
1646 								  oldtuple, slot))
1647 			return NULL;		/* "do nothing" */
1648 	}
1649 	else if (resultRelInfo->ri_FdwRoutine)
1650 	{
1651 		/*
1652 		 * GENERATED expressions might reference the tableoid column, so
1653 		 * (re-)initialize tts_tableOid before evaluating them.
1654 		 */
1655 		slot->tts_tableOid = RelationGetRelid(resultRelInfo->ri_RelationDesc);
1656 
1657 		/*
1658 		 * Compute stored generated columns
1659 		 */
1660 		if (resultRelationDesc->rd_att->constr &&
1661 			resultRelationDesc->rd_att->constr->has_generated_stored)
1662 			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
1663 									   CMD_UPDATE);
1664 
1665 		/*
1666 		 * update in foreign table: let the FDW do it
1667 		 */
1668 		slot = resultRelInfo->ri_FdwRoutine->ExecForeignUpdate(estate,
1669 															   resultRelInfo,
1670 															   slot,
1671 															   planSlot);
1672 
1673 		if (slot == NULL)		/* "do nothing" */
1674 			return NULL;
1675 
1676 		/*
1677 		 * AFTER ROW Triggers or RETURNING expressions might reference the
1678 		 * tableoid column, so (re-)initialize tts_tableOid before evaluating
1679 		 * them.  (This covers the case where the FDW replaced the slot.)
1680 		 */
1681 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1682 	}
1683 	else
1684 	{
1685 		LockTupleMode lockmode;
1686 		bool		partition_constraint_failed;
1687 		bool		update_indexes;
1688 
1689 		/*
1690 		 * Constraints and GENERATED expressions might reference the tableoid
1691 		 * column, so (re-)initialize tts_tableOid before evaluating them.
1692 		 */
1693 		slot->tts_tableOid = RelationGetRelid(resultRelationDesc);
1694 
1695 		/*
1696 		 * Compute stored generated columns
1697 		 */
1698 		if (resultRelationDesc->rd_att->constr &&
1699 			resultRelationDesc->rd_att->constr->has_generated_stored)
1700 			ExecComputeStoredGenerated(resultRelInfo, estate, slot,
1701 									   CMD_UPDATE);
1702 
1703 		/*
1704 		 * Check any RLS UPDATE WITH CHECK policies
1705 		 *
1706 		 * If we generate a new candidate tuple after EvalPlanQual testing, we
1707 		 * must loop back here and recheck any RLS policies and constraints.
1708 		 * (We don't need to redo triggers, however.  If there are any BEFORE
1709 		 * triggers then trigger.c will have done table_tuple_lock to lock the
1710 		 * correct tuple, so there's no need to do them again.)
1711 		 */
1712 lreplace:;
1713 
1714 		/* ensure slot is independent, consider e.g. EPQ */
1715 		ExecMaterializeSlot(slot);
1716 
1717 		/*
1718 		 * If partition constraint fails, this row might get moved to another
1719 		 * partition, in which case we should check the RLS CHECK policy just
1720 		 * before inserting into the new partition, rather than doing it here.
1721 		 * This is because a trigger on that partition might again change the
1722 		 * row.  So skip the WCO checks if the partition constraint fails.
1723 		 */
1724 		partition_constraint_failed =
1725 			resultRelationDesc->rd_rel->relispartition &&
1726 			!ExecPartitionCheck(resultRelInfo, slot, estate, false);
1727 
1728 		if (!partition_constraint_failed &&
1729 			resultRelInfo->ri_WithCheckOptions != NIL)
1730 		{
1731 			/*
1732 			 * ExecWithCheckOptions() will skip any WCOs which are not of the
1733 			 * kind we are looking for at this point.
1734 			 */
1735 			ExecWithCheckOptions(WCO_RLS_UPDATE_CHECK,
1736 								 resultRelInfo, slot, estate);
1737 		}
1738 
1739 		/*
1740 		 * If a partition check failed, try to move the row into the right
1741 		 * partition.
1742 		 */
1743 		if (partition_constraint_failed)
1744 		{
1745 			TupleTableSlot *inserted_tuple,
1746 					   *retry_slot;
1747 			bool		retry;
1748 
1749 			/*
1750 			 * ExecCrossPartitionUpdate will first DELETE the row from the
1751 			 * partition it's currently in and then insert it back into the
1752 			 * root table, which will re-route it to the correct partition.
1753 			 * The first part may have to be repeated if it is detected that
1754 			 * the tuple we're trying to move has been concurrently updated.
1755 			 */
1756 			retry = !ExecCrossPartitionUpdate(mtstate, resultRelInfo, tupleid,
1757 											  oldtuple, slot, planSlot,
1758 											  epqstate, canSetTag,
1759 											  &retry_slot, &inserted_tuple);
1760 			if (retry)
1761 			{
1762 				slot = retry_slot;
1763 				goto lreplace;
1764 			}
1765 
1766 			return inserted_tuple;
1767 		}
1768 
1769 		/*
1770 		 * Check the constraints of the tuple.  We've already checked the
1771 		 * partition constraint above; however, we must still ensure the tuple
1772 		 * passes all other constraints, so we will call ExecConstraints() and
1773 		 * have it validate all remaining checks.
1774 		 */
1775 		if (resultRelationDesc->rd_att->constr)
1776 			ExecConstraints(resultRelInfo, slot, estate);
1777 
1778 		/*
1779 		 * replace the heap tuple
1780 		 *
1781 		 * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check
1782 		 * that the row to be updated is visible to that snapshot, and throw a
1783 		 * can't-serialize error if not. This is a special-case behavior
1784 		 * needed for referential integrity updates in transaction-snapshot
1785 		 * mode transactions.
1786 		 */
1787 		result = table_tuple_update(resultRelationDesc, tupleid, slot,
1788 									estate->es_output_cid,
1789 									estate->es_snapshot,
1790 									estate->es_crosscheck_snapshot,
1791 									true /* wait for commit */ ,
1792 									&tmfd, &lockmode, &update_indexes);
1793 
1794 		switch (result)
1795 		{
1796 			case TM_SelfModified:
1797 
1798 				/*
1799 				 * The target tuple was already updated or deleted by the
1800 				 * current command, or by a later command in the current
1801 				 * transaction.  The former case is possible in a join UPDATE
1802 				 * where multiple tuples join to the same target tuple. This
1803 				 * is pretty questionable, but Postgres has always allowed it:
1804 				 * we just execute the first update action and ignore
1805 				 * additional update attempts.
1806 				 *
1807 				 * The latter case arises if the tuple is modified by a
1808 				 * command in a BEFORE trigger, or perhaps by a command in a
1809 				 * volatile function used in the query.  In such situations we
1810 				 * should not ignore the update, but it is equally unsafe to
1811 				 * proceed.  We don't want to discard the original UPDATE
1812 				 * while keeping the triggered actions based on it; and we
1813 				 * have no principled way to merge this update with the
1814 				 * previous ones.  So throwing an error is the only safe
1815 				 * course.
1816 				 *
1817 				 * If a trigger actually intends this type of interaction, it
1818 				 * can re-execute the UPDATE (assuming it can figure out how)
1819 				 * and then return NULL to cancel the outer update.
1820 				 */
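
				/*
				 * Illustrative example (hypothetical tables): in
				 *
				 *   UPDATE t SET v = s.v FROM s WHERE t.k = s.k;
				 *
				 * two "s" rows with the same key can join to one "t" row;
				 * the second match arrives here as self-modified and is
				 * silently ignored.
				 */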
1821 				if (tmfd.cmax != estate->es_output_cid)
1822 					ereport(ERROR,
1823 							(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1824 							 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1825 							 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1826 
1827 				/* Else, already updated by self; nothing to do */
1828 				return NULL;
1829 
1830 			case TM_Ok:
1831 				break;
1832 
1833 			case TM_Updated:
1834 				{
1835 					TupleTableSlot *inputslot;
1836 					TupleTableSlot *epqslot;
1837 					TupleTableSlot *oldSlot;
1838 
1839 					if (IsolationUsesXactSnapshot())
1840 						ereport(ERROR,
1841 								(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1842 								 errmsg("could not serialize access due to concurrent update")));
1843 
1844 					/*
1845 					 * Already know that we're going to need to do EPQ, so
1846 					 * fetch tuple directly into the right slot.
1847 					 */
1848 					inputslot = EvalPlanQualSlot(epqstate, resultRelationDesc,
1849 												 resultRelInfo->ri_RangeTableIndex);
1850 
1851 					result = table_tuple_lock(resultRelationDesc, tupleid,
1852 											  estate->es_snapshot,
1853 											  inputslot, estate->es_output_cid,
1854 											  lockmode, LockWaitBlock,
1855 											  TUPLE_LOCK_FLAG_FIND_LAST_VERSION,
1856 											  &tmfd);
1857 
1858 					switch (result)
1859 					{
1860 						case TM_Ok:
1861 							Assert(tmfd.traversed);
1862 
1863 							epqslot = EvalPlanQual(epqstate,
1864 												   resultRelationDesc,
1865 												   resultRelInfo->ri_RangeTableIndex,
1866 												   inputslot);
1867 							if (TupIsNull(epqslot))
1868 								/* Tuple not passing quals anymore, exiting... */
1869 								return NULL;
1870 
1871 							/* Make sure ri_oldTupleSlot is initialized. */
1872 							if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
1873 								ExecInitUpdateProjection(mtstate, resultRelInfo);
1874 
1875 							/* Fetch the most recent version of old tuple. */
1876 							oldSlot = resultRelInfo->ri_oldTupleSlot;
1877 							if (!table_tuple_fetch_row_version(resultRelationDesc,
1878 															   tupleid,
1879 															   SnapshotAny,
1880 															   oldSlot))
1881 								elog(ERROR, "failed to fetch tuple being updated");
1882 							slot = ExecGetUpdateNewTuple(resultRelInfo,
1883 														 epqslot, oldSlot);
1884 							goto lreplace;
1885 
1886 						case TM_Deleted:
1887 							/* tuple already deleted; nothing to do */
1888 							return NULL;
1889 
1890 						case TM_SelfModified:
1891 
1892 							/*
1893 							 * This can be reached when following an update
1894 							 * chain from a tuple updated by another session,
1895 							 * reaching a tuple that was already updated in
1896 							 * this transaction. If previously modified by
1897 							 * this command, ignore the redundant update,
1898 							 * otherwise error out.
1899 							 *
1900 							 * See also TM_SelfModified response to
1901 							 * table_tuple_update() above.
1902 							 */
1903 							if (tmfd.cmax != estate->es_output_cid)
1904 								ereport(ERROR,
1905 										(errcode(ERRCODE_TRIGGERED_DATA_CHANGE_VIOLATION),
1906 										 errmsg("tuple to be updated was already modified by an operation triggered by the current command"),
1907 										 errhint("Consider using an AFTER trigger instead of a BEFORE trigger to propagate changes to other rows.")));
1908 							return NULL;
1909 
1910 						default:
1911 							/* see table_tuple_lock call in ExecDelete() */
1912 							elog(ERROR, "unexpected table_tuple_lock status: %u",
1913 								 result);
1914 							return NULL;
1915 					}
1916 				}
1917 
1918 				break;
1919 
1920 			case TM_Deleted:
1921 				if (IsolationUsesXactSnapshot())
1922 					ereport(ERROR,
1923 							(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
1924 							 errmsg("could not serialize access due to concurrent delete")));
1925 				/* tuple already deleted; nothing to do */
1926 				return NULL;
1927 
1928 			default:
1929 				elog(ERROR, "unrecognized table_tuple_update status: %u",
1930 					 result);
1931 				return NULL;
1932 		}
1933 
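		/*
		 * Note: the table AM reports update_indexes = false when the existing
		 * index entries remain valid for the new tuple version (for the heap
		 * AM, a HOT update), so no new index entries are needed in that case.
		 */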
1934 		/* insert index entries for tuple if necessary */
1935 		if (resultRelInfo->ri_NumIndices > 0 && update_indexes)
1936 			recheckIndexes = ExecInsertIndexTuples(resultRelInfo,
1937 												   slot, estate, true, false,
1938 												   NULL, NIL);
1939 	}
1940 
1941 	if (canSetTag)
1942 		(estate->es_processed)++;
1943 
1944 	/* AFTER ROW UPDATE Triggers */
1945 	ExecARUpdateTriggers(estate, resultRelInfo, tupleid, oldtuple, slot,
1946 						 recheckIndexes,
1947 						 mtstate->operation == CMD_INSERT ?
1948 						 mtstate->mt_oc_transition_capture :
1949 						 mtstate->mt_transition_capture);
1950 
1951 	list_free(recheckIndexes);
1952 
1953 	/*
1954 	 * Check any WITH CHECK OPTION constraints from parent views.  We are
1955 	 * required to do this after testing all constraints and uniqueness
1956 	 * violations per the SQL spec, so we do it after actually updating the
1957 	 * record in the heap and all indexes.
1958 	 *
1959 	 * ExecWithCheckOptions() will skip any WCOs which are not of the kind we
1960 	 * are looking for at this point.
1961 	 */
1962 	if (resultRelInfo->ri_WithCheckOptions != NIL)
1963 		ExecWithCheckOptions(WCO_VIEW_CHECK, resultRelInfo, slot, estate);
1964 
1965 	/* Process RETURNING if present */
1966 	if (resultRelInfo->ri_projectReturning)
1967 		return ExecProcessReturning(resultRelInfo, slot, planSlot);
1968 
1969 	return NULL;
1970 }
1971 
1972 /*
1973  * ExecOnConflictUpdate --- execute UPDATE of INSERT ON CONFLICT DO UPDATE
1974  *
1975  * Try to lock tuple for update as part of speculative insertion.  If
1976  * a qual originating from ON CONFLICT DO UPDATE is satisfied, update
1977  * (but still lock row, even though it may not satisfy estate's
1978  * snapshot).
1979  *
1980  * Returns true if we're done (with or without an update), or false if
1981  * the caller must retry the INSERT from scratch.
1982  */
1983 static bool
1984 ExecOnConflictUpdate(ModifyTableState *mtstate,
1985 					 ResultRelInfo *resultRelInfo,
1986 					 ItemPointer conflictTid,
1987 					 TupleTableSlot *planSlot,
1988 					 TupleTableSlot *excludedSlot,
1989 					 EState *estate,
1990 					 bool canSetTag,
1991 					 TupleTableSlot **returning)
1992 {
1993 	ExprContext *econtext = mtstate->ps.ps_ExprContext;
1994 	Relation	relation = resultRelInfo->ri_RelationDesc;
1995 	ExprState  *onConflictSetWhere = resultRelInfo->ri_onConflict->oc_WhereClause;
1996 	TupleTableSlot *existing = resultRelInfo->ri_onConflict->oc_Existing;
1997 	TM_FailureData tmfd;
1998 	LockTupleMode lockmode;
1999 	TM_Result	test;
2000 	Datum		xminDatum;
2001 	TransactionId xmin;
2002 	bool		isnull;
2003 
2004 	/* Determine lock mode to use */
2005 	lockmode = ExecUpdateLockMode(estate, resultRelInfo);
2006 
2007 	/*
2008 	 * Lock tuple for update.  Don't follow updates when tuple cannot be
2009 	 * locked without doing so.  A row locking conflict here means our
2010 	 * previous conclusion that the tuple is conclusively committed is not
2011 	 * true anymore.
2012 	 */
2013 	test = table_tuple_lock(relation, conflictTid,
2014 							estate->es_snapshot,
2015 							existing, estate->es_output_cid,
2016 							lockmode, LockWaitBlock, 0,
2017 							&tmfd);
2018 	switch (test)
2019 	{
2020 		case TM_Ok:
2021 			/* success! */
2022 			break;
2023 
2024 		case TM_Invisible:
2025 
2026 			/*
2027 			 * This can occur when a just-inserted tuple is updated again in
2028 			 * the same command, e.g. because multiple rows with the same
2029 			 * conflicting key values are inserted.
2030 			 *
2031 			 * This is somewhat similar to the ExecUpdate() TM_SelfModified
2032 			 * case.  We do not want to proceed because it would lead to the
2033 			 * same row being updated a second time in some unspecified order,
2034 			 * and in contrast to plain UPDATEs there's no historical behavior
2035 			 * to break.
2036 			 *
2037 			 * It is the user's responsibility to prevent this situation from
2038 			 * occurring.  These problems are why SQL-2003 similarly specifies
2039 			 * that for SQL MERGE, an exception must be raised in the event of
2040 			 * an attempt to update the same row twice.
2041 			 */
2042 			xminDatum = slot_getsysattr(existing,
2043 										MinTransactionIdAttributeNumber,
2044 										&isnull);
2045 			Assert(!isnull);
2046 			xmin = DatumGetTransactionId(xminDatum);
2047 
2048 			if (TransactionIdIsCurrentTransactionId(xmin))
2049 				ereport(ERROR,
2050 						(errcode(ERRCODE_CARDINALITY_VIOLATION),
2051 						 errmsg("ON CONFLICT DO UPDATE command cannot affect row a second time"),
2052 						 errhint("Ensure that no rows proposed for insertion within the same command have duplicate constrained values.")));
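
			/*
			 * Illustrative example (hypothetical table and column names):
			 *
			 *   INSERT INTO t (k, v) VALUES (1, 'a'), (1, 'b')
			 *     ON CONFLICT (k) DO UPDATE SET v = EXCLUDED.v;
			 *
			 * tries to update the row inserted for the first VALUES item
			 * while processing the second, and fails with the error above.
			 */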
2053 
2054 			/* This shouldn't happen */
2055 			elog(ERROR, "attempted to lock invisible tuple");
2056 			break;
2057 
2058 		case TM_SelfModified:
2059 
2060 			/*
2061 			 * This state should never be reached. As a dirty snapshot is used
2062 			 * to find conflicting tuples, speculative insertion wouldn't have
2063 			 * seen this row to conflict with.
2064 			 */
2065 			elog(ERROR, "unexpected self-updated tuple");
2066 			break;
2067 
2068 		case TM_Updated:
2069 			if (IsolationUsesXactSnapshot())
2070 				ereport(ERROR,
2071 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2072 						 errmsg("could not serialize access due to concurrent update")));
2073 
2074 			/*
2075 			 * As long as we don't support an UPDATE of INSERT ON CONFLICT for
2076 			 * a partitioned table, we shouldn't reach a case where the tuple
2077 			 * to be locked has been moved to another partition due to a
2078 			 * concurrent update of the partition key.
2079 			 */
2080 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
2081 
2082 			/*
2083 			 * Tell caller to try again from the very start.
2084 			 *
2085 			 * It does not make sense to use the usual EvalPlanQual() style
2086 			 * loop here, as the new version of the row might not conflict
2087 			 * anymore, or the conflicting tuple has actually been deleted.
2088 			 */
2089 			ExecClearTuple(existing);
2090 			return false;
2091 
2092 		case TM_Deleted:
2093 			if (IsolationUsesXactSnapshot())
2094 				ereport(ERROR,
2095 						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
2096 						 errmsg("could not serialize access due to concurrent delete")));
2097 
2098 			/* see TM_Updated case */
2099 			Assert(!ItemPointerIndicatesMovedPartitions(&tmfd.ctid));
2100 			ExecClearTuple(existing);
2101 			return false;
2102 
2103 		default:
2104 			elog(ERROR, "unrecognized table_tuple_lock status: %u", test);
2105 	}
2106 
2107 	/* Success, the tuple is locked. */
2108 
2109 	/*
2110 	 * Verify that the tuple is visible to our MVCC snapshot if the current
2111 	 * isolation level mandates that.
2112 	 *
2113 	 * It's not sufficient to rely on the check within ExecUpdate(), as e.g.
2114 	 * the ON CONFLICT ... WHERE clause may prevent us from reaching it.
2115 	 *
2116 	 * This means we only ever continue when a new command in the current
2117 	 * transaction could see the row, even though in READ COMMITTED mode the
2118 	 * tuple will not be visible according to the current statement's
2119 	 * snapshot.  This is in line with the way UPDATE deals with newer tuple
2120 	 * versions.
2121 	 */
2122 	ExecCheckTupleVisible(estate, relation, existing);
2123 
2124 	/*
2125 	 * Make tuple and any needed join variables available to ExecQual and
2126 	 * ExecProject.  The EXCLUDED tuple is installed in ecxt_innertuple, while
2127 	 * the target's existing tuple is installed in the scantuple.  EXCLUDED
2128 	 * has been made to reference INNER_VAR in setrefs.c, but there is no
2129 	 * other redirection.
2130 	 */
2131 	econtext->ecxt_scantuple = existing;
2132 	econtext->ecxt_innertuple = excludedSlot;
2133 	econtext->ecxt_outertuple = NULL;
2134 
2135 	if (!ExecQual(onConflictSetWhere, econtext))
2136 	{
2137 		ExecClearTuple(existing);	/* see return below */
2138 		InstrCountFiltered1(&mtstate->ps, 1);
2139 		return true;			/* done with the tuple */
2140 	}
2141 
2142 	if (resultRelInfo->ri_WithCheckOptions != NIL)
2143 	{
2144 		/*
2145 		 * Check target's existing tuple against UPDATE-applicable USING
2146 		 * security barrier quals (if any), enforced here as RLS checks/WCOs.
2147 		 *
2148 		 * The rewriter creates UPDATE RLS checks/WCOs for UPDATE security
2149 		 * quals, and stores them as WCOs of "kind" WCO_RLS_CONFLICT_CHECK,
2150 		 * but that's almost the extent of its special handling for ON
2151 		 * CONFLICT DO UPDATE.
2152 		 *
2153 		 * The rewriter will also have associated UPDATE-applicable straight
2154 		 * RLS checks/WCOs for the benefit of the ExecUpdate() call that
2155 		 * follows.  INSERTs and UPDATEs naturally have mutually exclusive WCO
2156 		 * kinds, so there is no danger of spurious over-enforcement in the
2157 		 * INSERT or UPDATE path.
2158 		 */
2159 		ExecWithCheckOptions(WCO_RLS_CONFLICT_CHECK, resultRelInfo,
2160 							 existing,
2161 							 mtstate->ps.state);
2162 	}
2163 
2164 	/* Project the new tuple version */
2165 	ExecProject(resultRelInfo->ri_onConflict->oc_ProjInfo);
2166 
2167 	/*
2168 	 * Note that it is possible that the target tuple has been modified in
2169 	 * this session, after the above table_tuple_lock. We choose to not error
2170 	 * out in that case, in line with ExecUpdate's treatment of similar cases.
2171 	 * This can happen if an UPDATE is triggered from within ExecQual(),
2172 	 * ExecWithCheckOptions() or ExecProject() above, e.g. by selecting from a
2173 	 * wCTE in the ON CONFLICT's SET.
2174 	 */
2175 
2176 	/* Execute UPDATE with projection */
2177 	*returning = ExecUpdate(mtstate, resultRelInfo, conflictTid, NULL,
2178 							resultRelInfo->ri_onConflict->oc_ProjSlot,
2179 							planSlot,
2180 							&mtstate->mt_epqstate, mtstate->ps.state,
2181 							canSetTag);
2182 
2183 	/*
2184 	 * Clear out existing tuple, as there might not be another conflict among
2185 	 * the next input rows. Don't want to hold resources till the end of the
2186 	 * query.
2187 	 */
2188 	ExecClearTuple(existing);
2189 	return true;
2190 }
2191 
2192 
2193 /*
2194  * Process BEFORE EACH STATEMENT triggers
2195  */
2196 static void
2197 fireBSTriggers(ModifyTableState *node)
2198 {
2199 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
2200 	ResultRelInfo *resultRelInfo = node->rootResultRelInfo;
2201 
2202 	switch (node->operation)
2203 	{
2204 		case CMD_INSERT:
2205 			ExecBSInsertTriggers(node->ps.state, resultRelInfo);
2206 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
2207 				ExecBSUpdateTriggers(node->ps.state,
2208 									 resultRelInfo);
2209 			break;
2210 		case CMD_UPDATE:
2211 			ExecBSUpdateTriggers(node->ps.state, resultRelInfo);
2212 			break;
2213 		case CMD_DELETE:
2214 			ExecBSDeleteTriggers(node->ps.state, resultRelInfo);
2215 			break;
2216 		default:
2217 			elog(ERROR, "unknown operation");
2218 			break;
2219 	}
2220 }
2221 
2222 /*
2223  * Process AFTER EACH STATEMENT triggers
2224  */
2225 static void
2226 fireASTriggers(ModifyTableState *node)
2227 {
2228 	ModifyTable *plan = (ModifyTable *) node->ps.plan;
2229 	ResultRelInfo *resultRelInfo = node->rootResultRelInfo;
2230 
2231 	switch (node->operation)
2232 	{
2233 		case CMD_INSERT:
2234 			if (plan->onConflictAction == ONCONFLICT_UPDATE)
2235 				ExecASUpdateTriggers(node->ps.state,
2236 									 resultRelInfo,
2237 									 node->mt_oc_transition_capture);
2238 			ExecASInsertTriggers(node->ps.state, resultRelInfo,
2239 								 node->mt_transition_capture);
2240 			break;
2241 		case CMD_UPDATE:
2242 			ExecASUpdateTriggers(node->ps.state, resultRelInfo,
2243 								 node->mt_transition_capture);
2244 			break;
2245 		case CMD_DELETE:
2246 			ExecASDeleteTriggers(node->ps.state, resultRelInfo,
2247 								 node->mt_transition_capture);
2248 			break;
2249 		default:
2250 			elog(ERROR, "unknown operation");
2251 			break;
2252 	}
2253 }
2254 
2255 /*
2256  * Set up the state needed for collecting transition tuples for AFTER
2257  * triggers.
2258  */
2259 static void
2260 ExecSetupTransitionCaptureState(ModifyTableState *mtstate, EState *estate)
2261 {
2262 	ModifyTable *plan = (ModifyTable *) mtstate->ps.plan;
2263 	ResultRelInfo *targetRelInfo = mtstate->rootResultRelInfo;
2264 
2265 	/* Check for transition tables on the directly targeted relation. */
2266 	mtstate->mt_transition_capture =
2267 		MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
2268 								   RelationGetRelid(targetRelInfo->ri_RelationDesc),
2269 								   mtstate->operation);
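
	/*
	 * For INSERT ... ON CONFLICT DO UPDATE, the statement can also update
	 * rows, so set up a separate capture state for UPDATE transition tuples.
	 */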
2270 	if (plan->operation == CMD_INSERT &&
2271 		plan->onConflictAction == ONCONFLICT_UPDATE)
2272 		mtstate->mt_oc_transition_capture =
2273 			MakeTransitionCaptureState(targetRelInfo->ri_TrigDesc,
2274 									   RelationGetRelid(targetRelInfo->ri_RelationDesc),
2275 									   CMD_UPDATE);
2276 }
2277 
2278 /*
2279  * ExecPrepareTupleRouting --- prepare for routing one tuple
2280  *
2281  * Determine the partition in which the tuple in slot is to be inserted,
2282  * and return its ResultRelInfo in *partRelInfo.  The return value is
2283  * a slot holding the tuple of the partition rowtype.
2284  *
2285  * This also sets the transition table information in mtstate based on the
2286  * selected partition.
2287  */
2288 static TupleTableSlot *
2289 ExecPrepareTupleRouting(ModifyTableState *mtstate,
2290 						EState *estate,
2291 						PartitionTupleRouting *proute,
2292 						ResultRelInfo *targetRelInfo,
2293 						TupleTableSlot *slot,
2294 						ResultRelInfo **partRelInfo)
2295 {
2296 	ResultRelInfo *partrel;
2297 	TupleConversionMap *map;
2298 
2299 	/*
2300 	 * Lookup the target partition's ResultRelInfo.  If ExecFindPartition does
2301 	 * not find a valid partition for the tuple in 'slot' then an error is
2302 	 * raised.  An error may also be raised if the found partition is not a
2303 	 * valid target for INSERTs.  This is required since a partitioned table
2304 	 * UPDATE to another partition becomes a DELETE+INSERT.
2305 	 */
2306 	partrel = ExecFindPartition(mtstate, targetRelInfo, proute, slot, estate);
2307 
2308 	/*
2309 	 * If we're capturing transition tuples, we might need to convert from the
2310 	 * partition rowtype to root partitioned table's rowtype.  But if there
2311 	 * are no BEFORE triggers on the partition that could change the tuple, we
2312 	 * can just remember the original unconverted tuple to avoid a needless
2313 	 * round trip conversion.
2314 	 */
2315 	if (mtstate->mt_transition_capture != NULL)
2316 	{
2317 		bool		has_before_insert_row_trig;
2318 
2319 		has_before_insert_row_trig = (partrel->ri_TrigDesc &&
2320 									  partrel->ri_TrigDesc->trig_insert_before_row);
2321 
2322 		mtstate->mt_transition_capture->tcs_original_insert_tuple =
2323 			!has_before_insert_row_trig ? slot : NULL;
2324 	}
2325 
2326 	/*
2327 	 * Convert the tuple, if necessary.
2328 	 */
2329 	map = partrel->ri_RootToPartitionMap;
2330 	if (map != NULL)
2331 	{
2332 		TupleTableSlot *new_slot = partrel->ri_PartitionTupleSlot;
2333 
2334 		slot = execute_attr_map_slot(map->attrMap, slot, new_slot);
2335 	}
2336 
2337 	*partRelInfo = partrel;
2338 	return slot;
2339 }
2340 
2341 /* ----------------------------------------------------------------
2342  *	   ExecModifyTable
2343  *
2344  *		Perform table modifications as required, and return RETURNING results
2345  *		if needed.
2346  * ----------------------------------------------------------------
2347  */
2348 static TupleTableSlot *
2349 ExecModifyTable(PlanState *pstate)
2350 {
2351 	ModifyTableState *node = castNode(ModifyTableState, pstate);
2352 	EState	   *estate = node->ps.state;
2353 	CmdType		operation = node->operation;
2354 	ResultRelInfo *resultRelInfo;
2355 	PlanState  *subplanstate;
2356 	TupleTableSlot *slot;
2357 	TupleTableSlot *planSlot;
2358 	TupleTableSlot *oldSlot;
2359 	ItemPointer tupleid;
2360 	ItemPointerData tuple_ctid;
2361 	HeapTupleData oldtupdata;
2362 	HeapTuple	oldtuple;
2363 	PartitionTupleRouting *proute = node->mt_partition_tuple_routing;
2364 	List	   *relinfos = NIL;
2365 	ListCell   *lc;
2366 
2367 	CHECK_FOR_INTERRUPTS();
2368 
2369 	/*
2370 	 * This should NOT get called during EvalPlanQual; we should have passed a
2371 	 * subplan tree to EvalPlanQual, instead.  Use a runtime test not just
2372 	 * Assert because this condition is easy to miss in testing.  (Note:
2373 	 * although ModifyTable should not get executed within an EvalPlanQual
2374 	 * operation, we do have to allow it to be initialized and shut down in
2375 	 * case it is within a CTE subplan.  Hence this test must be here, not in
2376 	 * ExecInitModifyTable.)
2377 	 */
2378 	if (estate->es_epq_active != NULL)
2379 		elog(ERROR, "ModifyTable should not be called during EvalPlanQual");
2380 
2381 	/*
2382 	 * If we've already completed processing, don't try to do more.  We need
2383 	 * this test because ExecPostprocessPlan might call us an extra time, and
2384 	 * our subplan's nodes aren't necessarily robust against being called
2385 	 * extra times.
2386 	 */
2387 	if (node->mt_done)
2388 		return NULL;
2389 
2390 	/*
2391 	 * On first call, fire BEFORE STATEMENT triggers before proceeding.
2392 	 */
2393 	if (node->fireBSTriggers)
2394 	{
2395 		fireBSTriggers(node);
2396 		node->fireBSTriggers = false;
2397 	}
2398 
2399 	/* Preload local variables */
2400 	resultRelInfo = node->resultRelInfo + node->mt_lastResultIndex;
2401 	subplanstate = outerPlanState(node);
2402 
2403 	/*
2404 	 * Fetch rows from subplan, and execute the required table modification
2405 	 * for each row.
2406 	 */
2407 	for (;;)
2408 	{
2409 		/*
2410 		 * Reset the per-output-tuple exprcontext.  This is needed because
2411 		 * triggers expect to use that context as workspace.  It's a bit ugly
2412 		 * to do this below the top level of the plan, however.  We might need
2413 		 * to rethink this later.
2414 		 */
2415 		ResetPerTupleExprContext(estate);
2416 
2417 		/*
2418 		 * Reset per-tuple memory context used for processing on conflict and
2419 		 * returning clauses, to free any expression evaluation storage
2420 		 * allocated in the previous cycle.
2421 		 */
2422 		if (pstate->ps_ExprContext)
2423 			ResetExprContext(pstate->ps_ExprContext);
2424 
2425 		planSlot = ExecProcNode(subplanstate);
2426 
2427 		/* No more tuples to process? */
2428 		if (TupIsNull(planSlot))
2429 			break;
2430 
2431 		/*
2432 		 * When there are multiple result relations, each tuple contains a
2433 		 * junk column that gives the OID of the rel from which it came.
2434 		 * Extract it and select the correct result relation.
2435 		 */
2436 		if (AttributeNumberIsValid(node->mt_resultOidAttno))
2437 		{
2438 			Datum		datum;
2439 			bool		isNull;
2440 			Oid			resultoid;
2441 
2442 			datum = ExecGetJunkAttribute(planSlot, node->mt_resultOidAttno,
2443 										 &isNull);
2444 			if (isNull)
2445 				elog(ERROR, "tableoid is NULL");
2446 			resultoid = DatumGetObjectId(datum);
2447 
2448 			/* If it's not the same as last time, we need to locate the rel */
2449 			if (resultoid != node->mt_lastResultOid)
2450 				resultRelInfo = ExecLookupResultRelByOid(node, resultoid,
2451 														 false, true);
2452 		}
2453 
2454 		/*
2455 		 * If resultRelInfo->ri_usesFdwDirectModify is true, all we need to do
2456 		 * here is compute the RETURNING expressions.
2457 		 */
2458 		if (resultRelInfo->ri_usesFdwDirectModify)
2459 		{
2460 			Assert(resultRelInfo->ri_projectReturning);
2461 
2462 			/*
2463 			 * A scan slot containing the data that was actually inserted,
2464 			 * updated or deleted has already been made available to
2465 			 * ExecProcessReturning by IterateDirectModify, so no need to
2466 			 * provide it here.
2467 			 */
2468 			slot = ExecProcessReturning(resultRelInfo, NULL, planSlot);
2469 
2470 			return slot;
2471 		}
2472 
2473 		EvalPlanQualSetSlot(&node->mt_epqstate, planSlot);
2474 		slot = planSlot;
2475 
2476 		tupleid = NULL;
2477 		oldtuple = NULL;
2478 
2479 		/*
2480 		 * For UPDATE/DELETE, fetch the row identity info for the tuple to be
2481 		 * updated/deleted.  For a heap relation, that's a TID; otherwise we
2482 		 * may have a wholerow junk attr that carries the old tuple in toto.
2483 		 * Keep this in step with the part of ExecInitModifyTable that sets up
2484 		 * ri_RowIdAttNo.
2485 		 */
2486 		if (operation == CMD_UPDATE || operation == CMD_DELETE)
2487 		{
2488 			char		relkind;
2489 			Datum		datum;
2490 			bool		isNull;
2491 
2492 			relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2493 			if (relkind == RELKIND_RELATION ||
2494 				relkind == RELKIND_MATVIEW ||
2495 				relkind == RELKIND_PARTITIONED_TABLE)
2496 			{
2497 				/* ri_RowIdAttNo refers to a ctid attribute */
2498 				Assert(AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo));
2499 				datum = ExecGetJunkAttribute(slot,
2500 											 resultRelInfo->ri_RowIdAttNo,
2501 											 &isNull);
2502 				/* shouldn't ever get a null result... */
2503 				if (isNull)
2504 					elog(ERROR, "ctid is NULL");
2505 
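				/*
				 * Note: the extracted Datum points into the subplan's output
				 * slot, whose contents are not guaranteed to stay valid while
				 * this row is processed, so save a local copy of the TID.
				 */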
2506 				tupleid = (ItemPointer) DatumGetPointer(datum);
2507 				tuple_ctid = *tupleid;	/* be sure we don't free ctid!! */
2508 				tupleid = &tuple_ctid;
2509 			}
2510 
2511 			/*
2512 			 * Use the wholerow attribute, when available, to reconstruct the
2513 			 * old relation tuple.  The old tuple serves one or both of two
2514 			 * purposes: 1) it serves as the OLD tuple for row triggers, 2) it
2515 			 * provides values for any unchanged columns for the NEW tuple of
2516 			 * an UPDATE, because the subplan does not produce all the columns
2517 			 * of the target table.
2518 			 *
2519 			 * Note that the wholerow attribute does not carry system columns,
2520 			 * so foreign table triggers miss seeing those, except that we
2521 			 * know enough here to set t_tableOid.  Quite separately from
2522 			 * this, the FDW may fetch its own junk attrs to identify the row.
2523 			 *
2524 			 * Other relevant relkinds, currently limited to views, always
2525 			 * have a wholerow attribute.
2526 			 */
2527 			else if (AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2528 			{
2529 				datum = ExecGetJunkAttribute(slot,
2530 											 resultRelInfo->ri_RowIdAttNo,
2531 											 &isNull);
2532 				/* shouldn't ever get a null result... */
2533 				if (isNull)
2534 					elog(ERROR, "wholerow is NULL");
2535 
2536 				oldtupdata.t_data = DatumGetHeapTupleHeader(datum);
2537 				oldtupdata.t_len =
2538 					HeapTupleHeaderGetDatumLength(oldtupdata.t_data);
2539 				ItemPointerSetInvalid(&(oldtupdata.t_self));
2540 				/* Historically, view triggers see invalid t_tableOid. */
2541 				oldtupdata.t_tableOid =
2542 					(relkind == RELKIND_VIEW) ? InvalidOid :
2543 					RelationGetRelid(resultRelInfo->ri_RelationDesc);
2544 
2545 				oldtuple = &oldtupdata;
2546 			}
2547 			else
2548 			{
2549 				/* Only foreign tables are allowed to omit a row-ID attr */
2550 				Assert(relkind == RELKIND_FOREIGN_TABLE);
2551 			}
2552 		}
2553 
2554 		switch (operation)
2555 		{
2556 			case CMD_INSERT:
2557 				/* Initialize projection info if first time for this table */
2558 				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
2559 					ExecInitInsertProjection(node, resultRelInfo);
2560 				slot = ExecGetInsertNewTuple(resultRelInfo, planSlot);
2561 				slot = ExecInsert(node, resultRelInfo, slot, planSlot,
2562 								  estate, node->canSetTag);
2563 				break;
2564 			case CMD_UPDATE:
2565 				/* Initialize projection info if first time for this table */
2566 				if (unlikely(!resultRelInfo->ri_projectNewInfoValid))
2567 					ExecInitUpdateProjection(node, resultRelInfo);
2568 
2569 				/*
2570 				 * Make the new tuple by combining plan's output tuple with
2571 				 * the old tuple being updated.
2572 				 */
2573 				oldSlot = resultRelInfo->ri_oldTupleSlot;
2574 				if (oldtuple != NULL)
2575 				{
2576 					/* Use the wholerow junk attr as the old tuple. */
2577 					ExecForceStoreHeapTuple(oldtuple, oldSlot, false);
2578 				}
2579 				else
2580 				{
2581 					/* Fetch the most recent version of old tuple. */
2582 					Relation	relation = resultRelInfo->ri_RelationDesc;
2583 
2584 					Assert(tupleid != NULL);
2585 					if (!table_tuple_fetch_row_version(relation, tupleid,
2586 													   SnapshotAny,
2587 													   oldSlot))
2588 						elog(ERROR, "failed to fetch tuple being updated");
2589 				}
2590 				slot = ExecGetUpdateNewTuple(resultRelInfo, planSlot,
2591 											 oldSlot);
2592 
2593 				/* Now apply the update. */
2594 				slot = ExecUpdate(node, resultRelInfo, tupleid, oldtuple, slot,
2595 								  planSlot, &node->mt_epqstate, estate,
2596 								  node->canSetTag);
2597 				break;
2598 			case CMD_DELETE:
2599 				slot = ExecDelete(node, resultRelInfo, tupleid, oldtuple,
2600 								  planSlot, &node->mt_epqstate, estate,
2601 								  true, /* processReturning */
2602 								  node->canSetTag,
2603 								  false,	/* changingPart */
2604 								  NULL, NULL);
2605 				break;
2606 			default:
2607 				elog(ERROR, "unknown operation");
2608 				break;
2609 		}
2610 
2611 		/*
2612 		 * If we got a RETURNING result, return it to caller.  We'll continue
2613 		 * the work on next call.
2614 		 */
2615 		if (slot)
2616 			return slot;
2617 	}
2618 
2619 	/*
2620 	 * Insert remaining tuples for batch insert.
2621 	 */
2622 	if (proute)
2623 		relinfos = estate->es_tuple_routing_result_relations;
2624 	else
2625 		relinfos = estate->es_opened_result_relations;
2626 
2627 	foreach(lc, relinfos)
2628 	{
2629 		resultRelInfo = lfirst(lc);
2630 		if (resultRelInfo->ri_NumSlots > 0)
2631 			ExecBatchInsert(node, resultRelInfo,
2632 							resultRelInfo->ri_Slots,
2633 							resultRelInfo->ri_PlanSlots,
2634 							resultRelInfo->ri_NumSlots,
2635 							estate, node->canSetTag);
2636 	}
2637 
2638 	/*
2639 	 * We're done, but fire AFTER STATEMENT triggers before exiting.
2640 	 */
2641 	fireASTriggers(node);
2642 
2643 	node->mt_done = true;
2644 
2645 	return NULL;
2646 }
2647 
2648 /*
2649  * ExecLookupResultRelByOid
2650  * 		If the table with given OID is among the result relations to be
2651  * 		updated by the given ModifyTable node, return its ResultRelInfo.
2652  *
2653  * If not found, return NULL if missing_ok, else raise error.
2654  *
2655  * If update_cache is true, then upon successful lookup, update the node's
2656  * one-element cache.  ONLY ExecModifyTable may pass true for this.
2657  */
2658 ResultRelInfo *
2659 ExecLookupResultRelByOid(ModifyTableState *node, Oid resultoid,
2660 						 bool missing_ok, bool update_cache)
2661 {
2662 	if (node->mt_resultOidHash)
2663 	{
2664 		/* Use the pre-built hash table to locate the rel */
2665 		MTTargetRelLookup *mtlookup;
2666 
2667 		mtlookup = (MTTargetRelLookup *)
2668 			hash_search(node->mt_resultOidHash, &resultoid, HASH_FIND, NULL);
2669 		if (mtlookup)
2670 		{
2671 			if (update_cache)
2672 			{
2673 				node->mt_lastResultOid = resultoid;
2674 				node->mt_lastResultIndex = mtlookup->relationIndex;
2675 			}
2676 			return node->resultRelInfo + mtlookup->relationIndex;
2677 		}
2678 	}
2679 	else
2680 	{
2681 		/* With few target rels, just search the ResultRelInfo array */
2682 		for (int ndx = 0; ndx < node->mt_nrels; ndx++)
2683 		{
2684 			ResultRelInfo *rInfo = node->resultRelInfo + ndx;
2685 
2686 			if (RelationGetRelid(rInfo->ri_RelationDesc) == resultoid)
2687 			{
2688 				if (update_cache)
2689 				{
2690 					node->mt_lastResultOid = resultoid;
2691 					node->mt_lastResultIndex = ndx;
2692 				}
2693 				return rInfo;
2694 			}
2695 		}
2696 	}
2697 
2698 	if (!missing_ok)
2699 		elog(ERROR, "incorrect result relation OID %u", resultoid);
2700 	return NULL;
2701 }
2702 
2703 /* ----------------------------------------------------------------
2704  *		ExecInitModifyTable
2705  * ----------------------------------------------------------------
2706  */
2707 ModifyTableState *
2708 ExecInitModifyTable(ModifyTable *node, EState *estate, int eflags)
2709 {
2710 	ModifyTableState *mtstate;
2711 	Plan	   *subplan = outerPlan(node);
2712 	CmdType		operation = node->operation;
2713 	int			nrels = list_length(node->resultRelations);
2714 	ResultRelInfo *resultRelInfo;
2715 	List	   *arowmarks;
2716 	ListCell   *l;
2717 	int			i;
2718 	Relation	rel;
2719 
2720 	/* check for unsupported flags */
2721 	Assert(!(eflags & (EXEC_FLAG_BACKWARD | EXEC_FLAG_MARK)));
2722 
2723 	/*
2724 	 * create state structure
2725 	 */
2726 	mtstate = makeNode(ModifyTableState);
2727 	mtstate->ps.plan = (Plan *) node;
2728 	mtstate->ps.state = estate;
2729 	mtstate->ps.ExecProcNode = ExecModifyTable;
2730 
2731 	mtstate->operation = operation;
2732 	mtstate->canSetTag = node->canSetTag;
2733 	mtstate->mt_done = false;
2734 
2735 	mtstate->mt_nrels = nrels;
2736 	mtstate->resultRelInfo = (ResultRelInfo *)
2737 		palloc(nrels * sizeof(ResultRelInfo));
2738 
2739 	/*----------
2740 	 * Resolve the target relation. This is the same as:
2741 	 *
2742 	 * - the relation for which we will fire FOR STATEMENT triggers,
2743 	 * - the relation into whose tuple format all captured transition tuples
2744 	 *   must be converted, and
2745 	 * - the root partitioned table used for tuple routing.
2746 	 *
2747 	 * If it's a partitioned table, the root partition doesn't appear
2748 	 * elsewhere in the plan and its RT index is given explicitly in
2749 	 * node->rootRelation.  Otherwise (i.e. table inheritance) the target
2750 	 * relation is the first relation in the node->resultRelations list.
2751 	 *----------
2752 	 */
2753 	if (node->rootRelation > 0)
2754 	{
2755 		mtstate->rootResultRelInfo = makeNode(ResultRelInfo);
2756 		ExecInitResultRelation(estate, mtstate->rootResultRelInfo,
2757 							   node->rootRelation);
2758 	}
2759 	else
2760 	{
2761 		mtstate->rootResultRelInfo = mtstate->resultRelInfo;
2762 		ExecInitResultRelation(estate, mtstate->resultRelInfo,
2763 							   linitial_int(node->resultRelations));
2764 	}
2765 
2766 	/* set up epqstate with dummy subplan data for the moment */
2767 	EvalPlanQualInit(&mtstate->mt_epqstate, estate, NULL, NIL, node->epqParam);
2768 	mtstate->fireBSTriggers = true;
2769 
2770 	/*
2771 	 * Build state for collecting transition tuples.  This requires having a
2772 	 * valid trigger query context, so skip it in explain-only mode.
2773 	 */
2774 	if (!(eflags & EXEC_FLAG_EXPLAIN_ONLY))
2775 		ExecSetupTransitionCaptureState(mtstate, estate);
2776 
2777 	/*
2778 	 * Open all the result relations and initialize the ResultRelInfo structs.
2779 	 * (But root relation was initialized above, if it's part of the array.)
2780 	 * We must do this before initializing the subplan, because direct-modify
2781 	 * FDWs expect their ResultRelInfos to be available.
2782 	 */
2783 	resultRelInfo = mtstate->resultRelInfo;
2784 	i = 0;
2785 	foreach(l, node->resultRelations)
2786 	{
2787 		Index		resultRelation = lfirst_int(l);
2788 
2789 		if (resultRelInfo != mtstate->rootResultRelInfo)
2790 		{
2791 			ExecInitResultRelation(estate, resultRelInfo, resultRelation);
2792 
2793 			/*
2794 			 * For child result relations, store the root result relation
2795 			 * pointer.  We do so for the convenience of places that want to
2796 			 * look at the query's original target relation but don't have the
2797 			 * mtstate handy.
2798 			 */
2799 			resultRelInfo->ri_RootResultRelInfo = mtstate->rootResultRelInfo;
2800 		}
2801 
2802 		/* Initialize the usesFdwDirectModify flag */
2803 		resultRelInfo->ri_usesFdwDirectModify = bms_is_member(i,
2804 															  node->fdwDirectModifyPlans);
2805 
2806 		/*
2807 		 * Verify result relation is a valid target for the current operation
2808 		 */
2809 		CheckValidResultRel(resultRelInfo, operation);
2810 
2811 		resultRelInfo++;
2812 		i++;
2813 	}
2814 
2815 	/*
2816 	 * Now we may initialize the subplan.
2817 	 */
2818 	outerPlanState(mtstate) = ExecInitNode(subplan, estate, eflags);
2819 
2820 	/*
2821 	 * Do additional per-result-relation initialization.
2822 	 */
2823 	for (i = 0; i < nrels; i++)
2824 	{
2825 		resultRelInfo = &mtstate->resultRelInfo[i];
2826 
2827 		/* Let FDWs init themselves for foreign-table result rels */
2828 		if (!resultRelInfo->ri_usesFdwDirectModify &&
2829 			resultRelInfo->ri_FdwRoutine != NULL &&
2830 			resultRelInfo->ri_FdwRoutine->BeginForeignModify != NULL)
2831 		{
2832 			List	   *fdw_private = (List *) list_nth(node->fdwPrivLists, i);
2833 
2834 			resultRelInfo->ri_FdwRoutine->BeginForeignModify(mtstate,
2835 															 resultRelInfo,
2836 															 fdw_private,
2837 															 i,
2838 															 eflags);
2839 		}
2840 
2841 		/*
2842 		 * For UPDATE/DELETE, find the appropriate junk attr now, either a
2843 		 * 'ctid' or 'wholerow' attribute depending on relkind.  For foreign
2844 		 * tables, the FDW might have created additional junk attr(s), but
2845 		 * those are no concern of ours.
2846 		 */
2847 		if (operation == CMD_UPDATE || operation == CMD_DELETE)
2848 		{
2849 			char		relkind;
2850 
2851 			relkind = resultRelInfo->ri_RelationDesc->rd_rel->relkind;
2852 			if (relkind == RELKIND_RELATION ||
2853 				relkind == RELKIND_MATVIEW ||
2854 				relkind == RELKIND_PARTITIONED_TABLE)
2855 			{
2856 				resultRelInfo->ri_RowIdAttNo =
2857 					ExecFindJunkAttributeInTlist(subplan->targetlist, "ctid");
2858 				if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2859 					elog(ERROR, "could not find junk ctid column");
2860 			}
2861 			else if (relkind == RELKIND_FOREIGN_TABLE)
2862 			{
2863 				/*
2864 				 * When there is a row-level trigger, there should be a
2865 				 * wholerow attribute.  We also require it to be present in
2866 				 * UPDATE, so we can get the values of unchanged columns.
2867 				 */
2868 				resultRelInfo->ri_RowIdAttNo =
2869 					ExecFindJunkAttributeInTlist(subplan->targetlist,
2870 												 "wholerow");
2871 				if (mtstate->operation == CMD_UPDATE &&
2872 					!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2873 					elog(ERROR, "could not find junk wholerow column");
2874 			}
2875 			else
2876 			{
2877 				/* Other valid target relkinds must provide wholerow */
2878 				resultRelInfo->ri_RowIdAttNo =
2879 					ExecFindJunkAttributeInTlist(subplan->targetlist,
2880 												 "wholerow");
2881 				if (!AttributeNumberIsValid(resultRelInfo->ri_RowIdAttNo))
2882 					elog(ERROR, "could not find junk wholerow column");
2883 			}
2884 		}
2885 	}
2886 
2887 	/*
2888 	 * If this is an inherited update/delete, there will be a junk attribute
2889 	 * named "tableoid" present in the subplan's targetlist.  It will be used
2890 	 * to identify the result relation for a given tuple to be
2891 	 * updated/deleted.
2892 	 */
2893 	mtstate->mt_resultOidAttno =
2894 		ExecFindJunkAttributeInTlist(subplan->targetlist, "tableoid");
2895 	Assert(AttributeNumberIsValid(mtstate->mt_resultOidAttno) || nrels == 1);
2896 	mtstate->mt_lastResultOid = InvalidOid; /* force lookup at first tuple */
2897 	mtstate->mt_lastResultIndex = 0;	/* must be zero if no such attr */
2898 
2899 	/* Get the root target relation */
2900 	rel = mtstate->rootResultRelInfo->ri_RelationDesc;
2901 
2902 	/*
2903 	 * Build state for tuple routing if it's a partitioned INSERT.  An UPDATE
2904 	 * might need this too, but only if it actually moves tuples between
2905 	 * partitions; in that case setup is done by ExecCrossPartitionUpdate.
2906 	 */
2907 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE &&
2908 		operation == CMD_INSERT)
2909 		mtstate->mt_partition_tuple_routing =
2910 			ExecSetupPartitionTupleRouting(estate, rel);
2911 
2912 	/*
2913 	 * Initialize any WITH CHECK OPTION constraints if needed.
2914 	 */
2915 	resultRelInfo = mtstate->resultRelInfo;
2916 	foreach(l, node->withCheckOptionLists)
2917 	{
2918 		List	   *wcoList = (List *) lfirst(l);
2919 		List	   *wcoExprs = NIL;
2920 		ListCell   *ll;
2921 
2922 		foreach(ll, wcoList)
2923 		{
2924 			WithCheckOption *wco = (WithCheckOption *) lfirst(ll);
2925 			ExprState  *wcoExpr = ExecInitQual((List *) wco->qual,
2926 											   &mtstate->ps);
2927 
2928 			wcoExprs = lappend(wcoExprs, wcoExpr);
2929 		}
2930 
2931 		resultRelInfo->ri_WithCheckOptions = wcoList;
2932 		resultRelInfo->ri_WithCheckOptionExprs = wcoExprs;
2933 		resultRelInfo++;
2934 	}
2935 
2936 	/*
2937 	 * Initialize RETURNING projections if needed.
2938 	 */
2939 	if (node->returningLists)
2940 	{
2941 		TupleTableSlot *slot;
2942 		ExprContext *econtext;
2943 
2944 		/*
2945 		 * Initialize result tuple slot and assign its rowtype using the first
2946 		 * RETURNING list.  We assume the rest will look the same.
2947 		 */
2948 		mtstate->ps.plan->targetlist = (List *) linitial(node->returningLists);
2949 
2950 		/* Set up a slot for the output of the RETURNING projection(s) */
2951 		ExecInitResultTupleSlotTL(&mtstate->ps, &TTSOpsVirtual);
2952 		slot = mtstate->ps.ps_ResultTupleSlot;
2953 
2954 		/* Need an econtext too */
2955 		if (mtstate->ps.ps_ExprContext == NULL)
2956 			ExecAssignExprContext(estate, &mtstate->ps);
2957 		econtext = mtstate->ps.ps_ExprContext;
2958 
2959 		/*
2960 		 * Build a projection for each result rel.
2961 		 */
2962 		resultRelInfo = mtstate->resultRelInfo;
2963 		foreach(l, node->returningLists)
2964 		{
2965 			List	   *rlist = (List *) lfirst(l);
2966 
2967 			resultRelInfo->ri_returningList = rlist;
2968 			resultRelInfo->ri_projectReturning =
2969 				ExecBuildProjectionInfo(rlist, econtext, slot, &mtstate->ps,
2970 										resultRelInfo->ri_RelationDesc->rd_att);
2971 			resultRelInfo++;
2972 		}
2973 	}
2974 	else
2975 	{
2976 		/*
2977 		 * We still must construct a dummy result tuple type, because InitPlan
2978 		 * expects one (maybe should change that?).
2979 		 */
2980 		mtstate->ps.plan->targetlist = NIL;
2981 		ExecInitResultTypeTL(&mtstate->ps);
2982 
2983 		mtstate->ps.ps_ExprContext = NULL;
2984 	}
2985 
2986 	/* Set the list of arbiter indexes if needed for ON CONFLICT */
2987 	resultRelInfo = mtstate->resultRelInfo;
2988 	if (node->onConflictAction != ONCONFLICT_NONE)
2989 	{
2990 		/* insert may only have one relation, inheritance is not expanded */
2991 		Assert(nrels == 1);
2992 		resultRelInfo->ri_onConflictArbiterIndexes = node->arbiterIndexes;
2993 	}
2994 
2995 	/*
2996 	 * If needed, initialize target list, projection and qual for ON CONFLICT
2997 	 * DO UPDATE.
2998 	 */
2999 	if (node->onConflictAction == ONCONFLICT_UPDATE)
3000 	{
3001 		OnConflictSetState *onconfl = makeNode(OnConflictSetState);
3002 		ExprContext *econtext;
3003 		TupleDesc	relationDesc;
3004 
3005 		/* already exists if created by RETURNING processing above */
3006 		if (mtstate->ps.ps_ExprContext == NULL)
3007 			ExecAssignExprContext(estate, &mtstate->ps);
3008 
3009 		econtext = mtstate->ps.ps_ExprContext;
3010 		relationDesc = resultRelInfo->ri_RelationDesc->rd_att;
3011 
3012 		/* create state for DO UPDATE SET operation */
3013 		resultRelInfo->ri_onConflict = onconfl;
3014 
3015 		/* initialize slot for the existing tuple */
3016 		onconfl->oc_Existing =
3017 			table_slot_create(resultRelInfo->ri_RelationDesc,
3018 							  &mtstate->ps.state->es_tupleTable);
3019 
3020 		/*
3021 		 * Create the tuple slot for the UPDATE SET projection. We want a slot
3022 		 * of the table's type here, because the slot will be used to insert
3023 		 * into the table, and for RETURNING processing - which may access
3024 		 * system attributes.
3025 		 */
3026 		onconfl->oc_ProjSlot =
3027 			table_slot_create(resultRelInfo->ri_RelationDesc,
3028 							  &mtstate->ps.state->es_tupleTable);
3029 
3030 		/* build UPDATE SET projection state */
3031 		onconfl->oc_ProjInfo =
3032 			ExecBuildUpdateProjection(node->onConflictSet,
3033 									  true,
3034 									  node->onConflictCols,
3035 									  relationDesc,
3036 									  econtext,
3037 									  onconfl->oc_ProjSlot,
3038 									  &mtstate->ps);
3039 
3040 		/* initialize state to evaluate the WHERE clause, if any */
3041 		if (node->onConflictWhere)
3042 		{
3043 			ExprState  *qualexpr;
3044 
3045 			qualexpr = ExecInitQual((List *) node->onConflictWhere,
3046 									&mtstate->ps);
3047 			onconfl->oc_WhereClause = qualexpr;
3048 		}
3049 	}
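	/*
	 * Illustration only: ExecOnConflictUpdate later uses this state roughly
	 * as follows (simplified sketch):
	 *
	 *		econtext->ecxt_scantuple = existing;		-- conflicting row
	 *		econtext->ecxt_innertuple = excludedSlot;	-- proposed row
	 *		if (!ExecQual(onconfl->oc_WhereClause, econtext))
	 *			skip the update;						-- WHERE not satisfied
	 *		ExecProject(onconfl->oc_ProjInfo);			-- compute SET result
	 */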
3050 
3051 	/*
3052 	 * If we have any secondary relations in an UPDATE or DELETE, they need to
3053 	 * be treated like non-locked relations in SELECT FOR UPDATE, ie, the
3054 	 * EvalPlanQual mechanism needs to be told about them.  Locate the
3055 	 * relevant ExecRowMarks.
3056 	 */
3057 	arowmarks = NIL;
3058 	foreach(l, node->rowMarks)
3059 	{
3060 		PlanRowMark *rc = lfirst_node(PlanRowMark, l);
3061 		ExecRowMark *erm;
3062 		ExecAuxRowMark *aerm;
3063 
3064 		/* ignore "parent" rowmarks; they are irrelevant at runtime */
3065 		if (rc->isParent)
3066 			continue;
3067 
3068 		/* Find ExecRowMark and build ExecAuxRowMark */
3069 		erm = ExecFindRowMark(estate, rc->rti, false);
3070 		aerm = ExecBuildAuxRowMark(erm, subplan->targetlist);
3071 		arowmarks = lappend(arowmarks, aerm);
3072 	}
3073 
3074 	EvalPlanQualSetPlan(&mtstate->mt_epqstate, subplan, arowmarks);
3075 
3076 	/*
3077 	 * If there are a lot of result relations, use a hash table to speed the
3078 	 * lookups.  If there are not a lot, a simple linear search is faster.
3079 	 *
3080 	 * It's not clear where the threshold is, but try 64 for starters.  In a
3081 	 * debugging build, use a small threshold so that we get some test
3082 	 * coverage of both code paths.
3083 	 */
3084 #ifdef USE_ASSERT_CHECKING
3085 #define MT_NRELS_HASH 4
3086 #else
3087 #define MT_NRELS_HASH 64
3088 #endif
3089 	if (nrels >= MT_NRELS_HASH)
3090 	{
3091 		HASHCTL		hash_ctl;
3092 
3093 		hash_ctl.keysize = sizeof(Oid);
3094 		hash_ctl.entrysize = sizeof(MTTargetRelLookup);
3095 		hash_ctl.hcxt = CurrentMemoryContext;
3096 		mtstate->mt_resultOidHash =
3097 			hash_create("ModifyTable target hash",
3098 						nrels, &hash_ctl,
3099 						HASH_ELEM | HASH_BLOBS | HASH_CONTEXT);
3100 		for (i = 0; i < nrels; i++)
3101 		{
3102 			Oid			hashkey;
3103 			MTTargetRelLookup *mtlookup;
3104 			bool		found;
3105 
3106 			resultRelInfo = &mtstate->resultRelInfo[i];
3107 			hashkey = RelationGetRelid(resultRelInfo->ri_RelationDesc);
3108 			mtlookup = (MTTargetRelLookup *)
3109 				hash_search(mtstate->mt_resultOidHash, &hashkey,
3110 							HASH_ENTER, &found);
3111 			Assert(!found);
3112 			mtlookup->relationIndex = i;
3113 		}
3114 	}
3115 	else
3116 		mtstate->mt_resultOidHash = NULL;
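	/*
	 * Illustration only: at run time, a per-tuple target-rel lookup (see
	 * ExecLookupResultRelByOid) either probes the hash or scans the array,
	 * roughly:
	 *
	 *		if (mtstate->mt_resultOidHash)
	 *			mtlookup = hash_search(mtstate->mt_resultOidHash, &resultoid,
	 *								   HASH_FIND, NULL);
	 *		else
	 *			-- linear scan of mtstate->resultRelInfo[0 .. nrels - 1]
	 */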
3117 
3118 	/*
3119 	 * Determine whether the FDW supports batch insert, and if so the batch
3120 	 * size (an FDW may support batching, but it may be disabled for the
3121 	 * server/table).
3122 	 *
3123 	 * We only do this for INSERT, so that for UPDATE/DELETE the batch size
3124 	 * remains set to 0.
3125 	 */
3126 	if (operation == CMD_INSERT)
3127 	{
3128 		/* INSERT can have only one target relation; inheritance is not expanded */
3129 		Assert(nrels == 1);
3130 		resultRelInfo = mtstate->resultRelInfo;
3131 		if (!resultRelInfo->ri_usesFdwDirectModify &&
3132 			resultRelInfo->ri_FdwRoutine != NULL &&
3133 			resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize &&
3134 			resultRelInfo->ri_FdwRoutine->ExecForeignBatchInsert)
3135 		{
3136 			resultRelInfo->ri_BatchSize =
3137 				resultRelInfo->ri_FdwRoutine->GetForeignModifyBatchSize(resultRelInfo);
3138 			Assert(resultRelInfo->ri_BatchSize >= 1);
3139 		}
3140 		else
3141 			resultRelInfo->ri_BatchSize = 1;
3142 	}
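	/*
	 * Illustration only: a hypothetical FDW's GetForeignModifyBatchSize
	 * callback might read a per-table or per-server "batch_size" option and
	 * clamp it to at least 1 (no batching), e.g.:
	 *
	 *		static int
	 *		myGetForeignModifyBatchSize(ResultRelInfo *resultRelInfo)
	 *		{
	 *			int		batch_size = 1;		-- hypothetical default
	 *
	 *			-- ... fetch batch_size from this foreign table's options ...
	 *			return Max(batch_size, 1);
	 *		}
	 */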
3143 
3144 	/*
3145 	 * Lastly, if this is not the primary (canSetTag) ModifyTable node, add it
3146 	 * to estate->es_auxmodifytables so that it will be run to completion by
3147 	 * ExecPostprocessPlan.  (It'd actually work fine to add the primary
3148 	 * ModifyTable node too, but there's no need.)  Note the use of lcons not
3149 	 * lappend: we need later-initialized ModifyTable nodes to be shut down
3150 	 * before earlier ones.  This ensures that we don't throw away RETURNING
3151 	 * rows that need to be seen by a later CTE subplan.
3152 	 */
3153 	if (!mtstate->canSetTag)
3154 		estate->es_auxmodifytables = lcons(mtstate,
3155 										   estate->es_auxmodifytables);
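	/*
	 * Illustration only (hypothetical query): auxiliary, non-canSetTag
	 * ModifyTable nodes come from data-modifying CTEs, e.g.
	 *
	 *		WITH moved AS (DELETE FROM src RETURNING *)
	 *		INSERT INTO dst SELECT * FROM moved;
	 *
	 * Here the DELETE's ModifyTable does not set the command tag, so it is
	 * registered above and driven to completion by ExecPostprocessPlan.
	 */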
3156 
3157 	return mtstate;
3158 }
3159 
3160 /* ----------------------------------------------------------------
3161  *		ExecEndModifyTable
3162  *
3163  *		Shuts down the plan.
3164  *
3165  *		Returns nothing of interest.
3166  * ----------------------------------------------------------------
3167  */
3168 void
3169 ExecEndModifyTable(ModifyTableState *node)
3170 {
3171 	int			i;
3172 
3173 	/*
3174 	 * Allow any FDWs to shut down
3175 	 */
3176 	for (i = 0; i < node->mt_nrels; i++)
3177 	{
3178 		int			j;
3179 		ResultRelInfo *resultRelInfo = node->resultRelInfo + i;
3180 
3181 		if (!resultRelInfo->ri_usesFdwDirectModify &&
3182 			resultRelInfo->ri_FdwRoutine != NULL &&
3183 			resultRelInfo->ri_FdwRoutine->EndForeignModify != NULL)
3184 			resultRelInfo->ri_FdwRoutine->EndForeignModify(node->ps.state,
3185 														   resultRelInfo);
3186 
3187 		/*
3188 		 * Clean up the initialized batch slots. This only matters for FDWs
3189 		 * with batching, but the other cases will have ri_NumSlotsInitialized
3190 		 * == 0.
3191 		 */
3192 		for (j = 0; j < resultRelInfo->ri_NumSlotsInitialized; j++)
3193 		{
3194 			ExecDropSingleTupleTableSlot(resultRelInfo->ri_Slots[j]);
3195 			ExecDropSingleTupleTableSlot(resultRelInfo->ri_PlanSlots[j]);
3196 		}
3197 	}
3198 
3199 	/*
3200 	 * Close all the partitioned tables, leaf partitions, and their indexes,
3201 	 * and release the slot used for tuple routing, if set.
3202 	 */
3203 	if (node->mt_partition_tuple_routing)
3204 	{
3205 		ExecCleanupTupleRouting(node, node->mt_partition_tuple_routing);
3206 
3207 		if (node->mt_root_tuple_slot)
3208 			ExecDropSingleTupleTableSlot(node->mt_root_tuple_slot);
3209 	}
3210 
3211 	/*
3212 	 * Free the exprcontext
3213 	 */
3214 	ExecFreeExprContext(&node->ps);
3215 
3216 	/*
3217 	 * clean out the tuple table
3218 	 */
3219 	if (node->ps.ps_ResultTupleSlot)
3220 		ExecClearTuple(node->ps.ps_ResultTupleSlot);
3221 
3222 	/*
3223 	 * Terminate EPQ execution if active
3224 	 */
3225 	EvalPlanQualEnd(&node->mt_epqstate);
3226 
3227 	/*
3228 	 * shut down subplan
3229 	 */
3230 	ExecEndNode(outerPlanState(node));
3231 }
3232 
3233 void
3234 ExecReScanModifyTable(ModifyTableState *node)
3235 {
3236 	/*
3237 	 * Currently, we don't need to support rescan on ModifyTable nodes. The
3238 	 * semantics of that would be a bit debatable anyway.
3239 	 */
3240 	elog(ERROR, "ExecReScanModifyTable is not implemented");
3241 }
3242